{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999915560152837, "eval_steps": 61000, "global_step": 236854, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.443984716387663e-05, "grad_norm": 10.087146759033203, "learning_rate": 4.2219032339778774e-09, "loss": 0.7973, "step": 10 }, { "epoch": 0.00016887969432775326, "grad_norm": 6.2347187995910645, "learning_rate": 8.443806467955755e-09, "loss": 0.7518, "step": 20 }, { "epoch": 0.0002533195414916299, "grad_norm": 10.24992847442627, "learning_rate": 1.2665709701933633e-08, "loss": 0.7661, "step": 30 }, { "epoch": 0.0003377593886555065, "grad_norm": 10.03251838684082, "learning_rate": 1.688761293591151e-08, "loss": 0.8023, "step": 40 }, { "epoch": 0.0004221992358193832, "grad_norm": 9.621115684509277, "learning_rate": 2.1109516169889388e-08, "loss": 0.7935, "step": 50 }, { "epoch": 0.0005066390829832598, "grad_norm": 9.284114837646484, "learning_rate": 2.5331419403867266e-08, "loss": 0.8331, "step": 60 }, { "epoch": 0.0005910789301471365, "grad_norm": 9.815573692321777, "learning_rate": 2.955332263784514e-08, "loss": 0.8451, "step": 70 }, { "epoch": 0.000675518777311013, "grad_norm": 10.312376976013184, "learning_rate": 3.377522587182302e-08, "loss": 0.7666, "step": 80 }, { "epoch": 0.0007599586244748897, "grad_norm": 11.606963157653809, "learning_rate": 3.79971291058009e-08, "loss": 0.8013, "step": 90 }, { "epoch": 0.0008443984716387663, "grad_norm": 7.778165340423584, "learning_rate": 4.2219032339778776e-08, "loss": 0.7136, "step": 100 }, { "epoch": 0.000928838318802643, "grad_norm": 8.04053783416748, "learning_rate": 4.6440935573756654e-08, "loss": 0.7763, "step": 110 }, { "epoch": 0.0010132781659665197, "grad_norm": 11.623458862304688, "learning_rate": 5.066283880773453e-08, "loss": 0.7874, "step": 120 }, { "epoch": 0.0010977180131303962, "grad_norm": 9.442659378051758, "learning_rate": 5.488474204171241e-08, "loss": 0.7871, "step": 130 }, { "epoch": 0.001182157860294273, "grad_norm": 8.898256301879883, "learning_rate": 5.910664527569028e-08, "loss": 0.7511, "step": 140 }, { "epoch": 0.0012665977074581495, "grad_norm": 8.01179027557373, "learning_rate": 6.332854850966817e-08, "loss": 0.759, "step": 150 }, { "epoch": 0.001351037554622026, "grad_norm": 9.182121276855469, "learning_rate": 6.755045174364604e-08, "loss": 0.7546, "step": 160 }, { "epoch": 0.0014354774017859028, "grad_norm": 8.594508171081543, "learning_rate": 7.177235497762391e-08, "loss": 0.776, "step": 170 }, { "epoch": 0.0015199172489497794, "grad_norm": 7.111359119415283, "learning_rate": 7.59942582116018e-08, "loss": 0.72, "step": 180 }, { "epoch": 0.001604357096113656, "grad_norm": 13.45103645324707, "learning_rate": 8.021616144557968e-08, "loss": 0.7382, "step": 190 }, { "epoch": 0.0016887969432775327, "grad_norm": 7.127725601196289, "learning_rate": 8.443806467955755e-08, "loss": 0.7864, "step": 200 }, { "epoch": 0.0017732367904414092, "grad_norm": 8.798710823059082, "learning_rate": 8.865996791353544e-08, "loss": 0.6957, "step": 210 }, { "epoch": 0.001857676637605286, "grad_norm": 7.282878398895264, "learning_rate": 9.288187114751331e-08, "loss": 0.6582, "step": 220 }, { "epoch": 0.0019421164847691626, "grad_norm": 6.2688751220703125, "learning_rate": 9.710377438149118e-08, "loss": 0.6536, "step": 230 }, { "epoch": 0.0020265563319330393, "grad_norm": 9.69054889678955, "learning_rate": 1.0132567761546906e-07, "loss": 0.7056, "step": 240 }, { "epoch": 0.0021109961790969156, "grad_norm": 5.4253249168396, "learning_rate": 1.0554758084944694e-07, "loss": 0.6438, "step": 250 }, { "epoch": 0.0021954360262607924, "grad_norm": 8.247864723205566, "learning_rate": 1.0976948408342482e-07, "loss": 0.6689, "step": 260 }, { "epoch": 0.002279875873424669, "grad_norm": 6.820531845092773, "learning_rate": 1.1399138731740269e-07, "loss": 0.6388, "step": 270 }, { "epoch": 0.002364315720588546, "grad_norm": 6.143106937408447, "learning_rate": 1.1821329055138056e-07, "loss": 0.6479, "step": 280 }, { "epoch": 0.0024487555677524223, "grad_norm": 7.08033561706543, "learning_rate": 1.2243519378535845e-07, "loss": 0.6279, "step": 290 }, { "epoch": 0.002533195414916299, "grad_norm": 7.196671485900879, "learning_rate": 1.2665709701933633e-07, "loss": 0.5602, "step": 300 }, { "epoch": 0.002617635262080176, "grad_norm": 7.062415599822998, "learning_rate": 1.308790002533142e-07, "loss": 0.5408, "step": 310 }, { "epoch": 0.002702075109244052, "grad_norm": 3.680528402328491, "learning_rate": 1.3510090348729208e-07, "loss": 0.5401, "step": 320 }, { "epoch": 0.002786514956407929, "grad_norm": 4.7802557945251465, "learning_rate": 1.3932280672126996e-07, "loss": 0.544, "step": 330 }, { "epoch": 0.0028709548035718057, "grad_norm": 5.278905391693115, "learning_rate": 1.4354470995524782e-07, "loss": 0.4883, "step": 340 }, { "epoch": 0.002955394650735682, "grad_norm": 3.691737174987793, "learning_rate": 1.477666131892257e-07, "loss": 0.4841, "step": 350 }, { "epoch": 0.0030398344978995588, "grad_norm": 4.834394454956055, "learning_rate": 1.519885164232036e-07, "loss": 0.4325, "step": 360 }, { "epoch": 0.0031242743450634355, "grad_norm": 4.723413944244385, "learning_rate": 1.5621041965718148e-07, "loss": 0.4036, "step": 370 }, { "epoch": 0.003208714192227312, "grad_norm": 5.5974016189575195, "learning_rate": 1.6043232289115936e-07, "loss": 0.3831, "step": 380 }, { "epoch": 0.0032931540393911886, "grad_norm": 4.842167377471924, "learning_rate": 1.6465422612513722e-07, "loss": 0.3963, "step": 390 }, { "epoch": 0.0033775938865550654, "grad_norm": 4.024375915527344, "learning_rate": 1.688761293591151e-07, "loss": 0.3525, "step": 400 }, { "epoch": 0.003462033733718942, "grad_norm": 3.815513849258423, "learning_rate": 1.73098032593093e-07, "loss": 0.3432, "step": 410 }, { "epoch": 0.0035464735808828185, "grad_norm": 3.6889092922210693, "learning_rate": 1.7731993582707087e-07, "loss": 0.3255, "step": 420 }, { "epoch": 0.0036309134280466952, "grad_norm": 4.664348602294922, "learning_rate": 1.8154183906104873e-07, "loss": 0.2978, "step": 430 }, { "epoch": 0.003715353275210572, "grad_norm": 2.6930270195007324, "learning_rate": 1.8576374229502662e-07, "loss": 0.2909, "step": 440 }, { "epoch": 0.0037997931223744483, "grad_norm": 5.037738800048828, "learning_rate": 1.8998564552900448e-07, "loss": 0.295, "step": 450 }, { "epoch": 0.003884232969538325, "grad_norm": 3.924527406692505, "learning_rate": 1.9420754876298236e-07, "loss": 0.2552, "step": 460 }, { "epoch": 0.003968672816702202, "grad_norm": 4.591635704040527, "learning_rate": 1.9842945199696027e-07, "loss": 0.2534, "step": 470 }, { "epoch": 0.004053112663866079, "grad_norm": 2.945456027984619, "learning_rate": 2.0265135523093813e-07, "loss": 0.2601, "step": 480 }, { "epoch": 0.004137552511029955, "grad_norm": 2.9587841033935547, "learning_rate": 2.0687325846491601e-07, "loss": 0.2395, "step": 490 }, { "epoch": 0.004221992358193831, "grad_norm": 2.953735828399658, "learning_rate": 2.1109516169889387e-07, "loss": 0.2116, "step": 500 }, { "epoch": 0.004306432205357708, "grad_norm": 4.143046855926514, "learning_rate": 2.1531706493287173e-07, "loss": 0.2094, "step": 510 }, { "epoch": 0.004390872052521585, "grad_norm": 3.422013282775879, "learning_rate": 2.1953896816684964e-07, "loss": 0.2042, "step": 520 }, { "epoch": 0.004475311899685462, "grad_norm": 4.896239757537842, "learning_rate": 2.2376087140082753e-07, "loss": 0.209, "step": 530 }, { "epoch": 0.004559751746849338, "grad_norm": 3.695052146911621, "learning_rate": 2.2798277463480539e-07, "loss": 0.1674, "step": 540 }, { "epoch": 0.004644191594013215, "grad_norm": 4.452960968017578, "learning_rate": 2.3220467786878327e-07, "loss": 0.2238, "step": 550 }, { "epoch": 0.004728631441177092, "grad_norm": 3.391371011734009, "learning_rate": 2.3642658110276113e-07, "loss": 0.1863, "step": 560 }, { "epoch": 0.004813071288340968, "grad_norm": 3.195920467376709, "learning_rate": 2.40648484336739e-07, "loss": 0.2045, "step": 570 }, { "epoch": 0.0048975111355048445, "grad_norm": 3.7249486446380615, "learning_rate": 2.448703875707169e-07, "loss": 0.1959, "step": 580 }, { "epoch": 0.004981950982668721, "grad_norm": 5.279894828796387, "learning_rate": 2.4909229080469476e-07, "loss": 0.2041, "step": 590 }, { "epoch": 0.005066390829832598, "grad_norm": 3.12263822555542, "learning_rate": 2.5331419403867267e-07, "loss": 0.1821, "step": 600 }, { "epoch": 0.005150830676996475, "grad_norm": 4.295114517211914, "learning_rate": 2.5753609727265053e-07, "loss": 0.2206, "step": 610 }, { "epoch": 0.005235270524160352, "grad_norm": 2.8154942989349365, "learning_rate": 2.617580005066284e-07, "loss": 0.1908, "step": 620 }, { "epoch": 0.0053197103713242275, "grad_norm": 2.797760009765625, "learning_rate": 2.659799037406063e-07, "loss": 0.1699, "step": 630 }, { "epoch": 0.005404150218488104, "grad_norm": 4.516080379486084, "learning_rate": 2.7020180697458416e-07, "loss": 0.1942, "step": 640 }, { "epoch": 0.005488590065651981, "grad_norm": 2.9561781883239746, "learning_rate": 2.7442371020856207e-07, "loss": 0.1765, "step": 650 }, { "epoch": 0.005573029912815858, "grad_norm": 3.1466033458709717, "learning_rate": 2.786456134425399e-07, "loss": 0.1581, "step": 660 }, { "epoch": 0.0056574697599797346, "grad_norm": 3.5887973308563232, "learning_rate": 2.828675166765178e-07, "loss": 0.1954, "step": 670 }, { "epoch": 0.005741909607143611, "grad_norm": 2.723294973373413, "learning_rate": 2.8708941991049564e-07, "loss": 0.1567, "step": 680 }, { "epoch": 0.005826349454307488, "grad_norm": 2.6293394565582275, "learning_rate": 2.9131132314447355e-07, "loss": 0.1858, "step": 690 }, { "epoch": 0.005910789301471364, "grad_norm": 2.7193658351898193, "learning_rate": 2.955332263784514e-07, "loss": 0.1609, "step": 700 }, { "epoch": 0.005995229148635241, "grad_norm": 3.150743007659912, "learning_rate": 2.997551296124293e-07, "loss": 0.1246, "step": 710 }, { "epoch": 0.0060796689957991175, "grad_norm": 3.769573450088501, "learning_rate": 3.039770328464072e-07, "loss": 0.1691, "step": 720 }, { "epoch": 0.006164108842962994, "grad_norm": 3.6088621616363525, "learning_rate": 3.0819893608038504e-07, "loss": 0.1707, "step": 730 }, { "epoch": 0.006248548690126871, "grad_norm": 3.640928268432617, "learning_rate": 3.1242083931436295e-07, "loss": 0.1604, "step": 740 }, { "epoch": 0.006332988537290748, "grad_norm": 3.4169061183929443, "learning_rate": 3.166427425483408e-07, "loss": 0.1468, "step": 750 }, { "epoch": 0.006417428384454624, "grad_norm": 3.2991421222686768, "learning_rate": 3.208646457823187e-07, "loss": 0.1335, "step": 760 }, { "epoch": 0.0065018682316185005, "grad_norm": 4.769773960113525, "learning_rate": 3.250865490162966e-07, "loss": 0.1492, "step": 770 }, { "epoch": 0.006586308078782377, "grad_norm": 2.5119946002960205, "learning_rate": 3.2930845225027444e-07, "loss": 0.1319, "step": 780 }, { "epoch": 0.006670747925946254, "grad_norm": 2.544731855392456, "learning_rate": 3.3353035548425235e-07, "loss": 0.1417, "step": 790 }, { "epoch": 0.006755187773110131, "grad_norm": 2.5548043251037598, "learning_rate": 3.377522587182302e-07, "loss": 0.124, "step": 800 }, { "epoch": 0.0068396276202740075, "grad_norm": 2.826637029647827, "learning_rate": 3.4197416195220807e-07, "loss": 0.1433, "step": 810 }, { "epoch": 0.006924067467437884, "grad_norm": 2.959484815597534, "learning_rate": 3.46196065186186e-07, "loss": 0.1342, "step": 820 }, { "epoch": 0.00700850731460176, "grad_norm": 2.8642234802246094, "learning_rate": 3.5041796842016384e-07, "loss": 0.1348, "step": 830 }, { "epoch": 0.007092947161765637, "grad_norm": 5.479578971862793, "learning_rate": 3.5463987165414175e-07, "loss": 0.1046, "step": 840 }, { "epoch": 0.007177387008929514, "grad_norm": 2.6781153678894043, "learning_rate": 3.5886177488811955e-07, "loss": 0.1281, "step": 850 }, { "epoch": 0.0072618268560933905, "grad_norm": 4.198520660400391, "learning_rate": 3.6308367812209746e-07, "loss": 0.1432, "step": 860 }, { "epoch": 0.007346266703257267, "grad_norm": 7.023375511169434, "learning_rate": 3.673055813560753e-07, "loss": 0.1433, "step": 870 }, { "epoch": 0.007430706550421144, "grad_norm": 2.821544647216797, "learning_rate": 3.7152748459005323e-07, "loss": 0.145, "step": 880 }, { "epoch": 0.007515146397585021, "grad_norm": 4.544361591339111, "learning_rate": 3.7574938782403115e-07, "loss": 0.1405, "step": 890 }, { "epoch": 0.007599586244748897, "grad_norm": 2.517685890197754, "learning_rate": 3.7997129105800895e-07, "loss": 0.1867, "step": 900 }, { "epoch": 0.0076840260919127734, "grad_norm": 2.6137266159057617, "learning_rate": 3.8419319429198686e-07, "loss": 0.1248, "step": 910 }, { "epoch": 0.00776846593907665, "grad_norm": 3.1351020336151123, "learning_rate": 3.884150975259647e-07, "loss": 0.1594, "step": 920 }, { "epoch": 0.007852905786240526, "grad_norm": 2.6930434703826904, "learning_rate": 3.9263700075994263e-07, "loss": 0.112, "step": 930 }, { "epoch": 0.007937345633404404, "grad_norm": 3.444685935974121, "learning_rate": 3.9685890399392054e-07, "loss": 0.1245, "step": 940 }, { "epoch": 0.00802178548056828, "grad_norm": 2.9987869262695312, "learning_rate": 4.0108080722789835e-07, "loss": 0.1161, "step": 950 }, { "epoch": 0.008106225327732157, "grad_norm": 3.6073319911956787, "learning_rate": 4.0530271046187626e-07, "loss": 0.1169, "step": 960 }, { "epoch": 0.008190665174896033, "grad_norm": 2.983776092529297, "learning_rate": 4.095246136958541e-07, "loss": 0.1254, "step": 970 }, { "epoch": 0.00827510502205991, "grad_norm": 3.4330084323883057, "learning_rate": 4.1374651692983203e-07, "loss": 0.1308, "step": 980 }, { "epoch": 0.008359544869223787, "grad_norm": 2.4620275497436523, "learning_rate": 4.179684201638099e-07, "loss": 0.1223, "step": 990 }, { "epoch": 0.008443984716387663, "grad_norm": 4.431446075439453, "learning_rate": 4.2219032339778775e-07, "loss": 0.0994, "step": 1000 }, { "epoch": 0.00852842456355154, "grad_norm": 2.3285257816314697, "learning_rate": 4.2641222663176566e-07, "loss": 0.1154, "step": 1010 }, { "epoch": 0.008612864410715416, "grad_norm": 2.7794997692108154, "learning_rate": 4.3063412986574346e-07, "loss": 0.1195, "step": 1020 }, { "epoch": 0.008697304257879294, "grad_norm": 5.141390800476074, "learning_rate": 4.348560330997214e-07, "loss": 0.1064, "step": 1030 }, { "epoch": 0.00878174410504317, "grad_norm": 3.273175001144409, "learning_rate": 4.390779363336993e-07, "loss": 0.1009, "step": 1040 }, { "epoch": 0.008866183952207047, "grad_norm": 2.47127366065979, "learning_rate": 4.4329983956767714e-07, "loss": 0.1037, "step": 1050 }, { "epoch": 0.008950623799370923, "grad_norm": 2.3374221324920654, "learning_rate": 4.4752174280165506e-07, "loss": 0.1149, "step": 1060 }, { "epoch": 0.009035063646534799, "grad_norm": 3.0102171897888184, "learning_rate": 4.5174364603563286e-07, "loss": 0.1055, "step": 1070 }, { "epoch": 0.009119503493698677, "grad_norm": 4.153960227966309, "learning_rate": 4.5596554926961077e-07, "loss": 0.1072, "step": 1080 }, { "epoch": 0.009203943340862553, "grad_norm": 2.732839822769165, "learning_rate": 4.6018745250358863e-07, "loss": 0.1153, "step": 1090 }, { "epoch": 0.00928838318802643, "grad_norm": 3.518399715423584, "learning_rate": 4.6440935573756654e-07, "loss": 0.1509, "step": 1100 }, { "epoch": 0.009372823035190306, "grad_norm": 2.9556548595428467, "learning_rate": 4.6863125897154445e-07, "loss": 0.1097, "step": 1110 }, { "epoch": 0.009457262882354184, "grad_norm": 2.9214866161346436, "learning_rate": 4.7285316220552226e-07, "loss": 0.0888, "step": 1120 }, { "epoch": 0.00954170272951806, "grad_norm": 3.9316062927246094, "learning_rate": 4.770750654395002e-07, "loss": 0.1165, "step": 1130 }, { "epoch": 0.009626142576681936, "grad_norm": 3.583216428756714, "learning_rate": 4.81296968673478e-07, "loss": 0.0996, "step": 1140 }, { "epoch": 0.009710582423845813, "grad_norm": 3.952094078063965, "learning_rate": 4.855188719074559e-07, "loss": 0.126, "step": 1150 }, { "epoch": 0.009795022271009689, "grad_norm": 2.9675400257110596, "learning_rate": 4.897407751414338e-07, "loss": 0.1178, "step": 1160 }, { "epoch": 0.009879462118173567, "grad_norm": 2.459113836288452, "learning_rate": 4.939626783754117e-07, "loss": 0.1144, "step": 1170 }, { "epoch": 0.009963901965337443, "grad_norm": 2.266054630279541, "learning_rate": 4.981845816093895e-07, "loss": 0.0973, "step": 1180 }, { "epoch": 0.010048341812501319, "grad_norm": 2.7974767684936523, "learning_rate": 5.024064848433674e-07, "loss": 0.1053, "step": 1190 }, { "epoch": 0.010132781659665196, "grad_norm": 3.4881975650787354, "learning_rate": 5.066283880773453e-07, "loss": 0.1136, "step": 1200 }, { "epoch": 0.010217221506829072, "grad_norm": 4.070862770080566, "learning_rate": 5.108502913113232e-07, "loss": 0.0959, "step": 1210 }, { "epoch": 0.01030166135399295, "grad_norm": 3.4497318267822266, "learning_rate": 5.150721945453011e-07, "loss": 0.0918, "step": 1220 }, { "epoch": 0.010386101201156826, "grad_norm": 4.7074384689331055, "learning_rate": 5.192940977792789e-07, "loss": 0.1156, "step": 1230 }, { "epoch": 0.010470541048320703, "grad_norm": 4.996912479400635, "learning_rate": 5.235160010132568e-07, "loss": 0.1032, "step": 1240 }, { "epoch": 0.010554980895484579, "grad_norm": 3.274121046066284, "learning_rate": 5.277379042472347e-07, "loss": 0.1112, "step": 1250 }, { "epoch": 0.010639420742648455, "grad_norm": 2.201754093170166, "learning_rate": 5.319598074812126e-07, "loss": 0.096, "step": 1260 }, { "epoch": 0.010723860589812333, "grad_norm": 3.4792606830596924, "learning_rate": 5.361817107151905e-07, "loss": 0.1246, "step": 1270 }, { "epoch": 0.010808300436976209, "grad_norm": 4.000758171081543, "learning_rate": 5.404036139491683e-07, "loss": 0.1387, "step": 1280 }, { "epoch": 0.010892740284140086, "grad_norm": 2.6465868949890137, "learning_rate": 5.446255171831462e-07, "loss": 0.0907, "step": 1290 }, { "epoch": 0.010977180131303962, "grad_norm": 2.6772477626800537, "learning_rate": 5.488474204171241e-07, "loss": 0.0919, "step": 1300 }, { "epoch": 0.01106161997846784, "grad_norm": 3.1138808727264404, "learning_rate": 5.530693236511019e-07, "loss": 0.0899, "step": 1310 }, { "epoch": 0.011146059825631716, "grad_norm": 2.717363119125366, "learning_rate": 5.572912268850799e-07, "loss": 0.0971, "step": 1320 }, { "epoch": 0.011230499672795591, "grad_norm": 2.9028117656707764, "learning_rate": 5.615131301190577e-07, "loss": 0.0721, "step": 1330 }, { "epoch": 0.011314939519959469, "grad_norm": 4.128818511962891, "learning_rate": 5.657350333530356e-07, "loss": 0.1047, "step": 1340 }, { "epoch": 0.011399379367123345, "grad_norm": 2.4739773273468018, "learning_rate": 5.699569365870135e-07, "loss": 0.0961, "step": 1350 }, { "epoch": 0.011483819214287223, "grad_norm": 3.194352149963379, "learning_rate": 5.741788398209913e-07, "loss": 0.0771, "step": 1360 }, { "epoch": 0.011568259061451099, "grad_norm": 2.8473024368286133, "learning_rate": 5.784007430549692e-07, "loss": 0.1007, "step": 1370 }, { "epoch": 0.011652698908614976, "grad_norm": 3.097639560699463, "learning_rate": 5.826226462889471e-07, "loss": 0.0897, "step": 1380 }, { "epoch": 0.011737138755778852, "grad_norm": 2.7524163722991943, "learning_rate": 5.86844549522925e-07, "loss": 0.0859, "step": 1390 }, { "epoch": 0.011821578602942728, "grad_norm": 2.812886953353882, "learning_rate": 5.910664527569028e-07, "loss": 0.102, "step": 1400 }, { "epoch": 0.011906018450106606, "grad_norm": 2.871734857559204, "learning_rate": 5.952883559908807e-07, "loss": 0.0913, "step": 1410 }, { "epoch": 0.011990458297270482, "grad_norm": 2.9918084144592285, "learning_rate": 5.995102592248586e-07, "loss": 0.0865, "step": 1420 }, { "epoch": 0.01207489814443436, "grad_norm": 1.6703165769577026, "learning_rate": 6.037321624588365e-07, "loss": 0.0839, "step": 1430 }, { "epoch": 0.012159337991598235, "grad_norm": 3.3101541996002197, "learning_rate": 6.079540656928144e-07, "loss": 0.0909, "step": 1440 }, { "epoch": 0.012243777838762113, "grad_norm": 1.9424680471420288, "learning_rate": 6.121759689267922e-07, "loss": 0.0979, "step": 1450 }, { "epoch": 0.012328217685925989, "grad_norm": 2.4500296115875244, "learning_rate": 6.163978721607701e-07, "loss": 0.0958, "step": 1460 }, { "epoch": 0.012412657533089864, "grad_norm": 2.3683817386627197, "learning_rate": 6.20619775394748e-07, "loss": 0.0865, "step": 1470 }, { "epoch": 0.012497097380253742, "grad_norm": 3.524068832397461, "learning_rate": 6.248416786287259e-07, "loss": 0.0963, "step": 1480 }, { "epoch": 0.012581537227417618, "grad_norm": 1.8881573677062988, "learning_rate": 6.290635818627038e-07, "loss": 0.0786, "step": 1490 }, { "epoch": 0.012665977074581496, "grad_norm": 3.962364673614502, "learning_rate": 6.332854850966816e-07, "loss": 0.0755, "step": 1500 }, { "epoch": 0.012750416921745372, "grad_norm": 2.778834342956543, "learning_rate": 6.375073883306596e-07, "loss": 0.0923, "step": 1510 }, { "epoch": 0.012834856768909247, "grad_norm": 3.418003797531128, "learning_rate": 6.417292915646374e-07, "loss": 0.0783, "step": 1520 }, { "epoch": 0.012919296616073125, "grad_norm": 2.2331998348236084, "learning_rate": 6.459511947986152e-07, "loss": 0.0878, "step": 1530 }, { "epoch": 0.013003736463237001, "grad_norm": 3.1121506690979004, "learning_rate": 6.501730980325932e-07, "loss": 0.1073, "step": 1540 }, { "epoch": 0.013088176310400879, "grad_norm": 2.8491971492767334, "learning_rate": 6.54395001266571e-07, "loss": 0.0617, "step": 1550 }, { "epoch": 0.013172616157564754, "grad_norm": 3.3820576667785645, "learning_rate": 6.586169045005489e-07, "loss": 0.0589, "step": 1560 }, { "epoch": 0.013257056004728632, "grad_norm": 3.2648940086364746, "learning_rate": 6.628388077345267e-07, "loss": 0.0709, "step": 1570 }, { "epoch": 0.013341495851892508, "grad_norm": 4.8924689292907715, "learning_rate": 6.670607109685047e-07, "loss": 0.0778, "step": 1580 }, { "epoch": 0.013425935699056384, "grad_norm": 2.830343008041382, "learning_rate": 6.712826142024826e-07, "loss": 0.0848, "step": 1590 }, { "epoch": 0.013510375546220262, "grad_norm": 2.959474563598633, "learning_rate": 6.755045174364604e-07, "loss": 0.0903, "step": 1600 }, { "epoch": 0.013594815393384137, "grad_norm": 1.6519373655319214, "learning_rate": 6.797264206704384e-07, "loss": 0.058, "step": 1610 }, { "epoch": 0.013679255240548015, "grad_norm": 3.053774833679199, "learning_rate": 6.839483239044161e-07, "loss": 0.1108, "step": 1620 }, { "epoch": 0.013763695087711891, "grad_norm": 3.433568000793457, "learning_rate": 6.88170227138394e-07, "loss": 0.0835, "step": 1630 }, { "epoch": 0.013848134934875769, "grad_norm": 3.667022943496704, "learning_rate": 6.92392130372372e-07, "loss": 0.0732, "step": 1640 }, { "epoch": 0.013932574782039645, "grad_norm": 3.385007858276367, "learning_rate": 6.966140336063498e-07, "loss": 0.0681, "step": 1650 }, { "epoch": 0.01401701462920352, "grad_norm": 3.701810121536255, "learning_rate": 7.008359368403277e-07, "loss": 0.0976, "step": 1660 }, { "epoch": 0.014101454476367398, "grad_norm": 2.1519477367401123, "learning_rate": 7.050578400743055e-07, "loss": 0.0758, "step": 1670 }, { "epoch": 0.014185894323531274, "grad_norm": 2.4112610816955566, "learning_rate": 7.092797433082835e-07, "loss": 0.0789, "step": 1680 }, { "epoch": 0.014270334170695152, "grad_norm": 2.329559087753296, "learning_rate": 7.135016465422614e-07, "loss": 0.0828, "step": 1690 }, { "epoch": 0.014354774017859027, "grad_norm": 1.5967094898223877, "learning_rate": 7.177235497762391e-07, "loss": 0.0785, "step": 1700 }, { "epoch": 0.014439213865022905, "grad_norm": 4.322620391845703, "learning_rate": 7.219454530102172e-07, "loss": 0.0711, "step": 1710 }, { "epoch": 0.014523653712186781, "grad_norm": 2.2584266662597656, "learning_rate": 7.261673562441949e-07, "loss": 0.0896, "step": 1720 }, { "epoch": 0.014608093559350657, "grad_norm": 3.9456627368927, "learning_rate": 7.303892594781728e-07, "loss": 0.0633, "step": 1730 }, { "epoch": 0.014692533406514535, "grad_norm": 2.3157594203948975, "learning_rate": 7.346111627121506e-07, "loss": 0.0827, "step": 1740 }, { "epoch": 0.01477697325367841, "grad_norm": 2.4921655654907227, "learning_rate": 7.388330659461286e-07, "loss": 0.0792, "step": 1750 }, { "epoch": 0.014861413100842288, "grad_norm": 2.454730272293091, "learning_rate": 7.430549691801065e-07, "loss": 0.0575, "step": 1760 }, { "epoch": 0.014945852948006164, "grad_norm": 4.029074192047119, "learning_rate": 7.472768724140843e-07, "loss": 0.1152, "step": 1770 }, { "epoch": 0.015030292795170042, "grad_norm": 2.2625174522399902, "learning_rate": 7.514987756480623e-07, "loss": 0.0833, "step": 1780 }, { "epoch": 0.015114732642333917, "grad_norm": 2.129829168319702, "learning_rate": 7.5572067888204e-07, "loss": 0.0995, "step": 1790 }, { "epoch": 0.015199172489497793, "grad_norm": 2.532195568084717, "learning_rate": 7.599425821160179e-07, "loss": 0.0694, "step": 1800 }, { "epoch": 0.015283612336661671, "grad_norm": 2.9687838554382324, "learning_rate": 7.641644853499959e-07, "loss": 0.0833, "step": 1810 }, { "epoch": 0.015368052183825547, "grad_norm": 2.9400150775909424, "learning_rate": 7.683863885839737e-07, "loss": 0.0723, "step": 1820 }, { "epoch": 0.015452492030989425, "grad_norm": 2.394500970840454, "learning_rate": 7.726082918179516e-07, "loss": 0.0765, "step": 1830 }, { "epoch": 0.0155369318781533, "grad_norm": 3.422314167022705, "learning_rate": 7.768301950519294e-07, "loss": 0.0926, "step": 1840 }, { "epoch": 0.015621371725317176, "grad_norm": 2.5770370960235596, "learning_rate": 7.810520982859074e-07, "loss": 0.0519, "step": 1850 }, { "epoch": 0.015705811572481052, "grad_norm": 2.484764814376831, "learning_rate": 7.852740015198853e-07, "loss": 0.0815, "step": 1860 }, { "epoch": 0.01579025141964493, "grad_norm": 2.1195855140686035, "learning_rate": 7.89495904753863e-07, "loss": 0.0683, "step": 1870 }, { "epoch": 0.015874691266808807, "grad_norm": 3.3871116638183594, "learning_rate": 7.937178079878411e-07, "loss": 0.0622, "step": 1880 }, { "epoch": 0.015959131113972683, "grad_norm": 2.644131898880005, "learning_rate": 7.979397112218188e-07, "loss": 0.067, "step": 1890 }, { "epoch": 0.01604357096113656, "grad_norm": 2.731428384780884, "learning_rate": 8.021616144557967e-07, "loss": 0.0781, "step": 1900 }, { "epoch": 0.01612801080830044, "grad_norm": 2.8757500648498535, "learning_rate": 8.063835176897746e-07, "loss": 0.076, "step": 1910 }, { "epoch": 0.016212450655464315, "grad_norm": 2.741854429244995, "learning_rate": 8.106054209237525e-07, "loss": 0.0829, "step": 1920 }, { "epoch": 0.01629689050262819, "grad_norm": 3.4143950939178467, "learning_rate": 8.148273241577304e-07, "loss": 0.0829, "step": 1930 }, { "epoch": 0.016381330349792066, "grad_norm": 3.206226110458374, "learning_rate": 8.190492273917082e-07, "loss": 0.0986, "step": 1940 }, { "epoch": 0.016465770196955942, "grad_norm": 2.104917526245117, "learning_rate": 8.232711306256862e-07, "loss": 0.0646, "step": 1950 }, { "epoch": 0.01655021004411982, "grad_norm": 2.133633613586426, "learning_rate": 8.274930338596641e-07, "loss": 0.0496, "step": 1960 }, { "epoch": 0.016634649891283698, "grad_norm": 2.9391396045684814, "learning_rate": 8.317149370936418e-07, "loss": 0.0758, "step": 1970 }, { "epoch": 0.016719089738447573, "grad_norm": 2.271786689758301, "learning_rate": 8.359368403276198e-07, "loss": 0.0623, "step": 1980 }, { "epoch": 0.01680352958561145, "grad_norm": 2.387314796447754, "learning_rate": 8.401587435615976e-07, "loss": 0.0757, "step": 1990 }, { "epoch": 0.016887969432775325, "grad_norm": 2.5915751457214355, "learning_rate": 8.443806467955755e-07, "loss": 0.0795, "step": 2000 }, { "epoch": 0.016972409279939205, "grad_norm": 2.1149685382843018, "learning_rate": 8.486025500295534e-07, "loss": 0.0592, "step": 2010 }, { "epoch": 0.01705684912710308, "grad_norm": 2.2077908515930176, "learning_rate": 8.528244532635313e-07, "loss": 0.077, "step": 2020 }, { "epoch": 0.017141288974266956, "grad_norm": 2.148824691772461, "learning_rate": 8.570463564975092e-07, "loss": 0.0585, "step": 2030 }, { "epoch": 0.017225728821430832, "grad_norm": 3.27801513671875, "learning_rate": 8.612682597314869e-07, "loss": 0.0818, "step": 2040 }, { "epoch": 0.017310168668594708, "grad_norm": 2.5208075046539307, "learning_rate": 8.65490162965465e-07, "loss": 0.0875, "step": 2050 }, { "epoch": 0.017394608515758588, "grad_norm": 2.1636486053466797, "learning_rate": 8.697120661994427e-07, "loss": 0.0573, "step": 2060 }, { "epoch": 0.017479048362922463, "grad_norm": 4.208245754241943, "learning_rate": 8.739339694334206e-07, "loss": 0.0605, "step": 2070 }, { "epoch": 0.01756348821008634, "grad_norm": 3.2256760597229004, "learning_rate": 8.781558726673986e-07, "loss": 0.0791, "step": 2080 }, { "epoch": 0.017647928057250215, "grad_norm": 2.9323983192443848, "learning_rate": 8.823777759013764e-07, "loss": 0.0646, "step": 2090 }, { "epoch": 0.017732367904414095, "grad_norm": 4.324909687042236, "learning_rate": 8.865996791353543e-07, "loss": 0.05, "step": 2100 }, { "epoch": 0.01781680775157797, "grad_norm": 2.298271656036377, "learning_rate": 8.908215823693321e-07, "loss": 0.0763, "step": 2110 }, { "epoch": 0.017901247598741846, "grad_norm": 3.07177472114563, "learning_rate": 8.950434856033101e-07, "loss": 0.0848, "step": 2120 }, { "epoch": 0.017985687445905722, "grad_norm": 3.139775037765503, "learning_rate": 8.99265388837288e-07, "loss": 0.0591, "step": 2130 }, { "epoch": 0.018070127293069598, "grad_norm": 3.842341184616089, "learning_rate": 9.034872920712657e-07, "loss": 0.0437, "step": 2140 }, { "epoch": 0.018154567140233478, "grad_norm": 2.7231709957122803, "learning_rate": 9.077091953052437e-07, "loss": 0.0677, "step": 2150 }, { "epoch": 0.018239006987397353, "grad_norm": 3.344883441925049, "learning_rate": 9.119310985392215e-07, "loss": 0.0879, "step": 2160 }, { "epoch": 0.01832344683456123, "grad_norm": 3.3105087280273438, "learning_rate": 9.161530017731994e-07, "loss": 0.0694, "step": 2170 }, { "epoch": 0.018407886681725105, "grad_norm": 2.254345178604126, "learning_rate": 9.203749050071773e-07, "loss": 0.0683, "step": 2180 }, { "epoch": 0.01849232652888898, "grad_norm": 2.4425697326660156, "learning_rate": 9.245968082411552e-07, "loss": 0.0584, "step": 2190 }, { "epoch": 0.01857676637605286, "grad_norm": 3.4580957889556885, "learning_rate": 9.288187114751331e-07, "loss": 0.0615, "step": 2200 }, { "epoch": 0.018661206223216736, "grad_norm": 2.4155097007751465, "learning_rate": 9.330406147091109e-07, "loss": 0.0376, "step": 2210 }, { "epoch": 0.018745646070380612, "grad_norm": 2.8740620613098145, "learning_rate": 9.372625179430889e-07, "loss": 0.0818, "step": 2220 }, { "epoch": 0.018830085917544488, "grad_norm": 3.149132490158081, "learning_rate": 9.414844211770667e-07, "loss": 0.0736, "step": 2230 }, { "epoch": 0.018914525764708368, "grad_norm": 2.3062331676483154, "learning_rate": 9.457063244110445e-07, "loss": 0.0729, "step": 2240 }, { "epoch": 0.018998965611872243, "grad_norm": 2.7013626098632812, "learning_rate": 9.499282276450225e-07, "loss": 0.068, "step": 2250 }, { "epoch": 0.01908340545903612, "grad_norm": 2.3439855575561523, "learning_rate": 9.541501308790004e-07, "loss": 0.0633, "step": 2260 }, { "epoch": 0.019167845306199995, "grad_norm": 1.6935551166534424, "learning_rate": 9.58372034112978e-07, "loss": 0.0801, "step": 2270 }, { "epoch": 0.01925228515336387, "grad_norm": 3.0133862495422363, "learning_rate": 9.62593937346956e-07, "loss": 0.0773, "step": 2280 }, { "epoch": 0.01933672500052775, "grad_norm": 1.637819766998291, "learning_rate": 9.66815840580934e-07, "loss": 0.0638, "step": 2290 }, { "epoch": 0.019421164847691626, "grad_norm": 3.8671622276306152, "learning_rate": 9.710377438149119e-07, "loss": 0.0584, "step": 2300 }, { "epoch": 0.019505604694855502, "grad_norm": 3.2416293621063232, "learning_rate": 9.752596470488897e-07, "loss": 0.073, "step": 2310 }, { "epoch": 0.019590044542019378, "grad_norm": 3.300074577331543, "learning_rate": 9.794815502828676e-07, "loss": 0.063, "step": 2320 }, { "epoch": 0.019674484389183254, "grad_norm": 1.6865839958190918, "learning_rate": 9.837034535168455e-07, "loss": 0.0798, "step": 2330 }, { "epoch": 0.019758924236347133, "grad_norm": 2.627620220184326, "learning_rate": 9.879253567508233e-07, "loss": 0.0663, "step": 2340 }, { "epoch": 0.01984336408351101, "grad_norm": 2.2929468154907227, "learning_rate": 9.921472599848014e-07, "loss": 0.077, "step": 2350 }, { "epoch": 0.019927803930674885, "grad_norm": 3.2138946056365967, "learning_rate": 9.96369163218779e-07, "loss": 0.0625, "step": 2360 }, { "epoch": 0.02001224377783876, "grad_norm": 2.6448395252227783, "learning_rate": 1.0005910664527569e-06, "loss": 0.0793, "step": 2370 }, { "epoch": 0.020096683625002637, "grad_norm": 1.7525991201400757, "learning_rate": 1.0048129696867347e-06, "loss": 0.0713, "step": 2380 }, { "epoch": 0.020181123472166516, "grad_norm": 5.429071426391602, "learning_rate": 1.0090348729207128e-06, "loss": 0.0863, "step": 2390 }, { "epoch": 0.020265563319330392, "grad_norm": 2.120157241821289, "learning_rate": 1.0132567761546907e-06, "loss": 0.074, "step": 2400 }, { "epoch": 0.020350003166494268, "grad_norm": 1.66353178024292, "learning_rate": 1.0174786793886685e-06, "loss": 0.0798, "step": 2410 }, { "epoch": 0.020434443013658144, "grad_norm": 2.8410446643829346, "learning_rate": 1.0217005826226464e-06, "loss": 0.0652, "step": 2420 }, { "epoch": 0.020518882860822023, "grad_norm": 3.011291027069092, "learning_rate": 1.0259224858566243e-06, "loss": 0.0509, "step": 2430 }, { "epoch": 0.0206033227079859, "grad_norm": 2.6410071849823, "learning_rate": 1.0301443890906021e-06, "loss": 0.05, "step": 2440 }, { "epoch": 0.020687762555149775, "grad_norm": 2.4265494346618652, "learning_rate": 1.03436629232458e-06, "loss": 0.0551, "step": 2450 }, { "epoch": 0.02077220240231365, "grad_norm": 3.146648406982422, "learning_rate": 1.0385881955585578e-06, "loss": 0.0709, "step": 2460 }, { "epoch": 0.020856642249477527, "grad_norm": 1.951441764831543, "learning_rate": 1.0428100987925357e-06, "loss": 0.0581, "step": 2470 }, { "epoch": 0.020941082096641406, "grad_norm": 2.5109684467315674, "learning_rate": 1.0470320020265135e-06, "loss": 0.0635, "step": 2480 }, { "epoch": 0.021025521943805282, "grad_norm": 2.2302963733673096, "learning_rate": 1.0512539052604916e-06, "loss": 0.0722, "step": 2490 }, { "epoch": 0.021109961790969158, "grad_norm": 3.7568063735961914, "learning_rate": 1.0554758084944695e-06, "loss": 0.0642, "step": 2500 }, { "epoch": 0.021194401638133034, "grad_norm": 2.6207504272460938, "learning_rate": 1.0596977117284473e-06, "loss": 0.07, "step": 2510 }, { "epoch": 0.02127884148529691, "grad_norm": 2.07879900932312, "learning_rate": 1.0639196149624252e-06, "loss": 0.0537, "step": 2520 }, { "epoch": 0.02136328133246079, "grad_norm": 2.575425863265991, "learning_rate": 1.068141518196403e-06, "loss": 0.062, "step": 2530 }, { "epoch": 0.021447721179624665, "grad_norm": 2.898991346359253, "learning_rate": 1.072363421430381e-06, "loss": 0.0681, "step": 2540 }, { "epoch": 0.02153216102678854, "grad_norm": 2.428011417388916, "learning_rate": 1.0765853246643588e-06, "loss": 0.0684, "step": 2550 }, { "epoch": 0.021616600873952417, "grad_norm": 3.3492207527160645, "learning_rate": 1.0808072278983366e-06, "loss": 0.0793, "step": 2560 }, { "epoch": 0.021701040721116296, "grad_norm": 2.2780649662017822, "learning_rate": 1.0850291311323145e-06, "loss": 0.0753, "step": 2570 }, { "epoch": 0.021785480568280172, "grad_norm": 2.5529260635375977, "learning_rate": 1.0892510343662923e-06, "loss": 0.0885, "step": 2580 }, { "epoch": 0.021869920415444048, "grad_norm": 2.3934388160705566, "learning_rate": 1.0934729376002704e-06, "loss": 0.0767, "step": 2590 }, { "epoch": 0.021954360262607924, "grad_norm": 2.034986972808838, "learning_rate": 1.0976948408342483e-06, "loss": 0.0724, "step": 2600 }, { "epoch": 0.0220388001097718, "grad_norm": 2.5560507774353027, "learning_rate": 1.101916744068226e-06, "loss": 0.0897, "step": 2610 }, { "epoch": 0.02212323995693568, "grad_norm": 1.5883057117462158, "learning_rate": 1.1061386473022038e-06, "loss": 0.0714, "step": 2620 }, { "epoch": 0.022207679804099555, "grad_norm": 1.9850255250930786, "learning_rate": 1.1103605505361818e-06, "loss": 0.0495, "step": 2630 }, { "epoch": 0.02229211965126343, "grad_norm": 2.569040298461914, "learning_rate": 1.1145824537701597e-06, "loss": 0.0645, "step": 2640 }, { "epoch": 0.022376559498427307, "grad_norm": 2.16740083694458, "learning_rate": 1.1188043570041376e-06, "loss": 0.0511, "step": 2650 }, { "epoch": 0.022460999345591183, "grad_norm": 1.6815080642700195, "learning_rate": 1.1230262602381154e-06, "loss": 0.0506, "step": 2660 }, { "epoch": 0.022545439192755062, "grad_norm": 2.040097236633301, "learning_rate": 1.1272481634720933e-06, "loss": 0.0561, "step": 2670 }, { "epoch": 0.022629879039918938, "grad_norm": 4.282937049865723, "learning_rate": 1.1314700667060711e-06, "loss": 0.0533, "step": 2680 }, { "epoch": 0.022714318887082814, "grad_norm": 1.8467177152633667, "learning_rate": 1.1356919699400492e-06, "loss": 0.0732, "step": 2690 }, { "epoch": 0.02279875873424669, "grad_norm": 3.2368385791778564, "learning_rate": 1.139913873174027e-06, "loss": 0.0635, "step": 2700 }, { "epoch": 0.022883198581410566, "grad_norm": 1.7945170402526855, "learning_rate": 1.1441357764080047e-06, "loss": 0.0472, "step": 2710 }, { "epoch": 0.022967638428574445, "grad_norm": 2.233715295791626, "learning_rate": 1.1483576796419826e-06, "loss": 0.0715, "step": 2720 }, { "epoch": 0.02305207827573832, "grad_norm": 2.168396234512329, "learning_rate": 1.1525795828759606e-06, "loss": 0.0574, "step": 2730 }, { "epoch": 0.023136518122902197, "grad_norm": 1.8964381217956543, "learning_rate": 1.1568014861099385e-06, "loss": 0.071, "step": 2740 }, { "epoch": 0.023220957970066073, "grad_norm": 3.0503194332122803, "learning_rate": 1.1610233893439164e-06, "loss": 0.0595, "step": 2750 }, { "epoch": 0.023305397817229952, "grad_norm": 3.163673162460327, "learning_rate": 1.1652452925778942e-06, "loss": 0.0757, "step": 2760 }, { "epoch": 0.02338983766439383, "grad_norm": 2.002464532852173, "learning_rate": 1.169467195811872e-06, "loss": 0.0552, "step": 2770 }, { "epoch": 0.023474277511557704, "grad_norm": 2.6664958000183105, "learning_rate": 1.17368909904585e-06, "loss": 0.039, "step": 2780 }, { "epoch": 0.02355871735872158, "grad_norm": 1.6083166599273682, "learning_rate": 1.177911002279828e-06, "loss": 0.044, "step": 2790 }, { "epoch": 0.023643157205885456, "grad_norm": 3.0875654220581055, "learning_rate": 1.1821329055138056e-06, "loss": 0.0618, "step": 2800 }, { "epoch": 0.023727597053049335, "grad_norm": 2.511791467666626, "learning_rate": 1.1863548087477835e-06, "loss": 0.0431, "step": 2810 }, { "epoch": 0.02381203690021321, "grad_norm": 2.025913953781128, "learning_rate": 1.1905767119817614e-06, "loss": 0.0658, "step": 2820 }, { "epoch": 0.023896476747377087, "grad_norm": 2.0698721408843994, "learning_rate": 1.1947986152157394e-06, "loss": 0.0469, "step": 2830 }, { "epoch": 0.023980916594540963, "grad_norm": 2.592576503753662, "learning_rate": 1.1990205184497173e-06, "loss": 0.067, "step": 2840 }, { "epoch": 0.02406535644170484, "grad_norm": 1.885839581489563, "learning_rate": 1.2032424216836952e-06, "loss": 0.0416, "step": 2850 }, { "epoch": 0.02414979628886872, "grad_norm": 2.638221502304077, "learning_rate": 1.207464324917673e-06, "loss": 0.0536, "step": 2860 }, { "epoch": 0.024234236136032594, "grad_norm": 2.3508501052856445, "learning_rate": 1.2116862281516509e-06, "loss": 0.0531, "step": 2870 }, { "epoch": 0.02431867598319647, "grad_norm": 2.321514129638672, "learning_rate": 1.2159081313856287e-06, "loss": 0.0513, "step": 2880 }, { "epoch": 0.024403115830360346, "grad_norm": 1.1447190046310425, "learning_rate": 1.2201300346196066e-06, "loss": 0.054, "step": 2890 }, { "epoch": 0.024487555677524225, "grad_norm": 2.2621471881866455, "learning_rate": 1.2243519378535844e-06, "loss": 0.0489, "step": 2900 }, { "epoch": 0.0245719955246881, "grad_norm": 2.1550049781799316, "learning_rate": 1.2285738410875623e-06, "loss": 0.0518, "step": 2910 }, { "epoch": 0.024656435371851977, "grad_norm": 2.3310022354125977, "learning_rate": 1.2327957443215402e-06, "loss": 0.061, "step": 2920 }, { "epoch": 0.024740875219015853, "grad_norm": 3.598963737487793, "learning_rate": 1.2370176475555182e-06, "loss": 0.0561, "step": 2930 }, { "epoch": 0.02482531506617973, "grad_norm": 2.2629685401916504, "learning_rate": 1.241239550789496e-06, "loss": 0.0549, "step": 2940 }, { "epoch": 0.02490975491334361, "grad_norm": 2.5164847373962402, "learning_rate": 1.245461454023474e-06, "loss": 0.0482, "step": 2950 }, { "epoch": 0.024994194760507484, "grad_norm": 2.6640877723693848, "learning_rate": 1.2496833572574518e-06, "loss": 0.0433, "step": 2960 }, { "epoch": 0.02507863460767136, "grad_norm": 1.2245242595672607, "learning_rate": 1.2539052604914295e-06, "loss": 0.0506, "step": 2970 }, { "epoch": 0.025163074454835236, "grad_norm": 4.436812400817871, "learning_rate": 1.2581271637254075e-06, "loss": 0.0662, "step": 2980 }, { "epoch": 0.025247514301999112, "grad_norm": 2.532512903213501, "learning_rate": 1.2623490669593854e-06, "loss": 0.0777, "step": 2990 }, { "epoch": 0.02533195414916299, "grad_norm": 2.366654396057129, "learning_rate": 1.2665709701933632e-06, "loss": 0.051, "step": 3000 }, { "epoch": 0.025416393996326867, "grad_norm": 2.639366626739502, "learning_rate": 1.270792873427341e-06, "loss": 0.0483, "step": 3010 }, { "epoch": 0.025500833843490743, "grad_norm": 2.838047504425049, "learning_rate": 1.2750147766613192e-06, "loss": 0.0614, "step": 3020 }, { "epoch": 0.02558527369065462, "grad_norm": 2.1618869304656982, "learning_rate": 1.2792366798952968e-06, "loss": 0.0592, "step": 3030 }, { "epoch": 0.025669713537818495, "grad_norm": 1.9984639883041382, "learning_rate": 1.2834585831292749e-06, "loss": 0.0573, "step": 3040 }, { "epoch": 0.025754153384982374, "grad_norm": 2.391662836074829, "learning_rate": 1.2876804863632527e-06, "loss": 0.0538, "step": 3050 }, { "epoch": 0.02583859323214625, "grad_norm": 1.6013888120651245, "learning_rate": 1.2919023895972304e-06, "loss": 0.0484, "step": 3060 }, { "epoch": 0.025923033079310126, "grad_norm": 2.162224054336548, "learning_rate": 1.2961242928312085e-06, "loss": 0.0466, "step": 3070 }, { "epoch": 0.026007472926474002, "grad_norm": 1.8205119371414185, "learning_rate": 1.3003461960651863e-06, "loss": 0.0521, "step": 3080 }, { "epoch": 0.02609191277363788, "grad_norm": 2.360917329788208, "learning_rate": 1.3045680992991642e-06, "loss": 0.0437, "step": 3090 }, { "epoch": 0.026176352620801757, "grad_norm": 2.1663732528686523, "learning_rate": 1.308790002533142e-06, "loss": 0.0457, "step": 3100 }, { "epoch": 0.026260792467965633, "grad_norm": 1.8047688007354736, "learning_rate": 1.3130119057671199e-06, "loss": 0.0624, "step": 3110 }, { "epoch": 0.02634523231512951, "grad_norm": 2.691815137863159, "learning_rate": 1.3172338090010978e-06, "loss": 0.0477, "step": 3120 }, { "epoch": 0.026429672162293385, "grad_norm": 2.1458704471588135, "learning_rate": 1.3214557122350758e-06, "loss": 0.0485, "step": 3130 }, { "epoch": 0.026514112009457264, "grad_norm": 2.082428455352783, "learning_rate": 1.3256776154690535e-06, "loss": 0.0578, "step": 3140 }, { "epoch": 0.02659855185662114, "grad_norm": 3.4327049255371094, "learning_rate": 1.3298995187030313e-06, "loss": 0.0576, "step": 3150 }, { "epoch": 0.026682991703785016, "grad_norm": 4.337438106536865, "learning_rate": 1.3341214219370094e-06, "loss": 0.0608, "step": 3160 }, { "epoch": 0.026767431550948892, "grad_norm": 2.121814012527466, "learning_rate": 1.338343325170987e-06, "loss": 0.0338, "step": 3170 }, { "epoch": 0.026851871398112768, "grad_norm": 1.978150486946106, "learning_rate": 1.3425652284049651e-06, "loss": 0.0455, "step": 3180 }, { "epoch": 0.026936311245276647, "grad_norm": 2.845569133758545, "learning_rate": 1.346787131638943e-06, "loss": 0.0728, "step": 3190 }, { "epoch": 0.027020751092440523, "grad_norm": 2.9120945930480957, "learning_rate": 1.3510090348729208e-06, "loss": 0.0579, "step": 3200 }, { "epoch": 0.0271051909396044, "grad_norm": 1.4262176752090454, "learning_rate": 1.3552309381068987e-06, "loss": 0.0496, "step": 3210 }, { "epoch": 0.027189630786768275, "grad_norm": 1.6709036827087402, "learning_rate": 1.3594528413408768e-06, "loss": 0.0523, "step": 3220 }, { "epoch": 0.027274070633932154, "grad_norm": 3.057816743850708, "learning_rate": 1.3636747445748544e-06, "loss": 0.0797, "step": 3230 }, { "epoch": 0.02735851048109603, "grad_norm": 2.9717650413513184, "learning_rate": 1.3678966478088323e-06, "loss": 0.0414, "step": 3240 }, { "epoch": 0.027442950328259906, "grad_norm": 1.9009032249450684, "learning_rate": 1.3721185510428103e-06, "loss": 0.0404, "step": 3250 }, { "epoch": 0.027527390175423782, "grad_norm": 1.3425058126449585, "learning_rate": 1.376340454276788e-06, "loss": 0.0496, "step": 3260 }, { "epoch": 0.027611830022587658, "grad_norm": 3.101351499557495, "learning_rate": 1.380562357510766e-06, "loss": 0.0501, "step": 3270 }, { "epoch": 0.027696269869751537, "grad_norm": 4.452205181121826, "learning_rate": 1.384784260744744e-06, "loss": 0.0599, "step": 3280 }, { "epoch": 0.027780709716915413, "grad_norm": 1.789632797241211, "learning_rate": 1.3890061639787218e-06, "loss": 0.0487, "step": 3290 }, { "epoch": 0.02786514956407929, "grad_norm": 2.40547513961792, "learning_rate": 1.3932280672126996e-06, "loss": 0.0801, "step": 3300 }, { "epoch": 0.027949589411243165, "grad_norm": 5.856931209564209, "learning_rate": 1.3974499704466773e-06, "loss": 0.0738, "step": 3310 }, { "epoch": 0.02803402925840704, "grad_norm": 1.7856389284133911, "learning_rate": 1.4016718736806553e-06, "loss": 0.0652, "step": 3320 }, { "epoch": 0.02811846910557092, "grad_norm": 2.0714306831359863, "learning_rate": 1.4058937769146332e-06, "loss": 0.0693, "step": 3330 }, { "epoch": 0.028202908952734796, "grad_norm": 3.420205593109131, "learning_rate": 1.410115680148611e-06, "loss": 0.0689, "step": 3340 }, { "epoch": 0.028287348799898672, "grad_norm": 9.289542198181152, "learning_rate": 1.414337583382589e-06, "loss": 0.0479, "step": 3350 }, { "epoch": 0.028371788647062548, "grad_norm": 2.025261163711548, "learning_rate": 1.418559486616567e-06, "loss": 0.0582, "step": 3360 }, { "epoch": 0.028456228494226424, "grad_norm": 2.90282940864563, "learning_rate": 1.4227813898505446e-06, "loss": 0.0513, "step": 3370 }, { "epoch": 0.028540668341390303, "grad_norm": 2.716384172439575, "learning_rate": 1.4270032930845227e-06, "loss": 0.0626, "step": 3380 }, { "epoch": 0.02862510818855418, "grad_norm": 2.142070770263672, "learning_rate": 1.4312251963185006e-06, "loss": 0.0449, "step": 3390 }, { "epoch": 0.028709548035718055, "grad_norm": 1.3559563159942627, "learning_rate": 1.4354470995524782e-06, "loss": 0.0688, "step": 3400 }, { "epoch": 0.02879398788288193, "grad_norm": 2.249556303024292, "learning_rate": 1.4396690027864563e-06, "loss": 0.0479, "step": 3410 }, { "epoch": 0.02887842773004581, "grad_norm": 1.8325326442718506, "learning_rate": 1.4438909060204344e-06, "loss": 0.062, "step": 3420 }, { "epoch": 0.028962867577209686, "grad_norm": 1.6892166137695312, "learning_rate": 1.448112809254412e-06, "loss": 0.0426, "step": 3430 }, { "epoch": 0.029047307424373562, "grad_norm": 2.2034246921539307, "learning_rate": 1.4523347124883899e-06, "loss": 0.0578, "step": 3440 }, { "epoch": 0.029131747271537438, "grad_norm": 2.5062193870544434, "learning_rate": 1.456556615722368e-06, "loss": 0.0621, "step": 3450 }, { "epoch": 0.029216187118701314, "grad_norm": 1.6310186386108398, "learning_rate": 1.4607785189563456e-06, "loss": 0.0517, "step": 3460 }, { "epoch": 0.029300626965865193, "grad_norm": 2.045597791671753, "learning_rate": 1.4650004221903236e-06, "loss": 0.05, "step": 3470 }, { "epoch": 0.02938506681302907, "grad_norm": 1.1845635175704956, "learning_rate": 1.4692223254243013e-06, "loss": 0.0617, "step": 3480 }, { "epoch": 0.029469506660192945, "grad_norm": 1.8875031471252441, "learning_rate": 1.4734442286582791e-06, "loss": 0.0584, "step": 3490 }, { "epoch": 0.02955394650735682, "grad_norm": 2.9354279041290283, "learning_rate": 1.4776661318922572e-06, "loss": 0.0476, "step": 3500 }, { "epoch": 0.029638386354520697, "grad_norm": 1.6304339170455933, "learning_rate": 1.4818880351262349e-06, "loss": 0.0385, "step": 3510 }, { "epoch": 0.029722826201684576, "grad_norm": 3.5282351970672607, "learning_rate": 1.486109938360213e-06, "loss": 0.0795, "step": 3520 }, { "epoch": 0.029807266048848452, "grad_norm": 1.4592491388320923, "learning_rate": 1.4903318415941908e-06, "loss": 0.0578, "step": 3530 }, { "epoch": 0.029891705896012328, "grad_norm": 2.270376682281494, "learning_rate": 1.4945537448281687e-06, "loss": 0.0435, "step": 3540 }, { "epoch": 0.029976145743176204, "grad_norm": 2.6938016414642334, "learning_rate": 1.4987756480621465e-06, "loss": 0.0581, "step": 3550 }, { "epoch": 0.030060585590340083, "grad_norm": 4.1751933097839355, "learning_rate": 1.5029975512961246e-06, "loss": 0.0614, "step": 3560 }, { "epoch": 0.03014502543750396, "grad_norm": 1.8558214902877808, "learning_rate": 1.5072194545301022e-06, "loss": 0.0609, "step": 3570 }, { "epoch": 0.030229465284667835, "grad_norm": 2.2390694618225098, "learning_rate": 1.51144135776408e-06, "loss": 0.0437, "step": 3580 }, { "epoch": 0.03031390513183171, "grad_norm": 2.1032073497772217, "learning_rate": 1.5156632609980582e-06, "loss": 0.0646, "step": 3590 }, { "epoch": 0.030398344978995587, "grad_norm": 1.7726972103118896, "learning_rate": 1.5198851642320358e-06, "loss": 0.046, "step": 3600 }, { "epoch": 0.030482784826159466, "grad_norm": 3.413623332977295, "learning_rate": 1.5241070674660139e-06, "loss": 0.0592, "step": 3610 }, { "epoch": 0.030567224673323342, "grad_norm": 1.4999080896377563, "learning_rate": 1.5283289706999917e-06, "loss": 0.0738, "step": 3620 }, { "epoch": 0.030651664520487218, "grad_norm": 3.271307945251465, "learning_rate": 1.5325508739339696e-06, "loss": 0.0418, "step": 3630 }, { "epoch": 0.030736104367651094, "grad_norm": 1.6762921810150146, "learning_rate": 1.5367727771679474e-06, "loss": 0.0392, "step": 3640 }, { "epoch": 0.03082054421481497, "grad_norm": 2.1880266666412354, "learning_rate": 1.540994680401925e-06, "loss": 0.0517, "step": 3650 }, { "epoch": 0.03090498406197885, "grad_norm": 2.440981149673462, "learning_rate": 1.5452165836359032e-06, "loss": 0.0357, "step": 3660 }, { "epoch": 0.030989423909142725, "grad_norm": 2.2364864349365234, "learning_rate": 1.5494384868698812e-06, "loss": 0.0627, "step": 3670 }, { "epoch": 0.0310738637563066, "grad_norm": 2.5936977863311768, "learning_rate": 1.5536603901038589e-06, "loss": 0.0504, "step": 3680 }, { "epoch": 0.031158303603470477, "grad_norm": 2.510735273361206, "learning_rate": 1.5578822933378367e-06, "loss": 0.0736, "step": 3690 }, { "epoch": 0.031242743450634353, "grad_norm": 1.8341799974441528, "learning_rate": 1.5621041965718148e-06, "loss": 0.0669, "step": 3700 }, { "epoch": 0.03132718329779823, "grad_norm": 2.2668747901916504, "learning_rate": 1.5663260998057925e-06, "loss": 0.048, "step": 3710 }, { "epoch": 0.031411623144962104, "grad_norm": 2.9342944622039795, "learning_rate": 1.5705480030397705e-06, "loss": 0.0496, "step": 3720 }, { "epoch": 0.031496062992125984, "grad_norm": 4.142918109893799, "learning_rate": 1.5747699062737484e-06, "loss": 0.0527, "step": 3730 }, { "epoch": 0.03158050283928986, "grad_norm": 3.3670613765716553, "learning_rate": 1.578991809507726e-06, "loss": 0.0582, "step": 3740 }, { "epoch": 0.031664942686453736, "grad_norm": 1.7989331483840942, "learning_rate": 1.583213712741704e-06, "loss": 0.0546, "step": 3750 }, { "epoch": 0.031749382533617615, "grad_norm": 2.81916880607605, "learning_rate": 1.5874356159756822e-06, "loss": 0.0618, "step": 3760 }, { "epoch": 0.03183382238078149, "grad_norm": 2.2426369190216064, "learning_rate": 1.5916575192096598e-06, "loss": 0.0344, "step": 3770 }, { "epoch": 0.03191826222794537, "grad_norm": 1.5863184928894043, "learning_rate": 1.5958794224436377e-06, "loss": 0.0543, "step": 3780 }, { "epoch": 0.032002702075109246, "grad_norm": 3.282541275024414, "learning_rate": 1.6001013256776157e-06, "loss": 0.0385, "step": 3790 }, { "epoch": 0.03208714192227312, "grad_norm": 1.6768584251403809, "learning_rate": 1.6043232289115934e-06, "loss": 0.0618, "step": 3800 }, { "epoch": 0.032171581769437, "grad_norm": 3.358717918395996, "learning_rate": 1.6085451321455715e-06, "loss": 0.0417, "step": 3810 }, { "epoch": 0.03225602161660088, "grad_norm": 2.089548110961914, "learning_rate": 1.6127670353795491e-06, "loss": 0.0467, "step": 3820 }, { "epoch": 0.03234046146376475, "grad_norm": 2.097691774368286, "learning_rate": 1.616988938613527e-06, "loss": 0.0449, "step": 3830 }, { "epoch": 0.03242490131092863, "grad_norm": 1.762855887413025, "learning_rate": 1.621210841847505e-06, "loss": 0.0496, "step": 3840 }, { "epoch": 0.0325093411580925, "grad_norm": 1.9925322532653809, "learning_rate": 1.6254327450814827e-06, "loss": 0.0444, "step": 3850 }, { "epoch": 0.03259378100525638, "grad_norm": 1.8197135925292969, "learning_rate": 1.6296546483154608e-06, "loss": 0.0401, "step": 3860 }, { "epoch": 0.03267822085242026, "grad_norm": 2.468031406402588, "learning_rate": 1.6338765515494386e-06, "loss": 0.0636, "step": 3870 }, { "epoch": 0.03276266069958413, "grad_norm": 1.895848274230957, "learning_rate": 1.6380984547834165e-06, "loss": 0.0532, "step": 3880 }, { "epoch": 0.03284710054674801, "grad_norm": 2.6955068111419678, "learning_rate": 1.6423203580173943e-06, "loss": 0.0608, "step": 3890 }, { "epoch": 0.032931540393911884, "grad_norm": 1.5537422895431519, "learning_rate": 1.6465422612513724e-06, "loss": 0.0406, "step": 3900 }, { "epoch": 0.033015980241075764, "grad_norm": 2.8972954750061035, "learning_rate": 1.65076416448535e-06, "loss": 0.0699, "step": 3910 }, { "epoch": 0.03310042008823964, "grad_norm": 1.7807739973068237, "learning_rate": 1.6549860677193281e-06, "loss": 0.0431, "step": 3920 }, { "epoch": 0.033184859935403516, "grad_norm": 1.3911386728286743, "learning_rate": 1.659207970953306e-06, "loss": 0.0404, "step": 3930 }, { "epoch": 0.033269299782567395, "grad_norm": 1.2613005638122559, "learning_rate": 1.6634298741872836e-06, "loss": 0.0597, "step": 3940 }, { "epoch": 0.03335373962973127, "grad_norm": 1.7659046649932861, "learning_rate": 1.6676517774212617e-06, "loss": 0.0462, "step": 3950 }, { "epoch": 0.03343817947689515, "grad_norm": 2.9078574180603027, "learning_rate": 1.6718736806552396e-06, "loss": 0.0514, "step": 3960 }, { "epoch": 0.033522619324059026, "grad_norm": 2.268601894378662, "learning_rate": 1.6760955838892174e-06, "loss": 0.0494, "step": 3970 }, { "epoch": 0.0336070591712229, "grad_norm": 1.712074875831604, "learning_rate": 1.6803174871231953e-06, "loss": 0.0422, "step": 3980 }, { "epoch": 0.03369149901838678, "grad_norm": 2.7518749237060547, "learning_rate": 1.6845393903571733e-06, "loss": 0.0607, "step": 3990 }, { "epoch": 0.03377593886555065, "grad_norm": 3.221118688583374, "learning_rate": 1.688761293591151e-06, "loss": 0.0495, "step": 4000 }, { "epoch": 0.03386037871271453, "grad_norm": 2.2781715393066406, "learning_rate": 1.692983196825129e-06, "loss": 0.0614, "step": 4010 }, { "epoch": 0.03394481855987841, "grad_norm": 2.153461456298828, "learning_rate": 1.6972051000591067e-06, "loss": 0.068, "step": 4020 }, { "epoch": 0.03402925840704228, "grad_norm": 1.2835074663162231, "learning_rate": 1.7014270032930846e-06, "loss": 0.0617, "step": 4030 }, { "epoch": 0.03411369825420616, "grad_norm": 1.690331220626831, "learning_rate": 1.7056489065270626e-06, "loss": 0.0308, "step": 4040 }, { "epoch": 0.03419813810137003, "grad_norm": 2.725266456604004, "learning_rate": 1.7098708097610403e-06, "loss": 0.0532, "step": 4050 }, { "epoch": 0.03428257794853391, "grad_norm": 1.713057041168213, "learning_rate": 1.7140927129950183e-06, "loss": 0.0338, "step": 4060 }, { "epoch": 0.03436701779569779, "grad_norm": 2.653424024581909, "learning_rate": 1.7183146162289962e-06, "loss": 0.0461, "step": 4070 }, { "epoch": 0.034451457642861665, "grad_norm": 2.1264989376068115, "learning_rate": 1.7225365194629739e-06, "loss": 0.0339, "step": 4080 }, { "epoch": 0.034535897490025544, "grad_norm": 2.7521069049835205, "learning_rate": 1.726758422696952e-06, "loss": 0.0375, "step": 4090 }, { "epoch": 0.034620337337189416, "grad_norm": 2.0249602794647217, "learning_rate": 1.73098032593093e-06, "loss": 0.0461, "step": 4100 }, { "epoch": 0.034704777184353296, "grad_norm": 1.7890759706497192, "learning_rate": 1.7352022291649076e-06, "loss": 0.0438, "step": 4110 }, { "epoch": 0.034789217031517175, "grad_norm": 2.4213485717773438, "learning_rate": 1.7394241323988855e-06, "loss": 0.0495, "step": 4120 }, { "epoch": 0.03487365687868105, "grad_norm": 1.6048338413238525, "learning_rate": 1.7436460356328636e-06, "loss": 0.0405, "step": 4130 }, { "epoch": 0.03495809672584493, "grad_norm": 1.1947100162506104, "learning_rate": 1.7478679388668412e-06, "loss": 0.0457, "step": 4140 }, { "epoch": 0.035042536573008806, "grad_norm": 2.285526990890503, "learning_rate": 1.7520898421008193e-06, "loss": 0.0493, "step": 4150 }, { "epoch": 0.03512697642017268, "grad_norm": 1.8622442483901978, "learning_rate": 1.7563117453347971e-06, "loss": 0.0549, "step": 4160 }, { "epoch": 0.03521141626733656, "grad_norm": 1.96103835105896, "learning_rate": 1.760533648568775e-06, "loss": 0.0494, "step": 4170 }, { "epoch": 0.03529585611450043, "grad_norm": 0.9229766726493835, "learning_rate": 1.7647555518027529e-06, "loss": 0.0361, "step": 4180 }, { "epoch": 0.03538029596166431, "grad_norm": 1.4527137279510498, "learning_rate": 1.7689774550367305e-06, "loss": 0.0507, "step": 4190 }, { "epoch": 0.03546473580882819, "grad_norm": 1.6334781646728516, "learning_rate": 1.7731993582707086e-06, "loss": 0.0277, "step": 4200 }, { "epoch": 0.03554917565599206, "grad_norm": 2.511493682861328, "learning_rate": 1.7774212615046864e-06, "loss": 0.0388, "step": 4210 }, { "epoch": 0.03563361550315594, "grad_norm": 2.2716143131256104, "learning_rate": 1.7816431647386643e-06, "loss": 0.0506, "step": 4220 }, { "epoch": 0.03571805535031981, "grad_norm": 2.0950403213500977, "learning_rate": 1.7858650679726422e-06, "loss": 0.0478, "step": 4230 }, { "epoch": 0.03580249519748369, "grad_norm": 1.4646528959274292, "learning_rate": 1.7900869712066202e-06, "loss": 0.0405, "step": 4240 }, { "epoch": 0.03588693504464757, "grad_norm": 1.8396779298782349, "learning_rate": 1.7943088744405979e-06, "loss": 0.0651, "step": 4250 }, { "epoch": 0.035971374891811445, "grad_norm": 1.6940555572509766, "learning_rate": 1.798530777674576e-06, "loss": 0.0342, "step": 4260 }, { "epoch": 0.036055814738975324, "grad_norm": 1.600752592086792, "learning_rate": 1.8027526809085538e-06, "loss": 0.0543, "step": 4270 }, { "epoch": 0.036140254586139196, "grad_norm": 2.2213046550750732, "learning_rate": 1.8069745841425314e-06, "loss": 0.0569, "step": 4280 }, { "epoch": 0.036224694433303076, "grad_norm": 1.5226800441741943, "learning_rate": 1.8111964873765095e-06, "loss": 0.0383, "step": 4290 }, { "epoch": 0.036309134280466955, "grad_norm": 1.3796727657318115, "learning_rate": 1.8154183906104874e-06, "loss": 0.0608, "step": 4300 }, { "epoch": 0.03639357412763083, "grad_norm": 2.415709972381592, "learning_rate": 1.8196402938444652e-06, "loss": 0.0388, "step": 4310 }, { "epoch": 0.03647801397479471, "grad_norm": 2.042078971862793, "learning_rate": 1.823862197078443e-06, "loss": 0.0361, "step": 4320 }, { "epoch": 0.03656245382195858, "grad_norm": 1.7332442998886108, "learning_rate": 1.8280841003124212e-06, "loss": 0.0546, "step": 4330 }, { "epoch": 0.03664689366912246, "grad_norm": 2.940079689025879, "learning_rate": 1.8323060035463988e-06, "loss": 0.0459, "step": 4340 }, { "epoch": 0.03673133351628634, "grad_norm": 1.8703289031982422, "learning_rate": 1.8365279067803769e-06, "loss": 0.0554, "step": 4350 }, { "epoch": 0.03681577336345021, "grad_norm": 2.611837148666382, "learning_rate": 1.8407498100143545e-06, "loss": 0.0349, "step": 4360 }, { "epoch": 0.03690021321061409, "grad_norm": 2.0391316413879395, "learning_rate": 1.8449717132483324e-06, "loss": 0.0603, "step": 4370 }, { "epoch": 0.03698465305777796, "grad_norm": 2.06838059425354, "learning_rate": 1.8491936164823105e-06, "loss": 0.0457, "step": 4380 }, { "epoch": 0.03706909290494184, "grad_norm": 1.8573780059814453, "learning_rate": 1.853415519716288e-06, "loss": 0.0538, "step": 4390 }, { "epoch": 0.03715353275210572, "grad_norm": 2.520561695098877, "learning_rate": 1.8576374229502662e-06, "loss": 0.0468, "step": 4400 }, { "epoch": 0.03723797259926959, "grad_norm": 1.8526381254196167, "learning_rate": 1.861859326184244e-06, "loss": 0.0466, "step": 4410 }, { "epoch": 0.03732241244643347, "grad_norm": 1.8517736196517944, "learning_rate": 1.8660812294182219e-06, "loss": 0.0501, "step": 4420 }, { "epoch": 0.037406852293597345, "grad_norm": 1.9717013835906982, "learning_rate": 1.8703031326521997e-06, "loss": 0.0415, "step": 4430 }, { "epoch": 0.037491292140761225, "grad_norm": 1.4466196298599243, "learning_rate": 1.8745250358861778e-06, "loss": 0.0403, "step": 4440 }, { "epoch": 0.037575731987925104, "grad_norm": 0.9020782709121704, "learning_rate": 1.8787469391201555e-06, "loss": 0.0363, "step": 4450 }, { "epoch": 0.037660171835088976, "grad_norm": 0.9818556308746338, "learning_rate": 1.8829688423541333e-06, "loss": 0.0378, "step": 4460 }, { "epoch": 0.037744611682252856, "grad_norm": 1.8160443305969238, "learning_rate": 1.8871907455881114e-06, "loss": 0.0502, "step": 4470 }, { "epoch": 0.037829051529416735, "grad_norm": 1.7836613655090332, "learning_rate": 1.891412648822089e-06, "loss": 0.0404, "step": 4480 }, { "epoch": 0.03791349137658061, "grad_norm": 1.0921761989593506, "learning_rate": 1.895634552056067e-06, "loss": 0.0492, "step": 4490 }, { "epoch": 0.03799793122374449, "grad_norm": 2.380746603012085, "learning_rate": 1.899856455290045e-06, "loss": 0.0659, "step": 4500 }, { "epoch": 0.03808237107090836, "grad_norm": 2.2015926837921143, "learning_rate": 1.9040783585240228e-06, "loss": 0.0446, "step": 4510 }, { "epoch": 0.03816681091807224, "grad_norm": 1.920109510421753, "learning_rate": 1.908300261758001e-06, "loss": 0.0352, "step": 4520 }, { "epoch": 0.03825125076523612, "grad_norm": 1.5940377712249756, "learning_rate": 1.9125221649919783e-06, "loss": 0.0422, "step": 4530 }, { "epoch": 0.03833569061239999, "grad_norm": 1.718117594718933, "learning_rate": 1.916744068225956e-06, "loss": 0.0277, "step": 4540 }, { "epoch": 0.03842013045956387, "grad_norm": 0.6959659457206726, "learning_rate": 1.9209659714599345e-06, "loss": 0.0611, "step": 4550 }, { "epoch": 0.03850457030672774, "grad_norm": 1.7767473459243774, "learning_rate": 1.925187874693912e-06, "loss": 0.059, "step": 4560 }, { "epoch": 0.03858901015389162, "grad_norm": 2.092149496078491, "learning_rate": 1.92940977792789e-06, "loss": 0.0449, "step": 4570 }, { "epoch": 0.0386734500010555, "grad_norm": 1.644154667854309, "learning_rate": 1.933631681161868e-06, "loss": 0.065, "step": 4580 }, { "epoch": 0.03875788984821937, "grad_norm": 2.319685459136963, "learning_rate": 1.937853584395846e-06, "loss": 0.054, "step": 4590 }, { "epoch": 0.03884232969538325, "grad_norm": 1.613333821296692, "learning_rate": 1.9420754876298238e-06, "loss": 0.0424, "step": 4600 }, { "epoch": 0.038926769542547125, "grad_norm": 2.038214921951294, "learning_rate": 1.9462973908638016e-06, "loss": 0.042, "step": 4610 }, { "epoch": 0.039011209389711005, "grad_norm": 1.1498078107833862, "learning_rate": 1.9505192940977795e-06, "loss": 0.0536, "step": 4620 }, { "epoch": 0.039095649236874884, "grad_norm": 1.9114172458648682, "learning_rate": 1.9547411973317573e-06, "loss": 0.0449, "step": 4630 }, { "epoch": 0.039180089084038756, "grad_norm": 1.962649941444397, "learning_rate": 1.958963100565735e-06, "loss": 0.0422, "step": 4640 }, { "epoch": 0.039264528931202636, "grad_norm": 2.0468852519989014, "learning_rate": 1.963185003799713e-06, "loss": 0.0324, "step": 4650 }, { "epoch": 0.03934896877836651, "grad_norm": 2.41953182220459, "learning_rate": 1.967406907033691e-06, "loss": 0.0592, "step": 4660 }, { "epoch": 0.03943340862553039, "grad_norm": 3.910409688949585, "learning_rate": 1.9716288102676688e-06, "loss": 0.0447, "step": 4670 }, { "epoch": 0.03951784847269427, "grad_norm": 2.317204713821411, "learning_rate": 1.9758507135016466e-06, "loss": 0.0653, "step": 4680 }, { "epoch": 0.03960228831985814, "grad_norm": 0.8550176024436951, "learning_rate": 1.9800726167356245e-06, "loss": 0.0343, "step": 4690 }, { "epoch": 0.03968672816702202, "grad_norm": 1.509864091873169, "learning_rate": 1.9842945199696028e-06, "loss": 0.0387, "step": 4700 }, { "epoch": 0.03977116801418589, "grad_norm": 1.3722314834594727, "learning_rate": 1.98851642320358e-06, "loss": 0.0402, "step": 4710 }, { "epoch": 0.03985560786134977, "grad_norm": 2.0256285667419434, "learning_rate": 1.992738326437558e-06, "loss": 0.0546, "step": 4720 }, { "epoch": 0.03994004770851365, "grad_norm": 1.3932690620422363, "learning_rate": 1.996960229671536e-06, "loss": 0.0483, "step": 4730 }, { "epoch": 0.04002448755567752, "grad_norm": 2.767771005630493, "learning_rate": 2.0011821329055138e-06, "loss": 0.0527, "step": 4740 }, { "epoch": 0.0401089274028414, "grad_norm": 1.9950774908065796, "learning_rate": 2.005404036139492e-06, "loss": 0.0377, "step": 4750 }, { "epoch": 0.040193367250005274, "grad_norm": 2.113179922103882, "learning_rate": 2.0096259393734695e-06, "loss": 0.0411, "step": 4760 }, { "epoch": 0.040277807097169153, "grad_norm": 1.9948128461837769, "learning_rate": 2.0138478426074478e-06, "loss": 0.0511, "step": 4770 }, { "epoch": 0.04036224694433303, "grad_norm": 2.263556480407715, "learning_rate": 2.0180697458414256e-06, "loss": 0.0409, "step": 4780 }, { "epoch": 0.040446686791496905, "grad_norm": 1.4189047813415527, "learning_rate": 2.022291649075403e-06, "loss": 0.0568, "step": 4790 }, { "epoch": 0.040531126638660785, "grad_norm": 2.4242947101593018, "learning_rate": 2.0265135523093814e-06, "loss": 0.0327, "step": 4800 }, { "epoch": 0.040615566485824664, "grad_norm": 3.2242677211761475, "learning_rate": 2.030735455543359e-06, "loss": 0.0435, "step": 4810 }, { "epoch": 0.040700006332988536, "grad_norm": 2.0777740478515625, "learning_rate": 2.034957358777337e-06, "loss": 0.0449, "step": 4820 }, { "epoch": 0.040784446180152416, "grad_norm": 1.6283632516860962, "learning_rate": 2.039179262011315e-06, "loss": 0.0346, "step": 4830 }, { "epoch": 0.04086888602731629, "grad_norm": 2.7396607398986816, "learning_rate": 2.0434011652452928e-06, "loss": 0.0404, "step": 4840 }, { "epoch": 0.04095332587448017, "grad_norm": 2.2048418521881104, "learning_rate": 2.0476230684792706e-06, "loss": 0.0577, "step": 4850 }, { "epoch": 0.04103776572164405, "grad_norm": 4.827001094818115, "learning_rate": 2.0518449717132485e-06, "loss": 0.0507, "step": 4860 }, { "epoch": 0.04112220556880792, "grad_norm": 1.4848034381866455, "learning_rate": 2.0560668749472264e-06, "loss": 0.044, "step": 4870 }, { "epoch": 0.0412066454159718, "grad_norm": 1.365055799484253, "learning_rate": 2.0602887781812042e-06, "loss": 0.0328, "step": 4880 }, { "epoch": 0.04129108526313567, "grad_norm": 2.4829163551330566, "learning_rate": 2.064510681415182e-06, "loss": 0.0454, "step": 4890 }, { "epoch": 0.04137552511029955, "grad_norm": 1.60871422290802, "learning_rate": 2.06873258464916e-06, "loss": 0.0428, "step": 4900 }, { "epoch": 0.04145996495746343, "grad_norm": 1.2819846868515015, "learning_rate": 2.072954487883138e-06, "loss": 0.045, "step": 4910 }, { "epoch": 0.0415444048046273, "grad_norm": 1.54840087890625, "learning_rate": 2.0771763911171157e-06, "loss": 0.0495, "step": 4920 }, { "epoch": 0.04162884465179118, "grad_norm": 1.3731462955474854, "learning_rate": 2.0813982943510935e-06, "loss": 0.0411, "step": 4930 }, { "epoch": 0.041713284498955054, "grad_norm": 3.006135940551758, "learning_rate": 2.0856201975850714e-06, "loss": 0.0583, "step": 4940 }, { "epoch": 0.041797724346118934, "grad_norm": 2.021110773086548, "learning_rate": 2.0898421008190497e-06, "loss": 0.0466, "step": 4950 }, { "epoch": 0.04188216419328281, "grad_norm": 1.1361554861068726, "learning_rate": 2.094064004053027e-06, "loss": 0.0314, "step": 4960 }, { "epoch": 0.041966604040446685, "grad_norm": 0.9381704926490784, "learning_rate": 2.098285907287005e-06, "loss": 0.0311, "step": 4970 }, { "epoch": 0.042051043887610565, "grad_norm": 2.2297356128692627, "learning_rate": 2.1025078105209832e-06, "loss": 0.0475, "step": 4980 }, { "epoch": 0.04213548373477444, "grad_norm": 1.614568829536438, "learning_rate": 2.1067297137549607e-06, "loss": 0.0377, "step": 4990 }, { "epoch": 0.042219923581938316, "grad_norm": 1.467035174369812, "learning_rate": 2.110951616988939e-06, "loss": 0.0462, "step": 5000 }, { "epoch": 0.042304363429102196, "grad_norm": 2.4716508388519287, "learning_rate": 2.115173520222917e-06, "loss": 0.0402, "step": 5010 }, { "epoch": 0.04238880327626607, "grad_norm": 2.3485829830169678, "learning_rate": 2.1193954234568947e-06, "loss": 0.0508, "step": 5020 }, { "epoch": 0.04247324312342995, "grad_norm": 1.8687998056411743, "learning_rate": 2.1236173266908725e-06, "loss": 0.0521, "step": 5030 }, { "epoch": 0.04255768297059382, "grad_norm": 1.5920912027359009, "learning_rate": 2.1278392299248504e-06, "loss": 0.0395, "step": 5040 }, { "epoch": 0.0426421228177577, "grad_norm": 1.7070653438568115, "learning_rate": 2.1320611331588282e-06, "loss": 0.0408, "step": 5050 }, { "epoch": 0.04272656266492158, "grad_norm": 2.0028464794158936, "learning_rate": 2.136283036392806e-06, "loss": 0.0483, "step": 5060 }, { "epoch": 0.04281100251208545, "grad_norm": 1.6266223192214966, "learning_rate": 2.140504939626784e-06, "loss": 0.0598, "step": 5070 }, { "epoch": 0.04289544235924933, "grad_norm": 1.0376193523406982, "learning_rate": 2.144726842860762e-06, "loss": 0.0241, "step": 5080 }, { "epoch": 0.0429798822064132, "grad_norm": 1.8885222673416138, "learning_rate": 2.1489487460947397e-06, "loss": 0.0444, "step": 5090 }, { "epoch": 0.04306432205357708, "grad_norm": 1.7803075313568115, "learning_rate": 2.1531706493287175e-06, "loss": 0.0388, "step": 5100 }, { "epoch": 0.04314876190074096, "grad_norm": 1.4575238227844238, "learning_rate": 2.1573925525626954e-06, "loss": 0.0236, "step": 5110 }, { "epoch": 0.043233201747904834, "grad_norm": 2.646749973297119, "learning_rate": 2.1616144557966732e-06, "loss": 0.0307, "step": 5120 }, { "epoch": 0.043317641595068714, "grad_norm": 1.4950978755950928, "learning_rate": 2.165836359030651e-06, "loss": 0.0468, "step": 5130 }, { "epoch": 0.04340208144223259, "grad_norm": 1.3048564195632935, "learning_rate": 2.170058262264629e-06, "loss": 0.0338, "step": 5140 }, { "epoch": 0.043486521289396465, "grad_norm": 1.132040023803711, "learning_rate": 2.1742801654986072e-06, "loss": 0.0376, "step": 5150 }, { "epoch": 0.043570961136560345, "grad_norm": 1.597182035446167, "learning_rate": 2.1785020687325847e-06, "loss": 0.0474, "step": 5160 }, { "epoch": 0.04365540098372422, "grad_norm": 1.4539347887039185, "learning_rate": 2.1827239719665625e-06, "loss": 0.0268, "step": 5170 }, { "epoch": 0.043739840830888096, "grad_norm": 2.4661688804626465, "learning_rate": 2.186945875200541e-06, "loss": 0.0424, "step": 5180 }, { "epoch": 0.043824280678051976, "grad_norm": 1.933766484260559, "learning_rate": 2.1911677784345183e-06, "loss": 0.043, "step": 5190 }, { "epoch": 0.04390872052521585, "grad_norm": 1.3488160371780396, "learning_rate": 2.1953896816684965e-06, "loss": 0.0398, "step": 5200 }, { "epoch": 0.04399316037237973, "grad_norm": 1.9345619678497314, "learning_rate": 2.1996115849024744e-06, "loss": 0.0355, "step": 5210 }, { "epoch": 0.0440776002195436, "grad_norm": 2.2331881523132324, "learning_rate": 2.203833488136452e-06, "loss": 0.0468, "step": 5220 }, { "epoch": 0.04416204006670748, "grad_norm": 2.820672035217285, "learning_rate": 2.20805539137043e-06, "loss": 0.0555, "step": 5230 }, { "epoch": 0.04424647991387136, "grad_norm": 3.9056007862091064, "learning_rate": 2.2122772946044075e-06, "loss": 0.0447, "step": 5240 }, { "epoch": 0.04433091976103523, "grad_norm": 1.2248703241348267, "learning_rate": 2.216499197838386e-06, "loss": 0.032, "step": 5250 }, { "epoch": 0.04441535960819911, "grad_norm": 0.6535999774932861, "learning_rate": 2.2207211010723637e-06, "loss": 0.0621, "step": 5260 }, { "epoch": 0.04449979945536298, "grad_norm": 2.673907995223999, "learning_rate": 2.2249430043063415e-06, "loss": 0.0458, "step": 5270 }, { "epoch": 0.04458423930252686, "grad_norm": 1.9703853130340576, "learning_rate": 2.2291649075403194e-06, "loss": 0.0582, "step": 5280 }, { "epoch": 0.04466867914969074, "grad_norm": 1.2992054224014282, "learning_rate": 2.2333868107742973e-06, "loss": 0.0266, "step": 5290 }, { "epoch": 0.044753118996854614, "grad_norm": 0.8935354351997375, "learning_rate": 2.237608714008275e-06, "loss": 0.0496, "step": 5300 }, { "epoch": 0.044837558844018494, "grad_norm": 0.5735737085342407, "learning_rate": 2.241830617242253e-06, "loss": 0.0469, "step": 5310 }, { "epoch": 0.044921998691182366, "grad_norm": 1.4430738687515259, "learning_rate": 2.246052520476231e-06, "loss": 0.0435, "step": 5320 }, { "epoch": 0.045006438538346245, "grad_norm": 1.7332866191864014, "learning_rate": 2.2502744237102087e-06, "loss": 0.0609, "step": 5330 }, { "epoch": 0.045090878385510125, "grad_norm": 1.9371541738510132, "learning_rate": 2.2544963269441866e-06, "loss": 0.0334, "step": 5340 }, { "epoch": 0.045175318232674, "grad_norm": 0.9979975819587708, "learning_rate": 2.2587182301781644e-06, "loss": 0.0245, "step": 5350 }, { "epoch": 0.045259758079837877, "grad_norm": 2.3005125522613525, "learning_rate": 2.2629401334121423e-06, "loss": 0.0555, "step": 5360 }, { "epoch": 0.04534419792700175, "grad_norm": 1.5119777917861938, "learning_rate": 2.26716203664612e-06, "loss": 0.0378, "step": 5370 }, { "epoch": 0.04542863777416563, "grad_norm": 1.952648639678955, "learning_rate": 2.2713839398800984e-06, "loss": 0.0474, "step": 5380 }, { "epoch": 0.04551307762132951, "grad_norm": 2.0396101474761963, "learning_rate": 2.275605843114076e-06, "loss": 0.0517, "step": 5390 }, { "epoch": 0.04559751746849338, "grad_norm": 1.2054836750030518, "learning_rate": 2.279827746348054e-06, "loss": 0.0374, "step": 5400 }, { "epoch": 0.04568195731565726, "grad_norm": 1.6000128984451294, "learning_rate": 2.284049649582032e-06, "loss": 0.0373, "step": 5410 }, { "epoch": 0.04576639716282113, "grad_norm": 1.2177733182907104, "learning_rate": 2.2882715528160094e-06, "loss": 0.0404, "step": 5420 }, { "epoch": 0.04585083700998501, "grad_norm": 2.7752039432525635, "learning_rate": 2.2924934560499877e-06, "loss": 0.0367, "step": 5430 }, { "epoch": 0.04593527685714889, "grad_norm": 1.6745631694793701, "learning_rate": 2.296715359283965e-06, "loss": 0.0316, "step": 5440 }, { "epoch": 0.04601971670431276, "grad_norm": 1.5013325214385986, "learning_rate": 2.3009372625179434e-06, "loss": 0.0701, "step": 5450 }, { "epoch": 0.04610415655147664, "grad_norm": 1.4917477369308472, "learning_rate": 2.3051591657519213e-06, "loss": 0.0423, "step": 5460 }, { "epoch": 0.04618859639864052, "grad_norm": 1.8881072998046875, "learning_rate": 2.3093810689858987e-06, "loss": 0.0343, "step": 5470 }, { "epoch": 0.046273036245804394, "grad_norm": 2.3692800998687744, "learning_rate": 2.313602972219877e-06, "loss": 0.0445, "step": 5480 }, { "epoch": 0.046357476092968274, "grad_norm": 1.374211072921753, "learning_rate": 2.317824875453855e-06, "loss": 0.0392, "step": 5490 }, { "epoch": 0.046441915940132146, "grad_norm": 2.071645975112915, "learning_rate": 2.3220467786878327e-06, "loss": 0.0382, "step": 5500 }, { "epoch": 0.046526355787296025, "grad_norm": 0.7311633229255676, "learning_rate": 2.3262686819218106e-06, "loss": 0.0508, "step": 5510 }, { "epoch": 0.046610795634459905, "grad_norm": 1.2912954092025757, "learning_rate": 2.3304905851557884e-06, "loss": 0.0489, "step": 5520 }, { "epoch": 0.04669523548162378, "grad_norm": 3.6130831241607666, "learning_rate": 2.3347124883897663e-06, "loss": 0.0477, "step": 5530 }, { "epoch": 0.04677967532878766, "grad_norm": 1.129448413848877, "learning_rate": 2.338934391623744e-06, "loss": 0.0424, "step": 5540 }, { "epoch": 0.04686411517595153, "grad_norm": 1.3296360969543457, "learning_rate": 2.343156294857722e-06, "loss": 0.035, "step": 5550 }, { "epoch": 0.04694855502311541, "grad_norm": 2.468234062194824, "learning_rate": 2.3473781980917e-06, "loss": 0.0477, "step": 5560 }, { "epoch": 0.04703299487027929, "grad_norm": 2.6571733951568604, "learning_rate": 2.3516001013256777e-06, "loss": 0.0513, "step": 5570 }, { "epoch": 0.04711743471744316, "grad_norm": 1.8609249591827393, "learning_rate": 2.355822004559656e-06, "loss": 0.0398, "step": 5580 }, { "epoch": 0.04720187456460704, "grad_norm": 1.4860527515411377, "learning_rate": 2.3600439077936334e-06, "loss": 0.0511, "step": 5590 }, { "epoch": 0.04728631441177091, "grad_norm": 1.3698259592056274, "learning_rate": 2.3642658110276113e-06, "loss": 0.0409, "step": 5600 }, { "epoch": 0.04737075425893479, "grad_norm": 2.048175573348999, "learning_rate": 2.368487714261589e-06, "loss": 0.0437, "step": 5610 }, { "epoch": 0.04745519410609867, "grad_norm": 1.809330701828003, "learning_rate": 2.372709617495567e-06, "loss": 0.0278, "step": 5620 }, { "epoch": 0.04753963395326254, "grad_norm": 1.890978455543518, "learning_rate": 2.3769315207295453e-06, "loss": 0.0457, "step": 5630 }, { "epoch": 0.04762407380042642, "grad_norm": 1.627073884010315, "learning_rate": 2.3811534239635227e-06, "loss": 0.0353, "step": 5640 }, { "epoch": 0.047708513647590295, "grad_norm": 1.7458840608596802, "learning_rate": 2.385375327197501e-06, "loss": 0.0573, "step": 5650 }, { "epoch": 0.047792953494754174, "grad_norm": 2.147501230239868, "learning_rate": 2.389597230431479e-06, "loss": 0.0488, "step": 5660 }, { "epoch": 0.047877393341918054, "grad_norm": 1.6316269636154175, "learning_rate": 2.3938191336654563e-06, "loss": 0.0436, "step": 5670 }, { "epoch": 0.047961833189081926, "grad_norm": 1.6181169748306274, "learning_rate": 2.3980410368994346e-06, "loss": 0.0279, "step": 5680 }, { "epoch": 0.048046273036245805, "grad_norm": 1.961558222770691, "learning_rate": 2.4022629401334124e-06, "loss": 0.0376, "step": 5690 }, { "epoch": 0.04813071288340968, "grad_norm": 1.5844342708587646, "learning_rate": 2.4064848433673903e-06, "loss": 0.0426, "step": 5700 }, { "epoch": 0.04821515273057356, "grad_norm": 1.7205880880355835, "learning_rate": 2.410706746601368e-06, "loss": 0.0489, "step": 5710 }, { "epoch": 0.04829959257773744, "grad_norm": 1.7258710861206055, "learning_rate": 2.414928649835346e-06, "loss": 0.044, "step": 5720 }, { "epoch": 0.04838403242490131, "grad_norm": 1.3896266222000122, "learning_rate": 2.419150553069324e-06, "loss": 0.0289, "step": 5730 }, { "epoch": 0.04846847227206519, "grad_norm": 1.127782940864563, "learning_rate": 2.4233724563033017e-06, "loss": 0.0362, "step": 5740 }, { "epoch": 0.04855291211922906, "grad_norm": 1.3707879781723022, "learning_rate": 2.4275943595372796e-06, "loss": 0.0368, "step": 5750 }, { "epoch": 0.04863735196639294, "grad_norm": 1.3939543962478638, "learning_rate": 2.4318162627712575e-06, "loss": 0.0437, "step": 5760 }, { "epoch": 0.04872179181355682, "grad_norm": 1.803209662437439, "learning_rate": 2.4360381660052353e-06, "loss": 0.0411, "step": 5770 }, { "epoch": 0.04880623166072069, "grad_norm": 2.7080657482147217, "learning_rate": 2.440260069239213e-06, "loss": 0.0463, "step": 5780 }, { "epoch": 0.04889067150788457, "grad_norm": 2.0822031497955322, "learning_rate": 2.444481972473191e-06, "loss": 0.0426, "step": 5790 }, { "epoch": 0.04897511135504845, "grad_norm": 1.5506500005722046, "learning_rate": 2.448703875707169e-06, "loss": 0.0515, "step": 5800 }, { "epoch": 0.04905955120221232, "grad_norm": 1.2192107439041138, "learning_rate": 2.4529257789411467e-06, "loss": 0.0372, "step": 5810 }, { "epoch": 0.0491439910493762, "grad_norm": 1.4957797527313232, "learning_rate": 2.4571476821751246e-06, "loss": 0.05, "step": 5820 }, { "epoch": 0.049228430896540075, "grad_norm": 2.4473371505737305, "learning_rate": 2.461369585409103e-06, "loss": 0.0345, "step": 5830 }, { "epoch": 0.049312870743703954, "grad_norm": 1.9916613101959229, "learning_rate": 2.4655914886430803e-06, "loss": 0.0634, "step": 5840 }, { "epoch": 0.049397310590867834, "grad_norm": 1.7214759588241577, "learning_rate": 2.469813391877058e-06, "loss": 0.042, "step": 5850 }, { "epoch": 0.049481750438031706, "grad_norm": 1.6266798973083496, "learning_rate": 2.4740352951110365e-06, "loss": 0.042, "step": 5860 }, { "epoch": 0.049566190285195585, "grad_norm": 1.312451720237732, "learning_rate": 2.478257198345014e-06, "loss": 0.0491, "step": 5870 }, { "epoch": 0.04965063013235946, "grad_norm": 1.5920473337173462, "learning_rate": 2.482479101578992e-06, "loss": 0.0536, "step": 5880 }, { "epoch": 0.04973506997952334, "grad_norm": 1.8527082204818726, "learning_rate": 2.48670100481297e-06, "loss": 0.0353, "step": 5890 }, { "epoch": 0.04981950982668722, "grad_norm": 1.404153823852539, "learning_rate": 2.490922908046948e-06, "loss": 0.0446, "step": 5900 }, { "epoch": 0.04990394967385109, "grad_norm": 1.773929238319397, "learning_rate": 2.4951448112809258e-06, "loss": 0.0462, "step": 5910 }, { "epoch": 0.04998838952101497, "grad_norm": 2.0891058444976807, "learning_rate": 2.4993667145149036e-06, "loss": 0.0459, "step": 5920 }, { "epoch": 0.05007282936817884, "grad_norm": 2.489778757095337, "learning_rate": 2.5035886177488815e-06, "loss": 0.0474, "step": 5930 }, { "epoch": 0.05015726921534272, "grad_norm": 1.4530580043792725, "learning_rate": 2.507810520982859e-06, "loss": 0.0431, "step": 5940 }, { "epoch": 0.0502417090625066, "grad_norm": 1.4989279508590698, "learning_rate": 2.512032424216837e-06, "loss": 0.0436, "step": 5950 }, { "epoch": 0.05032614890967047, "grad_norm": 1.7977852821350098, "learning_rate": 2.516254327450815e-06, "loss": 0.0375, "step": 5960 }, { "epoch": 0.05041058875683435, "grad_norm": 2.2467148303985596, "learning_rate": 2.520476230684793e-06, "loss": 0.0496, "step": 5970 }, { "epoch": 0.050495028603998224, "grad_norm": 1.8764207363128662, "learning_rate": 2.5246981339187708e-06, "loss": 0.0438, "step": 5980 }, { "epoch": 0.0505794684511621, "grad_norm": 1.3496068716049194, "learning_rate": 2.528920037152749e-06, "loss": 0.0369, "step": 5990 }, { "epoch": 0.05066390829832598, "grad_norm": 1.2302978038787842, "learning_rate": 2.5331419403867265e-06, "loss": 0.0673, "step": 6000 }, { "epoch": 0.050748348145489855, "grad_norm": 0.6032615303993225, "learning_rate": 2.5373638436207043e-06, "loss": 0.0493, "step": 6010 }, { "epoch": 0.050832787992653734, "grad_norm": 1.6713778972625732, "learning_rate": 2.541585746854682e-06, "loss": 0.0288, "step": 6020 }, { "epoch": 0.05091722783981761, "grad_norm": 1.4589879512786865, "learning_rate": 2.5458076500886605e-06, "loss": 0.0381, "step": 6030 }, { "epoch": 0.051001667686981486, "grad_norm": 0.5533219575881958, "learning_rate": 2.5500295533226383e-06, "loss": 0.0339, "step": 6040 }, { "epoch": 0.051086107534145365, "grad_norm": 1.7422775030136108, "learning_rate": 2.5542514565566158e-06, "loss": 0.0465, "step": 6050 }, { "epoch": 0.05117054738130924, "grad_norm": 1.330998420715332, "learning_rate": 2.5584733597905936e-06, "loss": 0.0404, "step": 6060 }, { "epoch": 0.05125498722847312, "grad_norm": 1.3663759231567383, "learning_rate": 2.5626952630245715e-06, "loss": 0.027, "step": 6070 }, { "epoch": 0.05133942707563699, "grad_norm": 0.9202468991279602, "learning_rate": 2.5669171662585498e-06, "loss": 0.0444, "step": 6080 }, { "epoch": 0.05142386692280087, "grad_norm": 2.062509775161743, "learning_rate": 2.5711390694925276e-06, "loss": 0.0461, "step": 6090 }, { "epoch": 0.05150830676996475, "grad_norm": 2.2407162189483643, "learning_rate": 2.5753609727265055e-06, "loss": 0.0501, "step": 6100 }, { "epoch": 0.05159274661712862, "grad_norm": 0.8327210545539856, "learning_rate": 2.579582875960483e-06, "loss": 0.0471, "step": 6110 }, { "epoch": 0.0516771864642925, "grad_norm": 1.4929258823394775, "learning_rate": 2.5838047791944608e-06, "loss": 0.0325, "step": 6120 }, { "epoch": 0.05176162631145638, "grad_norm": 1.2569879293441772, "learning_rate": 2.588026682428439e-06, "loss": 0.028, "step": 6130 }, { "epoch": 0.05184606615862025, "grad_norm": 2.490823268890381, "learning_rate": 2.592248585662417e-06, "loss": 0.0435, "step": 6140 }, { "epoch": 0.05193050600578413, "grad_norm": 2.6066830158233643, "learning_rate": 2.5964704888963948e-06, "loss": 0.0379, "step": 6150 }, { "epoch": 0.052014945852948004, "grad_norm": 2.273591995239258, "learning_rate": 2.6006923921303726e-06, "loss": 0.0258, "step": 6160 }, { "epoch": 0.05209938570011188, "grad_norm": 1.8001598119735718, "learning_rate": 2.60491429536435e-06, "loss": 0.0367, "step": 6170 }, { "epoch": 0.05218382554727576, "grad_norm": 2.444659948348999, "learning_rate": 2.6091361985983284e-06, "loss": 0.0357, "step": 6180 }, { "epoch": 0.052268265394439635, "grad_norm": 1.0890159606933594, "learning_rate": 2.613358101832306e-06, "loss": 0.0385, "step": 6190 }, { "epoch": 0.052352705241603514, "grad_norm": 2.418789863586426, "learning_rate": 2.617580005066284e-06, "loss": 0.0493, "step": 6200 }, { "epoch": 0.05243714508876739, "grad_norm": 2.028454542160034, "learning_rate": 2.6218019083002624e-06, "loss": 0.0349, "step": 6210 }, { "epoch": 0.052521584935931266, "grad_norm": 1.6064870357513428, "learning_rate": 2.6260238115342398e-06, "loss": 0.0339, "step": 6220 }, { "epoch": 0.052606024783095146, "grad_norm": 2.3546268939971924, "learning_rate": 2.6302457147682176e-06, "loss": 0.0433, "step": 6230 }, { "epoch": 0.05269046463025902, "grad_norm": 1.5606986284255981, "learning_rate": 2.6344676180021955e-06, "loss": 0.0414, "step": 6240 }, { "epoch": 0.0527749044774229, "grad_norm": 0.9160112738609314, "learning_rate": 2.6386895212361734e-06, "loss": 0.0534, "step": 6250 }, { "epoch": 0.05285934432458677, "grad_norm": 0.9366395473480225, "learning_rate": 2.6429114244701516e-06, "loss": 0.0325, "step": 6260 }, { "epoch": 0.05294378417175065, "grad_norm": 1.1781346797943115, "learning_rate": 2.6471333277041295e-06, "loss": 0.0418, "step": 6270 }, { "epoch": 0.05302822401891453, "grad_norm": 1.5923813581466675, "learning_rate": 2.651355230938107e-06, "loss": 0.0303, "step": 6280 }, { "epoch": 0.0531126638660784, "grad_norm": 1.3025888204574585, "learning_rate": 2.655577134172085e-06, "loss": 0.034, "step": 6290 }, { "epoch": 0.05319710371324228, "grad_norm": 1.5970972776412964, "learning_rate": 2.6597990374060627e-06, "loss": 0.0433, "step": 6300 }, { "epoch": 0.05328154356040615, "grad_norm": 1.1294317245483398, "learning_rate": 2.664020940640041e-06, "loss": 0.0434, "step": 6310 }, { "epoch": 0.05336598340757003, "grad_norm": 1.5970070362091064, "learning_rate": 2.668242843874019e-06, "loss": 0.0264, "step": 6320 }, { "epoch": 0.05345042325473391, "grad_norm": 2.785287380218506, "learning_rate": 2.6724647471079967e-06, "loss": 0.0449, "step": 6330 }, { "epoch": 0.053534863101897784, "grad_norm": 1.44333016872406, "learning_rate": 2.676686650341974e-06, "loss": 0.0323, "step": 6340 }, { "epoch": 0.05361930294906166, "grad_norm": 1.1812759637832642, "learning_rate": 2.680908553575952e-06, "loss": 0.0403, "step": 6350 }, { "epoch": 0.053703742796225536, "grad_norm": 1.4968843460083008, "learning_rate": 2.6851304568099302e-06, "loss": 0.0237, "step": 6360 }, { "epoch": 0.053788182643389415, "grad_norm": 2.0035436153411865, "learning_rate": 2.689352360043908e-06, "loss": 0.0423, "step": 6370 }, { "epoch": 0.053872622490553294, "grad_norm": 2.0883610248565674, "learning_rate": 2.693574263277886e-06, "loss": 0.042, "step": 6380 }, { "epoch": 0.05395706233771717, "grad_norm": 1.2580931186676025, "learning_rate": 2.6977961665118642e-06, "loss": 0.043, "step": 6390 }, { "epoch": 0.054041502184881046, "grad_norm": 1.7601640224456787, "learning_rate": 2.7020180697458417e-06, "loss": 0.0472, "step": 6400 }, { "epoch": 0.05412594203204492, "grad_norm": 1.0406180620193481, "learning_rate": 2.7062399729798195e-06, "loss": 0.0356, "step": 6410 }, { "epoch": 0.0542103818792088, "grad_norm": 2.0920355319976807, "learning_rate": 2.7104618762137974e-06, "loss": 0.0378, "step": 6420 }, { "epoch": 0.05429482172637268, "grad_norm": 2.3293986320495605, "learning_rate": 2.7146837794477752e-06, "loss": 0.0369, "step": 6430 }, { "epoch": 0.05437926157353655, "grad_norm": 1.7952420711517334, "learning_rate": 2.7189056826817535e-06, "loss": 0.0367, "step": 6440 }, { "epoch": 0.05446370142070043, "grad_norm": 1.1982258558273315, "learning_rate": 2.723127585915731e-06, "loss": 0.0398, "step": 6450 }, { "epoch": 0.05454814126786431, "grad_norm": 1.2871631383895874, "learning_rate": 2.727349489149709e-06, "loss": 0.0429, "step": 6460 }, { "epoch": 0.05463258111502818, "grad_norm": 1.7134239673614502, "learning_rate": 2.7315713923836867e-06, "loss": 0.0486, "step": 6470 }, { "epoch": 0.05471702096219206, "grad_norm": 2.2121591567993164, "learning_rate": 2.7357932956176645e-06, "loss": 0.0565, "step": 6480 }, { "epoch": 0.05480146080935593, "grad_norm": 1.5060211420059204, "learning_rate": 2.740015198851643e-06, "loss": 0.0374, "step": 6490 }, { "epoch": 0.05488590065651981, "grad_norm": 1.467221975326538, "learning_rate": 2.7442371020856207e-06, "loss": 0.0281, "step": 6500 }, { "epoch": 0.05497034050368369, "grad_norm": 0.6569867730140686, "learning_rate": 2.748459005319598e-06, "loss": 0.0445, "step": 6510 }, { "epoch": 0.055054780350847564, "grad_norm": 0.8097078204154968, "learning_rate": 2.752680908553576e-06, "loss": 0.0337, "step": 6520 }, { "epoch": 0.05513922019801144, "grad_norm": 0.977865993976593, "learning_rate": 2.7569028117875542e-06, "loss": 0.0386, "step": 6530 }, { "epoch": 0.055223660045175316, "grad_norm": 1.218598484992981, "learning_rate": 2.761124715021532e-06, "loss": 0.0354, "step": 6540 }, { "epoch": 0.055308099892339195, "grad_norm": 1.5361019372940063, "learning_rate": 2.76534661825551e-06, "loss": 0.0267, "step": 6550 }, { "epoch": 0.055392539739503074, "grad_norm": 1.360234022140503, "learning_rate": 2.769568521489488e-06, "loss": 0.0436, "step": 6560 }, { "epoch": 0.05547697958666695, "grad_norm": 4.310755729675293, "learning_rate": 2.7737904247234653e-06, "loss": 0.0426, "step": 6570 }, { "epoch": 0.055561419433830826, "grad_norm": 1.4447271823883057, "learning_rate": 2.7780123279574435e-06, "loss": 0.0249, "step": 6580 }, { "epoch": 0.0556458592809947, "grad_norm": 0.7529255747795105, "learning_rate": 2.7822342311914214e-06, "loss": 0.0317, "step": 6590 }, { "epoch": 0.05573029912815858, "grad_norm": 2.147193670272827, "learning_rate": 2.7864561344253993e-06, "loss": 0.0331, "step": 6600 }, { "epoch": 0.05581473897532246, "grad_norm": 1.3467718362808228, "learning_rate": 2.790678037659377e-06, "loss": 0.0324, "step": 6610 }, { "epoch": 0.05589917882248633, "grad_norm": 2.073678493499756, "learning_rate": 2.7948999408933545e-06, "loss": 0.04, "step": 6620 }, { "epoch": 0.05598361866965021, "grad_norm": 1.7245019674301147, "learning_rate": 2.799121844127333e-06, "loss": 0.0306, "step": 6630 }, { "epoch": 0.05606805851681408, "grad_norm": 1.416350245475769, "learning_rate": 2.8033437473613107e-06, "loss": 0.0439, "step": 6640 }, { "epoch": 0.05615249836397796, "grad_norm": 4.038002967834473, "learning_rate": 2.8075656505952885e-06, "loss": 0.0543, "step": 6650 }, { "epoch": 0.05623693821114184, "grad_norm": 1.0427507162094116, "learning_rate": 2.8117875538292664e-06, "loss": 0.0222, "step": 6660 }, { "epoch": 0.05632137805830571, "grad_norm": 2.075561761856079, "learning_rate": 2.8160094570632447e-06, "loss": 0.0328, "step": 6670 }, { "epoch": 0.05640581790546959, "grad_norm": 1.4701448678970337, "learning_rate": 2.820231360297222e-06, "loss": 0.0323, "step": 6680 }, { "epoch": 0.056490257752633465, "grad_norm": 1.7703927755355835, "learning_rate": 2.8244532635312e-06, "loss": 0.0298, "step": 6690 }, { "epoch": 0.056574697599797344, "grad_norm": 2.0032877922058105, "learning_rate": 2.828675166765178e-06, "loss": 0.0499, "step": 6700 }, { "epoch": 0.05665913744696122, "grad_norm": 1.3141937255859375, "learning_rate": 2.832897069999156e-06, "loss": 0.0367, "step": 6710 }, { "epoch": 0.056743577294125096, "grad_norm": 1.630948781967163, "learning_rate": 2.837118973233134e-06, "loss": 0.0456, "step": 6720 }, { "epoch": 0.056828017141288975, "grad_norm": 0.934062123298645, "learning_rate": 2.841340876467112e-06, "loss": 0.0347, "step": 6730 }, { "epoch": 0.05691245698845285, "grad_norm": 1.7708605527877808, "learning_rate": 2.8455627797010893e-06, "loss": 0.044, "step": 6740 }, { "epoch": 0.05699689683561673, "grad_norm": 1.0600682497024536, "learning_rate": 2.849784682935067e-06, "loss": 0.036, "step": 6750 }, { "epoch": 0.057081336682780606, "grad_norm": 1.3801698684692383, "learning_rate": 2.8540065861690454e-06, "loss": 0.0503, "step": 6760 }, { "epoch": 0.05716577652994448, "grad_norm": 1.0020811557769775, "learning_rate": 2.8582284894030233e-06, "loss": 0.0279, "step": 6770 }, { "epoch": 0.05725021637710836, "grad_norm": 1.3321465253829956, "learning_rate": 2.862450392637001e-06, "loss": 0.029, "step": 6780 }, { "epoch": 0.05733465622427224, "grad_norm": 0.6473193168640137, "learning_rate": 2.8666722958709786e-06, "loss": 0.032, "step": 6790 }, { "epoch": 0.05741909607143611, "grad_norm": 2.0596601963043213, "learning_rate": 2.8708941991049564e-06, "loss": 0.0444, "step": 6800 }, { "epoch": 0.05750353591859999, "grad_norm": 1.9895679950714111, "learning_rate": 2.8751161023389347e-06, "loss": 0.0276, "step": 6810 }, { "epoch": 0.05758797576576386, "grad_norm": 0.8321904540061951, "learning_rate": 2.8793380055729126e-06, "loss": 0.0366, "step": 6820 }, { "epoch": 0.05767241561292774, "grad_norm": 1.1693270206451416, "learning_rate": 2.8835599088068904e-06, "loss": 0.0446, "step": 6830 }, { "epoch": 0.05775685546009162, "grad_norm": 1.2172396183013916, "learning_rate": 2.8877818120408687e-06, "loss": 0.04, "step": 6840 }, { "epoch": 0.05784129530725549, "grad_norm": 1.2020337581634521, "learning_rate": 2.8920037152748457e-06, "loss": 0.041, "step": 6850 }, { "epoch": 0.05792573515441937, "grad_norm": 1.8444737195968628, "learning_rate": 2.896225618508824e-06, "loss": 0.0357, "step": 6860 }, { "epoch": 0.058010175001583245, "grad_norm": 1.621745228767395, "learning_rate": 2.900447521742802e-06, "loss": 0.0345, "step": 6870 }, { "epoch": 0.058094614848747124, "grad_norm": 1.061902642250061, "learning_rate": 2.9046694249767797e-06, "loss": 0.0438, "step": 6880 }, { "epoch": 0.058179054695911, "grad_norm": 1.6105414628982544, "learning_rate": 2.908891328210758e-06, "loss": 0.0371, "step": 6890 }, { "epoch": 0.058263494543074876, "grad_norm": 1.4002681970596313, "learning_rate": 2.913113231444736e-06, "loss": 0.0292, "step": 6900 }, { "epoch": 0.058347934390238755, "grad_norm": 1.5615251064300537, "learning_rate": 2.9173351346787133e-06, "loss": 0.0383, "step": 6910 }, { "epoch": 0.05843237423740263, "grad_norm": 1.8238232135772705, "learning_rate": 2.921557037912691e-06, "loss": 0.0293, "step": 6920 }, { "epoch": 0.05851681408456651, "grad_norm": 1.010703682899475, "learning_rate": 2.925778941146669e-06, "loss": 0.0565, "step": 6930 }, { "epoch": 0.058601253931730386, "grad_norm": 0.44107741117477417, "learning_rate": 2.9300008443806473e-06, "loss": 0.0254, "step": 6940 }, { "epoch": 0.05868569377889426, "grad_norm": 1.747601866722107, "learning_rate": 2.934222747614625e-06, "loss": 0.0325, "step": 6950 }, { "epoch": 0.05877013362605814, "grad_norm": 1.0768909454345703, "learning_rate": 2.9384446508486026e-06, "loss": 0.0358, "step": 6960 }, { "epoch": 0.05885457347322201, "grad_norm": 0.968361496925354, "learning_rate": 2.9426665540825804e-06, "loss": 0.0218, "step": 6970 }, { "epoch": 0.05893901332038589, "grad_norm": 2.391749858856201, "learning_rate": 2.9468884573165583e-06, "loss": 0.0387, "step": 6980 }, { "epoch": 0.05902345316754977, "grad_norm": 1.3572393655776978, "learning_rate": 2.9511103605505366e-06, "loss": 0.0406, "step": 6990 }, { "epoch": 0.05910789301471364, "grad_norm": 3.4984021186828613, "learning_rate": 2.9553322637845144e-06, "loss": 0.0337, "step": 7000 }, { "epoch": 0.05919233286187752, "grad_norm": 1.255326271057129, "learning_rate": 2.9595541670184923e-06, "loss": 0.0287, "step": 7010 }, { "epoch": 0.05927677270904139, "grad_norm": 1.0739736557006836, "learning_rate": 2.9637760702524697e-06, "loss": 0.025, "step": 7020 }, { "epoch": 0.05936121255620527, "grad_norm": 1.3372535705566406, "learning_rate": 2.967997973486448e-06, "loss": 0.0388, "step": 7030 }, { "epoch": 0.05944565240336915, "grad_norm": 1.8678152561187744, "learning_rate": 2.972219876720426e-06, "loss": 0.0262, "step": 7040 }, { "epoch": 0.059530092250533025, "grad_norm": 1.318699836730957, "learning_rate": 2.9764417799544037e-06, "loss": 0.0363, "step": 7050 }, { "epoch": 0.059614532097696904, "grad_norm": 0.5639051795005798, "learning_rate": 2.9806636831883816e-06, "loss": 0.0416, "step": 7060 }, { "epoch": 0.059698971944860776, "grad_norm": 0.5559428334236145, "learning_rate": 2.98488558642236e-06, "loss": 0.0299, "step": 7070 }, { "epoch": 0.059783411792024656, "grad_norm": 1.9380468130111694, "learning_rate": 2.9891074896563373e-06, "loss": 0.041, "step": 7080 }, { "epoch": 0.059867851639188535, "grad_norm": 1.3426811695098877, "learning_rate": 2.993329392890315e-06, "loss": 0.0345, "step": 7090 }, { "epoch": 0.05995229148635241, "grad_norm": 0.41680029034614563, "learning_rate": 2.997551296124293e-06, "loss": 0.0245, "step": 7100 }, { "epoch": 0.06003673133351629, "grad_norm": 1.422370195388794, "learning_rate": 3.001773199358271e-06, "loss": 0.0422, "step": 7110 }, { "epoch": 0.060121171180680166, "grad_norm": 1.6220464706420898, "learning_rate": 3.005995102592249e-06, "loss": 0.0327, "step": 7120 }, { "epoch": 0.06020561102784404, "grad_norm": 1.059643268585205, "learning_rate": 3.0102170058262266e-06, "loss": 0.0213, "step": 7130 }, { "epoch": 0.06029005087500792, "grad_norm": 1.792641043663025, "learning_rate": 3.0144389090602045e-06, "loss": 0.0362, "step": 7140 }, { "epoch": 0.06037449072217179, "grad_norm": 1.7370243072509766, "learning_rate": 3.0186608122941823e-06, "loss": 0.0326, "step": 7150 }, { "epoch": 0.06045893056933567, "grad_norm": 1.9471403360366821, "learning_rate": 3.02288271552816e-06, "loss": 0.0333, "step": 7160 }, { "epoch": 0.06054337041649955, "grad_norm": 1.474241852760315, "learning_rate": 3.0271046187621385e-06, "loss": 0.0451, "step": 7170 }, { "epoch": 0.06062781026366342, "grad_norm": 1.4035919904708862, "learning_rate": 3.0313265219961163e-06, "loss": 0.0292, "step": 7180 }, { "epoch": 0.0607122501108273, "grad_norm": 0.8702583312988281, "learning_rate": 3.0355484252300937e-06, "loss": 0.0375, "step": 7190 }, { "epoch": 0.060796689957991173, "grad_norm": 1.2629834413528442, "learning_rate": 3.0397703284640716e-06, "loss": 0.0456, "step": 7200 }, { "epoch": 0.06088112980515505, "grad_norm": 0.9023465514183044, "learning_rate": 3.04399223169805e-06, "loss": 0.0349, "step": 7210 }, { "epoch": 0.06096556965231893, "grad_norm": 2.820183753967285, "learning_rate": 3.0482141349320277e-06, "loss": 0.0501, "step": 7220 }, { "epoch": 0.061050009499482805, "grad_norm": 1.5745785236358643, "learning_rate": 3.0524360381660056e-06, "loss": 0.0272, "step": 7230 }, { "epoch": 0.061134449346646684, "grad_norm": 1.8576788902282715, "learning_rate": 3.0566579413999835e-06, "loss": 0.0547, "step": 7240 }, { "epoch": 0.061218889193810556, "grad_norm": 1.9387233257293701, "learning_rate": 3.060879844633961e-06, "loss": 0.0372, "step": 7250 }, { "epoch": 0.061303329040974436, "grad_norm": 0.9600934982299805, "learning_rate": 3.065101747867939e-06, "loss": 0.0302, "step": 7260 }, { "epoch": 0.061387768888138315, "grad_norm": 1.1251429319381714, "learning_rate": 3.069323651101917e-06, "loss": 0.029, "step": 7270 }, { "epoch": 0.06147220873530219, "grad_norm": 1.8234447240829468, "learning_rate": 3.073545554335895e-06, "loss": 0.0375, "step": 7280 }, { "epoch": 0.06155664858246607, "grad_norm": 1.3942089080810547, "learning_rate": 3.0777674575698728e-06, "loss": 0.0529, "step": 7290 }, { "epoch": 0.06164108842962994, "grad_norm": 0.6867241263389587, "learning_rate": 3.08198936080385e-06, "loss": 0.0346, "step": 7300 }, { "epoch": 0.06172552827679382, "grad_norm": 1.3369264602661133, "learning_rate": 3.0862112640378285e-06, "loss": 0.0252, "step": 7310 }, { "epoch": 0.0618099681239577, "grad_norm": 1.0160303115844727, "learning_rate": 3.0904331672718063e-06, "loss": 0.0197, "step": 7320 }, { "epoch": 0.06189440797112157, "grad_norm": 1.128499150276184, "learning_rate": 3.094655070505784e-06, "loss": 0.0355, "step": 7330 }, { "epoch": 0.06197884781828545, "grad_norm": 0.8548903465270996, "learning_rate": 3.0988769737397625e-06, "loss": 0.0266, "step": 7340 }, { "epoch": 0.06206328766544932, "grad_norm": 1.3641325235366821, "learning_rate": 3.1030988769737403e-06, "loss": 0.0406, "step": 7350 }, { "epoch": 0.0621477275126132, "grad_norm": 1.5321522951126099, "learning_rate": 3.1073207802077178e-06, "loss": 0.031, "step": 7360 }, { "epoch": 0.06223216735977708, "grad_norm": 1.7474113702774048, "learning_rate": 3.1115426834416956e-06, "loss": 0.0625, "step": 7370 }, { "epoch": 0.062316607206940954, "grad_norm": 1.8888853788375854, "learning_rate": 3.1157645866756735e-06, "loss": 0.0227, "step": 7380 }, { "epoch": 0.06240104705410483, "grad_norm": 1.568772315979004, "learning_rate": 3.1199864899096518e-06, "loss": 0.0255, "step": 7390 }, { "epoch": 0.062485486901268705, "grad_norm": 1.2359474897384644, "learning_rate": 3.1242083931436296e-06, "loss": 0.0355, "step": 7400 }, { "epoch": 0.06256992674843259, "grad_norm": 2.2589378356933594, "learning_rate": 3.1284302963776075e-06, "loss": 0.0282, "step": 7410 }, { "epoch": 0.06265436659559646, "grad_norm": 1.1864666938781738, "learning_rate": 3.132652199611585e-06, "loss": 0.0214, "step": 7420 }, { "epoch": 0.06273880644276034, "grad_norm": 0.6748795509338379, "learning_rate": 3.1368741028455628e-06, "loss": 0.0346, "step": 7430 }, { "epoch": 0.06282324628992421, "grad_norm": 0.9155420660972595, "learning_rate": 3.141096006079541e-06, "loss": 0.0555, "step": 7440 }, { "epoch": 0.0629076861370881, "grad_norm": 1.414878487586975, "learning_rate": 3.145317909313519e-06, "loss": 0.0339, "step": 7450 }, { "epoch": 0.06299212598425197, "grad_norm": 1.7581746578216553, "learning_rate": 3.1495398125474968e-06, "loss": 0.0309, "step": 7460 }, { "epoch": 0.06307656583141584, "grad_norm": 1.7755396366119385, "learning_rate": 3.153761715781474e-06, "loss": 0.0342, "step": 7470 }, { "epoch": 0.06316100567857973, "grad_norm": 1.9103107452392578, "learning_rate": 3.157983619015452e-06, "loss": 0.0234, "step": 7480 }, { "epoch": 0.0632454455257436, "grad_norm": 1.9388128519058228, "learning_rate": 3.1622055222494303e-06, "loss": 0.0349, "step": 7490 }, { "epoch": 0.06332988537290747, "grad_norm": 0.9188522100448608, "learning_rate": 3.166427425483408e-06, "loss": 0.0356, "step": 7500 }, { "epoch": 0.06341432522007136, "grad_norm": 3.6329703330993652, "learning_rate": 3.170649328717386e-06, "loss": 0.0473, "step": 7510 }, { "epoch": 0.06349876506723523, "grad_norm": 0.8670021891593933, "learning_rate": 3.1748712319513643e-06, "loss": 0.0329, "step": 7520 }, { "epoch": 0.0635832049143991, "grad_norm": 1.4211503267288208, "learning_rate": 3.1790931351853418e-06, "loss": 0.0395, "step": 7530 }, { "epoch": 0.06366764476156297, "grad_norm": 2.9066643714904785, "learning_rate": 3.1833150384193196e-06, "loss": 0.0389, "step": 7540 }, { "epoch": 0.06375208460872686, "grad_norm": 1.4982432126998901, "learning_rate": 3.1875369416532975e-06, "loss": 0.0526, "step": 7550 }, { "epoch": 0.06383652445589073, "grad_norm": 0.5621861219406128, "learning_rate": 3.1917588448872754e-06, "loss": 0.0256, "step": 7560 }, { "epoch": 0.0639209643030546, "grad_norm": 1.147916316986084, "learning_rate": 3.1959807481212536e-06, "loss": 0.0256, "step": 7570 }, { "epoch": 0.06400540415021849, "grad_norm": 0.6914950013160706, "learning_rate": 3.2002026513552315e-06, "loss": 0.0341, "step": 7580 }, { "epoch": 0.06408984399738236, "grad_norm": 2.0282435417175293, "learning_rate": 3.204424554589209e-06, "loss": 0.0556, "step": 7590 }, { "epoch": 0.06417428384454624, "grad_norm": 1.4294661283493042, "learning_rate": 3.2086464578231868e-06, "loss": 0.0456, "step": 7600 }, { "epoch": 0.06425872369171012, "grad_norm": 1.2065900564193726, "learning_rate": 3.2128683610571646e-06, "loss": 0.0385, "step": 7610 }, { "epoch": 0.064343163538874, "grad_norm": 1.0024211406707764, "learning_rate": 3.217090264291143e-06, "loss": 0.0216, "step": 7620 }, { "epoch": 0.06442760338603787, "grad_norm": 1.759974479675293, "learning_rate": 3.2213121675251208e-06, "loss": 0.0431, "step": 7630 }, { "epoch": 0.06451204323320175, "grad_norm": 0.9854244589805603, "learning_rate": 3.2255340707590982e-06, "loss": 0.0355, "step": 7640 }, { "epoch": 0.06459648308036563, "grad_norm": 1.7209686040878296, "learning_rate": 3.229755973993076e-06, "loss": 0.0476, "step": 7650 }, { "epoch": 0.0646809229275295, "grad_norm": 0.7778801321983337, "learning_rate": 3.233977877227054e-06, "loss": 0.0249, "step": 7660 }, { "epoch": 0.06476536277469337, "grad_norm": 1.4547507762908936, "learning_rate": 3.2381997804610322e-06, "loss": 0.031, "step": 7670 }, { "epoch": 0.06484980262185726, "grad_norm": 0.6578739881515503, "learning_rate": 3.24242168369501e-06, "loss": 0.0339, "step": 7680 }, { "epoch": 0.06493424246902113, "grad_norm": 1.4524853229522705, "learning_rate": 3.246643586928988e-06, "loss": 0.0444, "step": 7690 }, { "epoch": 0.065018682316185, "grad_norm": 2.221374750137329, "learning_rate": 3.2508654901629654e-06, "loss": 0.0406, "step": 7700 }, { "epoch": 0.06510312216334889, "grad_norm": 1.3417779207229614, "learning_rate": 3.2550873933969437e-06, "loss": 0.039, "step": 7710 }, { "epoch": 0.06518756201051276, "grad_norm": 1.5256142616271973, "learning_rate": 3.2593092966309215e-06, "loss": 0.0422, "step": 7720 }, { "epoch": 0.06527200185767663, "grad_norm": 1.3581523895263672, "learning_rate": 3.2635311998648994e-06, "loss": 0.0348, "step": 7730 }, { "epoch": 0.06535644170484052, "grad_norm": 1.6558748483657837, "learning_rate": 3.2677531030988772e-06, "loss": 0.039, "step": 7740 }, { "epoch": 0.06544088155200439, "grad_norm": 1.3180819749832153, "learning_rate": 3.2719750063328555e-06, "loss": 0.0416, "step": 7750 }, { "epoch": 0.06552532139916827, "grad_norm": 2.5654072761535645, "learning_rate": 3.276196909566833e-06, "loss": 0.0493, "step": 7760 }, { "epoch": 0.06560976124633214, "grad_norm": 1.3817024230957031, "learning_rate": 3.280418812800811e-06, "loss": 0.0348, "step": 7770 }, { "epoch": 0.06569420109349602, "grad_norm": 3.4309115409851074, "learning_rate": 3.2846407160347887e-06, "loss": 0.0399, "step": 7780 }, { "epoch": 0.0657786409406599, "grad_norm": 1.1456453800201416, "learning_rate": 3.2888626192687665e-06, "loss": 0.027, "step": 7790 }, { "epoch": 0.06586308078782377, "grad_norm": 1.3093684911727905, "learning_rate": 3.293084522502745e-06, "loss": 0.0409, "step": 7800 }, { "epoch": 0.06594752063498766, "grad_norm": 2.182934284210205, "learning_rate": 3.2973064257367227e-06, "loss": 0.0293, "step": 7810 }, { "epoch": 0.06603196048215153, "grad_norm": 1.4152977466583252, "learning_rate": 3.3015283289707e-06, "loss": 0.0275, "step": 7820 }, { "epoch": 0.0661164003293154, "grad_norm": 1.9728955030441284, "learning_rate": 3.305750232204678e-06, "loss": 0.0375, "step": 7830 }, { "epoch": 0.06620084017647929, "grad_norm": 1.7280434370040894, "learning_rate": 3.3099721354386562e-06, "loss": 0.0364, "step": 7840 }, { "epoch": 0.06628528002364316, "grad_norm": 1.7358589172363281, "learning_rate": 3.314194038672634e-06, "loss": 0.024, "step": 7850 }, { "epoch": 0.06636971987080703, "grad_norm": 1.6330993175506592, "learning_rate": 3.318415941906612e-06, "loss": 0.051, "step": 7860 }, { "epoch": 0.0664541597179709, "grad_norm": 1.1092965602874756, "learning_rate": 3.3226378451405894e-06, "loss": 0.0283, "step": 7870 }, { "epoch": 0.06653859956513479, "grad_norm": 0.9942919015884399, "learning_rate": 3.3268597483745672e-06, "loss": 0.0353, "step": 7880 }, { "epoch": 0.06662303941229866, "grad_norm": 0.9500750303268433, "learning_rate": 3.3310816516085455e-06, "loss": 0.0371, "step": 7890 }, { "epoch": 0.06670747925946253, "grad_norm": 1.4533112049102783, "learning_rate": 3.3353035548425234e-06, "loss": 0.0443, "step": 7900 }, { "epoch": 0.06679191910662642, "grad_norm": 1.4364049434661865, "learning_rate": 3.3395254580765012e-06, "loss": 0.0383, "step": 7910 }, { "epoch": 0.0668763589537903, "grad_norm": 1.6144258975982666, "learning_rate": 3.343747361310479e-06, "loss": 0.0264, "step": 7920 }, { "epoch": 0.06696079880095417, "grad_norm": 0.7946205735206604, "learning_rate": 3.3479692645444565e-06, "loss": 0.0374, "step": 7930 }, { "epoch": 0.06704523864811805, "grad_norm": 1.0147744417190552, "learning_rate": 3.352191167778435e-06, "loss": 0.0409, "step": 7940 }, { "epoch": 0.06712967849528192, "grad_norm": 0.9254303574562073, "learning_rate": 3.3564130710124127e-06, "loss": 0.0382, "step": 7950 }, { "epoch": 0.0672141183424458, "grad_norm": 1.8416476249694824, "learning_rate": 3.3606349742463905e-06, "loss": 0.0351, "step": 7960 }, { "epoch": 0.06729855818960968, "grad_norm": 1.1777739524841309, "learning_rate": 3.3648568774803684e-06, "loss": 0.0418, "step": 7970 }, { "epoch": 0.06738299803677356, "grad_norm": 1.7282299995422363, "learning_rate": 3.3690787807143467e-06, "loss": 0.0366, "step": 7980 }, { "epoch": 0.06746743788393743, "grad_norm": 0.8800182342529297, "learning_rate": 3.373300683948324e-06, "loss": 0.0311, "step": 7990 }, { "epoch": 0.0675518777311013, "grad_norm": 0.4361768364906311, "learning_rate": 3.377522587182302e-06, "loss": 0.0173, "step": 8000 }, { "epoch": 0.06763631757826519, "grad_norm": 1.0782191753387451, "learning_rate": 3.38174449041628e-06, "loss": 0.0363, "step": 8010 }, { "epoch": 0.06772075742542906, "grad_norm": 1.506798505783081, "learning_rate": 3.385966393650258e-06, "loss": 0.0283, "step": 8020 }, { "epoch": 0.06780519727259293, "grad_norm": 1.3879339694976807, "learning_rate": 3.390188296884236e-06, "loss": 0.0273, "step": 8030 }, { "epoch": 0.06788963711975682, "grad_norm": 1.7458670139312744, "learning_rate": 3.3944102001182134e-06, "loss": 0.0419, "step": 8040 }, { "epoch": 0.06797407696692069, "grad_norm": 1.3982877731323242, "learning_rate": 3.3986321033521913e-06, "loss": 0.0377, "step": 8050 }, { "epoch": 0.06805851681408456, "grad_norm": 1.056390643119812, "learning_rate": 3.402854006586169e-06, "loss": 0.0313, "step": 8060 }, { "epoch": 0.06814295666124845, "grad_norm": 1.7773302793502808, "learning_rate": 3.4070759098201474e-06, "loss": 0.0316, "step": 8070 }, { "epoch": 0.06822739650841232, "grad_norm": 1.658202052116394, "learning_rate": 3.4112978130541253e-06, "loss": 0.0319, "step": 8080 }, { "epoch": 0.0683118363555762, "grad_norm": 1.7863534688949585, "learning_rate": 3.415519716288103e-06, "loss": 0.0348, "step": 8090 }, { "epoch": 0.06839627620274007, "grad_norm": 1.3309413194656372, "learning_rate": 3.4197416195220806e-06, "loss": 0.0271, "step": 8100 }, { "epoch": 0.06848071604990395, "grad_norm": 1.7432198524475098, "learning_rate": 3.4239635227560584e-06, "loss": 0.0337, "step": 8110 }, { "epoch": 0.06856515589706783, "grad_norm": 0.49498018622398376, "learning_rate": 3.4281854259900367e-06, "loss": 0.033, "step": 8120 }, { "epoch": 0.0686495957442317, "grad_norm": 1.4118764400482178, "learning_rate": 3.4324073292240146e-06, "loss": 0.0317, "step": 8130 }, { "epoch": 0.06873403559139558, "grad_norm": 0.9648473858833313, "learning_rate": 3.4366292324579924e-06, "loss": 0.0371, "step": 8140 }, { "epoch": 0.06881847543855946, "grad_norm": 0.6265752911567688, "learning_rate": 3.4408511356919707e-06, "loss": 0.0456, "step": 8150 }, { "epoch": 0.06890291528572333, "grad_norm": 1.873292088508606, "learning_rate": 3.4450730389259477e-06, "loss": 0.0393, "step": 8160 }, { "epoch": 0.06898735513288722, "grad_norm": 1.1310110092163086, "learning_rate": 3.449294942159926e-06, "loss": 0.0389, "step": 8170 }, { "epoch": 0.06907179498005109, "grad_norm": 0.8032477498054504, "learning_rate": 3.453516845393904e-06, "loss": 0.0313, "step": 8180 }, { "epoch": 0.06915623482721496, "grad_norm": 1.5284579992294312, "learning_rate": 3.4577387486278817e-06, "loss": 0.0406, "step": 8190 }, { "epoch": 0.06924067467437883, "grad_norm": 0.8510813117027283, "learning_rate": 3.46196065186186e-06, "loss": 0.0355, "step": 8200 }, { "epoch": 0.06932511452154272, "grad_norm": 1.6467934846878052, "learning_rate": 3.4661825550958374e-06, "loss": 0.0391, "step": 8210 }, { "epoch": 0.06940955436870659, "grad_norm": 1.1861796379089355, "learning_rate": 3.4704044583298153e-06, "loss": 0.0265, "step": 8220 }, { "epoch": 0.06949399421587046, "grad_norm": 1.5668269395828247, "learning_rate": 3.474626361563793e-06, "loss": 0.0393, "step": 8230 }, { "epoch": 0.06957843406303435, "grad_norm": 1.5832464694976807, "learning_rate": 3.478848264797771e-06, "loss": 0.0345, "step": 8240 }, { "epoch": 0.06966287391019822, "grad_norm": 3.014484167098999, "learning_rate": 3.4830701680317493e-06, "loss": 0.042, "step": 8250 }, { "epoch": 0.0697473137573621, "grad_norm": 0.800914466381073, "learning_rate": 3.487292071265727e-06, "loss": 0.0366, "step": 8260 }, { "epoch": 0.06983175360452598, "grad_norm": 1.376441240310669, "learning_rate": 3.4915139744997046e-06, "loss": 0.035, "step": 8270 }, { "epoch": 0.06991619345168985, "grad_norm": 1.788270115852356, "learning_rate": 3.4957358777336824e-06, "loss": 0.0386, "step": 8280 }, { "epoch": 0.07000063329885373, "grad_norm": 1.8638744354248047, "learning_rate": 3.4999577809676603e-06, "loss": 0.0218, "step": 8290 }, { "epoch": 0.07008507314601761, "grad_norm": 0.34230032563209534, "learning_rate": 3.5041796842016386e-06, "loss": 0.0347, "step": 8300 }, { "epoch": 0.07016951299318148, "grad_norm": 0.8869764804840088, "learning_rate": 3.5084015874356164e-06, "loss": 0.0243, "step": 8310 }, { "epoch": 0.07025395284034536, "grad_norm": 1.6278222799301147, "learning_rate": 3.5126234906695943e-06, "loss": 0.0378, "step": 8320 }, { "epoch": 0.07033839268750923, "grad_norm": 1.2129969596862793, "learning_rate": 3.5168453939035717e-06, "loss": 0.0457, "step": 8330 }, { "epoch": 0.07042283253467312, "grad_norm": 1.5563982725143433, "learning_rate": 3.52106729713755e-06, "loss": 0.0293, "step": 8340 }, { "epoch": 0.07050727238183699, "grad_norm": 1.4163262844085693, "learning_rate": 3.525289200371528e-06, "loss": 0.0445, "step": 8350 }, { "epoch": 0.07059171222900086, "grad_norm": 1.428666353225708, "learning_rate": 3.5295111036055057e-06, "loss": 0.0382, "step": 8360 }, { "epoch": 0.07067615207616475, "grad_norm": 1.5389633178710938, "learning_rate": 3.5337330068394836e-06, "loss": 0.0309, "step": 8370 }, { "epoch": 0.07076059192332862, "grad_norm": 1.4238532781600952, "learning_rate": 3.537954910073461e-06, "loss": 0.0408, "step": 8380 }, { "epoch": 0.07084503177049249, "grad_norm": 2.216996192932129, "learning_rate": 3.5421768133074393e-06, "loss": 0.0478, "step": 8390 }, { "epoch": 0.07092947161765638, "grad_norm": 2.3920347690582275, "learning_rate": 3.546398716541417e-06, "loss": 0.047, "step": 8400 }, { "epoch": 0.07101391146482025, "grad_norm": 1.154330849647522, "learning_rate": 3.550620619775395e-06, "loss": 0.0337, "step": 8410 }, { "epoch": 0.07109835131198412, "grad_norm": 1.26197350025177, "learning_rate": 3.554842523009373e-06, "loss": 0.0332, "step": 8420 }, { "epoch": 0.071182791159148, "grad_norm": 1.2631621360778809, "learning_rate": 3.559064426243351e-06, "loss": 0.041, "step": 8430 }, { "epoch": 0.07126723100631188, "grad_norm": 1.1341042518615723, "learning_rate": 3.5632863294773286e-06, "loss": 0.029, "step": 8440 }, { "epoch": 0.07135167085347575, "grad_norm": 0.9169212579727173, "learning_rate": 3.5675082327113064e-06, "loss": 0.0415, "step": 8450 }, { "epoch": 0.07143611070063963, "grad_norm": 1.1352174282073975, "learning_rate": 3.5717301359452843e-06, "loss": 0.0442, "step": 8460 }, { "epoch": 0.07152055054780351, "grad_norm": 1.3061712980270386, "learning_rate": 3.575952039179262e-06, "loss": 0.0378, "step": 8470 }, { "epoch": 0.07160499039496739, "grad_norm": 0.9389786124229431, "learning_rate": 3.5801739424132404e-06, "loss": 0.0293, "step": 8480 }, { "epoch": 0.07168943024213126, "grad_norm": 1.1143182516098022, "learning_rate": 3.5843958456472183e-06, "loss": 0.051, "step": 8490 }, { "epoch": 0.07177387008929514, "grad_norm": 1.34580659866333, "learning_rate": 3.5886177488811957e-06, "loss": 0.0357, "step": 8500 }, { "epoch": 0.07185830993645902, "grad_norm": 2.150942802429199, "learning_rate": 3.5928396521151736e-06, "loss": 0.033, "step": 8510 }, { "epoch": 0.07194274978362289, "grad_norm": 2.2490921020507812, "learning_rate": 3.597061555349152e-06, "loss": 0.0343, "step": 8520 }, { "epoch": 0.07202718963078676, "grad_norm": 1.1204098463058472, "learning_rate": 3.6012834585831297e-06, "loss": 0.0372, "step": 8530 }, { "epoch": 0.07211162947795065, "grad_norm": 0.7761765122413635, "learning_rate": 3.6055053618171076e-06, "loss": 0.0274, "step": 8540 }, { "epoch": 0.07219606932511452, "grad_norm": 1.6708389520645142, "learning_rate": 3.609727265051085e-06, "loss": 0.0443, "step": 8550 }, { "epoch": 0.07228050917227839, "grad_norm": 1.0692102909088135, "learning_rate": 3.613949168285063e-06, "loss": 0.0306, "step": 8560 }, { "epoch": 0.07236494901944228, "grad_norm": 0.5421239137649536, "learning_rate": 3.618171071519041e-06, "loss": 0.0322, "step": 8570 }, { "epoch": 0.07244938886660615, "grad_norm": 1.5924113988876343, "learning_rate": 3.622392974753019e-06, "loss": 0.037, "step": 8580 }, { "epoch": 0.07253382871377002, "grad_norm": 2.421722650527954, "learning_rate": 3.626614877986997e-06, "loss": 0.0388, "step": 8590 }, { "epoch": 0.07261826856093391, "grad_norm": 0.935229480266571, "learning_rate": 3.6308367812209747e-06, "loss": 0.0195, "step": 8600 }, { "epoch": 0.07270270840809778, "grad_norm": 1.5243797302246094, "learning_rate": 3.635058684454952e-06, "loss": 0.0305, "step": 8610 }, { "epoch": 0.07278714825526165, "grad_norm": 1.18341064453125, "learning_rate": 3.6392805876889305e-06, "loss": 0.0327, "step": 8620 }, { "epoch": 0.07287158810242554, "grad_norm": 1.1582528352737427, "learning_rate": 3.6435024909229083e-06, "loss": 0.0316, "step": 8630 }, { "epoch": 0.07295602794958941, "grad_norm": 0.8882296085357666, "learning_rate": 3.647724394156886e-06, "loss": 0.0239, "step": 8640 }, { "epoch": 0.07304046779675329, "grad_norm": 1.897577166557312, "learning_rate": 3.6519462973908645e-06, "loss": 0.0362, "step": 8650 }, { "epoch": 0.07312490764391716, "grad_norm": 0.8898911476135254, "learning_rate": 3.6561682006248423e-06, "loss": 0.0451, "step": 8660 }, { "epoch": 0.07320934749108104, "grad_norm": 2.0025877952575684, "learning_rate": 3.6603901038588198e-06, "loss": 0.028, "step": 8670 }, { "epoch": 0.07329378733824492, "grad_norm": 1.5284417867660522, "learning_rate": 3.6646120070927976e-06, "loss": 0.0312, "step": 8680 }, { "epoch": 0.07337822718540879, "grad_norm": 1.4185892343521118, "learning_rate": 3.6688339103267755e-06, "loss": 0.0283, "step": 8690 }, { "epoch": 0.07346266703257268, "grad_norm": 0.8876581788063049, "learning_rate": 3.6730558135607538e-06, "loss": 0.0402, "step": 8700 }, { "epoch": 0.07354710687973655, "grad_norm": 1.241692066192627, "learning_rate": 3.6772777167947316e-06, "loss": 0.0278, "step": 8710 }, { "epoch": 0.07363154672690042, "grad_norm": 1.946045160293579, "learning_rate": 3.681499620028709e-06, "loss": 0.0377, "step": 8720 }, { "epoch": 0.07371598657406431, "grad_norm": 0.5367090106010437, "learning_rate": 3.685721523262687e-06, "loss": 0.0335, "step": 8730 }, { "epoch": 0.07380042642122818, "grad_norm": 1.2588815689086914, "learning_rate": 3.6899434264966648e-06, "loss": 0.0347, "step": 8740 }, { "epoch": 0.07388486626839205, "grad_norm": 0.8179640173912048, "learning_rate": 3.694165329730643e-06, "loss": 0.042, "step": 8750 }, { "epoch": 0.07396930611555592, "grad_norm": 1.4670065641403198, "learning_rate": 3.698387232964621e-06, "loss": 0.0329, "step": 8760 }, { "epoch": 0.07405374596271981, "grad_norm": 1.137587308883667, "learning_rate": 3.7026091361985988e-06, "loss": 0.0444, "step": 8770 }, { "epoch": 0.07413818580988368, "grad_norm": 1.0297147035598755, "learning_rate": 3.706831039432576e-06, "loss": 0.0305, "step": 8780 }, { "epoch": 0.07422262565704756, "grad_norm": 2.706773042678833, "learning_rate": 3.711052942666554e-06, "loss": 0.0371, "step": 8790 }, { "epoch": 0.07430706550421144, "grad_norm": 0.871928334236145, "learning_rate": 3.7152748459005323e-06, "loss": 0.0365, "step": 8800 }, { "epoch": 0.07439150535137531, "grad_norm": 1.5934629440307617, "learning_rate": 3.71949674913451e-06, "loss": 0.0274, "step": 8810 }, { "epoch": 0.07447594519853919, "grad_norm": 1.057687759399414, "learning_rate": 3.723718652368488e-06, "loss": 0.0311, "step": 8820 }, { "epoch": 0.07456038504570307, "grad_norm": 0.83939528465271, "learning_rate": 3.7279405556024663e-06, "loss": 0.0232, "step": 8830 }, { "epoch": 0.07464482489286695, "grad_norm": 1.2693523168563843, "learning_rate": 3.7321624588364438e-06, "loss": 0.0275, "step": 8840 }, { "epoch": 0.07472926474003082, "grad_norm": 0.8901336193084717, "learning_rate": 3.7363843620704216e-06, "loss": 0.0385, "step": 8850 }, { "epoch": 0.07481370458719469, "grad_norm": 0.7834444642066956, "learning_rate": 3.7406062653043995e-06, "loss": 0.0252, "step": 8860 }, { "epoch": 0.07489814443435858, "grad_norm": 0.9209738373756409, "learning_rate": 3.7448281685383773e-06, "loss": 0.0362, "step": 8870 }, { "epoch": 0.07498258428152245, "grad_norm": 0.6541457176208496, "learning_rate": 3.7490500717723556e-06, "loss": 0.0295, "step": 8880 }, { "epoch": 0.07506702412868632, "grad_norm": 2.451007604598999, "learning_rate": 3.753271975006333e-06, "loss": 0.0323, "step": 8890 }, { "epoch": 0.07515146397585021, "grad_norm": 1.9088870286941528, "learning_rate": 3.757493878240311e-06, "loss": 0.0335, "step": 8900 }, { "epoch": 0.07523590382301408, "grad_norm": 0.7427630424499512, "learning_rate": 3.7617157814742888e-06, "loss": 0.0473, "step": 8910 }, { "epoch": 0.07532034367017795, "grad_norm": 1.620606541633606, "learning_rate": 3.7659376847082666e-06, "loss": 0.0423, "step": 8920 }, { "epoch": 0.07540478351734184, "grad_norm": 2.4617316722869873, "learning_rate": 3.770159587942245e-06, "loss": 0.0334, "step": 8930 }, { "epoch": 0.07548922336450571, "grad_norm": 0.2270221710205078, "learning_rate": 3.7743814911762228e-06, "loss": 0.028, "step": 8940 }, { "epoch": 0.07557366321166958, "grad_norm": 1.787018895149231, "learning_rate": 3.7786033944102002e-06, "loss": 0.0673, "step": 8950 }, { "epoch": 0.07565810305883347, "grad_norm": 1.1476157903671265, "learning_rate": 3.782825297644178e-06, "loss": 0.0401, "step": 8960 }, { "epoch": 0.07574254290599734, "grad_norm": 0.6368725299835205, "learning_rate": 3.787047200878156e-06, "loss": 0.0283, "step": 8970 }, { "epoch": 0.07582698275316122, "grad_norm": 1.02668297290802, "learning_rate": 3.791269104112134e-06, "loss": 0.0275, "step": 8980 }, { "epoch": 0.07591142260032509, "grad_norm": 1.4313726425170898, "learning_rate": 3.795491007346112e-06, "loss": 0.0311, "step": 8990 }, { "epoch": 0.07599586244748897, "grad_norm": 1.5186009407043457, "learning_rate": 3.79971291058009e-06, "loss": 0.0305, "step": 9000 }, { "epoch": 0.07608030229465285, "grad_norm": 0.7805594205856323, "learning_rate": 3.8039348138140674e-06, "loss": 0.0235, "step": 9010 }, { "epoch": 0.07616474214181672, "grad_norm": 0.7905440330505371, "learning_rate": 3.8081567170480456e-06, "loss": 0.0317, "step": 9020 }, { "epoch": 0.0762491819889806, "grad_norm": 1.6967308521270752, "learning_rate": 3.8123786202820235e-06, "loss": 0.0413, "step": 9030 }, { "epoch": 0.07633362183614448, "grad_norm": 1.6523903608322144, "learning_rate": 3.816600523516002e-06, "loss": 0.0283, "step": 9040 }, { "epoch": 0.07641806168330835, "grad_norm": 0.9541444778442383, "learning_rate": 3.82082242674998e-06, "loss": 0.0277, "step": 9050 }, { "epoch": 0.07650250153047224, "grad_norm": 0.6740541458129883, "learning_rate": 3.825044329983957e-06, "loss": 0.0306, "step": 9060 }, { "epoch": 0.07658694137763611, "grad_norm": 1.2929350137710571, "learning_rate": 3.8292662332179345e-06, "loss": 0.0393, "step": 9070 }, { "epoch": 0.07667138122479998, "grad_norm": 1.8424711227416992, "learning_rate": 3.833488136451912e-06, "loss": 0.026, "step": 9080 }, { "epoch": 0.07675582107196385, "grad_norm": 1.7258563041687012, "learning_rate": 3.837710039685891e-06, "loss": 0.0432, "step": 9090 }, { "epoch": 0.07684026091912774, "grad_norm": 0.9918792843818665, "learning_rate": 3.841931942919869e-06, "loss": 0.032, "step": 9100 }, { "epoch": 0.07692470076629161, "grad_norm": 2.1538615226745605, "learning_rate": 3.846153846153847e-06, "loss": 0.0351, "step": 9110 }, { "epoch": 0.07700914061345548, "grad_norm": 1.01333487033844, "learning_rate": 3.850375749387824e-06, "loss": 0.0328, "step": 9120 }, { "epoch": 0.07709358046061937, "grad_norm": 1.7364563941955566, "learning_rate": 3.854597652621802e-06, "loss": 0.0315, "step": 9130 }, { "epoch": 0.07717802030778324, "grad_norm": 1.2614015340805054, "learning_rate": 3.85881955585578e-06, "loss": 0.0348, "step": 9140 }, { "epoch": 0.07726246015494712, "grad_norm": 1.2331262826919556, "learning_rate": 3.863041459089758e-06, "loss": 0.0313, "step": 9150 }, { "epoch": 0.077346900002111, "grad_norm": 0.7155181765556335, "learning_rate": 3.867263362323736e-06, "loss": 0.026, "step": 9160 }, { "epoch": 0.07743133984927487, "grad_norm": 0.9003651142120361, "learning_rate": 3.871485265557714e-06, "loss": 0.0319, "step": 9170 }, { "epoch": 0.07751577969643875, "grad_norm": 0.5118963718414307, "learning_rate": 3.875707168791692e-06, "loss": 0.0177, "step": 9180 }, { "epoch": 0.07760021954360262, "grad_norm": 0.6280134916305542, "learning_rate": 3.87992907202567e-06, "loss": 0.0274, "step": 9190 }, { "epoch": 0.0776846593907665, "grad_norm": 0.8695781826972961, "learning_rate": 3.8841509752596475e-06, "loss": 0.0353, "step": 9200 }, { "epoch": 0.07776909923793038, "grad_norm": 1.8359739780426025, "learning_rate": 3.888372878493625e-06, "loss": 0.0264, "step": 9210 }, { "epoch": 0.07785353908509425, "grad_norm": 1.0486769676208496, "learning_rate": 3.892594781727603e-06, "loss": 0.0328, "step": 9220 }, { "epoch": 0.07793797893225814, "grad_norm": 1.391336441040039, "learning_rate": 3.896816684961581e-06, "loss": 0.035, "step": 9230 }, { "epoch": 0.07802241877942201, "grad_norm": 1.0788627862930298, "learning_rate": 3.901038588195559e-06, "loss": 0.0338, "step": 9240 }, { "epoch": 0.07810685862658588, "grad_norm": 0.7602897882461548, "learning_rate": 3.905260491429537e-06, "loss": 0.0265, "step": 9250 }, { "epoch": 0.07819129847374977, "grad_norm": 1.5634723901748657, "learning_rate": 3.909482394663515e-06, "loss": 0.0326, "step": 9260 }, { "epoch": 0.07827573832091364, "grad_norm": 1.5248656272888184, "learning_rate": 3.9137042978974925e-06, "loss": 0.0292, "step": 9270 }, { "epoch": 0.07836017816807751, "grad_norm": 2.163184881210327, "learning_rate": 3.91792620113147e-06, "loss": 0.027, "step": 9280 }, { "epoch": 0.0784446180152414, "grad_norm": 1.5659383535385132, "learning_rate": 3.922148104365448e-06, "loss": 0.0221, "step": 9290 }, { "epoch": 0.07852905786240527, "grad_norm": 0.92411869764328, "learning_rate": 3.926370007599426e-06, "loss": 0.0349, "step": 9300 }, { "epoch": 0.07861349770956914, "grad_norm": 1.2900539636611938, "learning_rate": 3.930591910833404e-06, "loss": 0.0341, "step": 9310 }, { "epoch": 0.07869793755673302, "grad_norm": 1.0460326671600342, "learning_rate": 3.934813814067382e-06, "loss": 0.0384, "step": 9320 }, { "epoch": 0.0787823774038969, "grad_norm": 1.2649474143981934, "learning_rate": 3.93903571730136e-06, "loss": 0.021, "step": 9330 }, { "epoch": 0.07886681725106078, "grad_norm": 1.157718300819397, "learning_rate": 3.9432576205353375e-06, "loss": 0.0441, "step": 9340 }, { "epoch": 0.07895125709822465, "grad_norm": 1.8719513416290283, "learning_rate": 3.947479523769315e-06, "loss": 0.0391, "step": 9350 }, { "epoch": 0.07903569694538853, "grad_norm": 0.7655966281890869, "learning_rate": 3.951701427003293e-06, "loss": 0.0321, "step": 9360 }, { "epoch": 0.0791201367925524, "grad_norm": 0.8140991926193237, "learning_rate": 3.955923330237271e-06, "loss": 0.0326, "step": 9370 }, { "epoch": 0.07920457663971628, "grad_norm": 1.3929857015609741, "learning_rate": 3.960145233471249e-06, "loss": 0.0244, "step": 9380 }, { "epoch": 0.07928901648688017, "grad_norm": 1.7076337337493896, "learning_rate": 3.964367136705227e-06, "loss": 0.0401, "step": 9390 }, { "epoch": 0.07937345633404404, "grad_norm": 1.0542619228363037, "learning_rate": 3.9685890399392055e-06, "loss": 0.0405, "step": 9400 }, { "epoch": 0.07945789618120791, "grad_norm": 1.1655197143554688, "learning_rate": 3.9728109431731825e-06, "loss": 0.0429, "step": 9410 }, { "epoch": 0.07954233602837178, "grad_norm": 0.8588681221008301, "learning_rate": 3.97703284640716e-06, "loss": 0.0307, "step": 9420 }, { "epoch": 0.07962677587553567, "grad_norm": 1.3781098127365112, "learning_rate": 3.981254749641138e-06, "loss": 0.0347, "step": 9430 }, { "epoch": 0.07971121572269954, "grad_norm": 1.5042544603347778, "learning_rate": 3.985476652875116e-06, "loss": 0.0484, "step": 9440 }, { "epoch": 0.07979565556986341, "grad_norm": 1.1244189739227295, "learning_rate": 3.989698556109095e-06, "loss": 0.0355, "step": 9450 }, { "epoch": 0.0798800954170273, "grad_norm": 0.3910805583000183, "learning_rate": 3.993920459343072e-06, "loss": 0.0146, "step": 9460 }, { "epoch": 0.07996453526419117, "grad_norm": 0.6644776463508606, "learning_rate": 3.99814236257705e-06, "loss": 0.0258, "step": 9470 }, { "epoch": 0.08004897511135504, "grad_norm": 1.429508924484253, "learning_rate": 4.0023642658110276e-06, "loss": 0.0385, "step": 9480 }, { "epoch": 0.08013341495851893, "grad_norm": 2.4141499996185303, "learning_rate": 4.006586169045006e-06, "loss": 0.0384, "step": 9490 }, { "epoch": 0.0802178548056828, "grad_norm": 1.182749629020691, "learning_rate": 4.010808072278984e-06, "loss": 0.0256, "step": 9500 }, { "epoch": 0.08030229465284668, "grad_norm": 1.2712204456329346, "learning_rate": 4.015029975512962e-06, "loss": 0.0434, "step": 9510 }, { "epoch": 0.08038673450001055, "grad_norm": 1.7979804277420044, "learning_rate": 4.019251878746939e-06, "loss": 0.0255, "step": 9520 }, { "epoch": 0.08047117434717443, "grad_norm": 1.4779351949691772, "learning_rate": 4.023473781980917e-06, "loss": 0.028, "step": 9530 }, { "epoch": 0.08055561419433831, "grad_norm": 0.8053571581840515, "learning_rate": 4.0276956852148956e-06, "loss": 0.027, "step": 9540 }, { "epoch": 0.08064005404150218, "grad_norm": 0.7053826451301575, "learning_rate": 4.031917588448873e-06, "loss": 0.0303, "step": 9550 }, { "epoch": 0.08072449388866607, "grad_norm": 1.1467522382736206, "learning_rate": 4.036139491682851e-06, "loss": 0.0312, "step": 9560 }, { "epoch": 0.08080893373582994, "grad_norm": 0.6039867401123047, "learning_rate": 4.040361394916829e-06, "loss": 0.0307, "step": 9570 }, { "epoch": 0.08089337358299381, "grad_norm": 1.9366611242294312, "learning_rate": 4.044583298150806e-06, "loss": 0.0284, "step": 9580 }, { "epoch": 0.0809778134301577, "grad_norm": 1.3453168869018555, "learning_rate": 4.048805201384785e-06, "loss": 0.0355, "step": 9590 }, { "epoch": 0.08106225327732157, "grad_norm": 1.0266505479812622, "learning_rate": 4.053027104618763e-06, "loss": 0.0264, "step": 9600 }, { "epoch": 0.08114669312448544, "grad_norm": 1.6336406469345093, "learning_rate": 4.0572490078527406e-06, "loss": 0.0329, "step": 9610 }, { "epoch": 0.08123113297164933, "grad_norm": 0.9103832244873047, "learning_rate": 4.061470911086718e-06, "loss": 0.0386, "step": 9620 }, { "epoch": 0.0813155728188132, "grad_norm": 1.8317092657089233, "learning_rate": 4.0656928143206954e-06, "loss": 0.0394, "step": 9630 }, { "epoch": 0.08140001266597707, "grad_norm": 0.4586988687515259, "learning_rate": 4.069914717554674e-06, "loss": 0.0415, "step": 9640 }, { "epoch": 0.08148445251314095, "grad_norm": 0.9703102111816406, "learning_rate": 4.074136620788652e-06, "loss": 0.0363, "step": 9650 }, { "epoch": 0.08156889236030483, "grad_norm": 0.5389079451560974, "learning_rate": 4.07835852402263e-06, "loss": 0.0295, "step": 9660 }, { "epoch": 0.0816533322074687, "grad_norm": 0.422519326210022, "learning_rate": 4.082580427256608e-06, "loss": 0.0337, "step": 9670 }, { "epoch": 0.08173777205463258, "grad_norm": 2.5252487659454346, "learning_rate": 4.0868023304905856e-06, "loss": 0.0246, "step": 9680 }, { "epoch": 0.08182221190179646, "grad_norm": 1.340192198753357, "learning_rate": 4.0910242337245634e-06, "loss": 0.0331, "step": 9690 }, { "epoch": 0.08190665174896034, "grad_norm": 1.420854091644287, "learning_rate": 4.095246136958541e-06, "loss": 0.019, "step": 9700 }, { "epoch": 0.08199109159612421, "grad_norm": 0.9645056128501892, "learning_rate": 4.099468040192519e-06, "loss": 0.0246, "step": 9710 }, { "epoch": 0.0820755314432881, "grad_norm": 1.1500180959701538, "learning_rate": 4.103689943426497e-06, "loss": 0.0362, "step": 9720 }, { "epoch": 0.08215997129045197, "grad_norm": 1.9446067810058594, "learning_rate": 4.107911846660475e-06, "loss": 0.03, "step": 9730 }, { "epoch": 0.08224441113761584, "grad_norm": 1.825728416442871, "learning_rate": 4.112133749894453e-06, "loss": 0.0689, "step": 9740 }, { "epoch": 0.08232885098477971, "grad_norm": 1.6028759479522705, "learning_rate": 4.116355653128431e-06, "loss": 0.0254, "step": 9750 }, { "epoch": 0.0824132908319436, "grad_norm": 0.46126121282577515, "learning_rate": 4.1205775563624084e-06, "loss": 0.029, "step": 9760 }, { "epoch": 0.08249773067910747, "grad_norm": 0.9906089305877686, "learning_rate": 4.124799459596386e-06, "loss": 0.0317, "step": 9770 }, { "epoch": 0.08258217052627134, "grad_norm": 1.119531512260437, "learning_rate": 4.129021362830364e-06, "loss": 0.0482, "step": 9780 }, { "epoch": 0.08266661037343523, "grad_norm": 0.6701840758323669, "learning_rate": 4.133243266064342e-06, "loss": 0.0262, "step": 9790 }, { "epoch": 0.0827510502205991, "grad_norm": 2.2376348972320557, "learning_rate": 4.13746516929832e-06, "loss": 0.0393, "step": 9800 }, { "epoch": 0.08283549006776297, "grad_norm": 1.896157145500183, "learning_rate": 4.141687072532298e-06, "loss": 0.0383, "step": 9810 }, { "epoch": 0.08291992991492686, "grad_norm": 0.9660143852233887, "learning_rate": 4.145908975766276e-06, "loss": 0.0497, "step": 9820 }, { "epoch": 0.08300436976209073, "grad_norm": 0.8111022710800171, "learning_rate": 4.1501308790002534e-06, "loss": 0.0453, "step": 9830 }, { "epoch": 0.0830888096092546, "grad_norm": 1.2365968227386475, "learning_rate": 4.154352782234231e-06, "loss": 0.03, "step": 9840 }, { "epoch": 0.08317324945641848, "grad_norm": 1.4215306043624878, "learning_rate": 4.15857468546821e-06, "loss": 0.028, "step": 9850 }, { "epoch": 0.08325768930358236, "grad_norm": 1.199766993522644, "learning_rate": 4.162796588702187e-06, "loss": 0.0477, "step": 9860 }, { "epoch": 0.08334212915074624, "grad_norm": 0.7885210514068604, "learning_rate": 4.167018491936165e-06, "loss": 0.0376, "step": 9870 }, { "epoch": 0.08342656899791011, "grad_norm": 1.4037879705429077, "learning_rate": 4.171240395170143e-06, "loss": 0.0308, "step": 9880 }, { "epoch": 0.083511008845074, "grad_norm": 0.8547550439834595, "learning_rate": 4.175462298404121e-06, "loss": 0.0261, "step": 9890 }, { "epoch": 0.08359544869223787, "grad_norm": 1.0998724699020386, "learning_rate": 4.179684201638099e-06, "loss": 0.037, "step": 9900 }, { "epoch": 0.08367988853940174, "grad_norm": 1.2577742338180542, "learning_rate": 4.183906104872077e-06, "loss": 0.0311, "step": 9910 }, { "epoch": 0.08376432838656563, "grad_norm": 1.3292673826217651, "learning_rate": 4.188128008106054e-06, "loss": 0.0196, "step": 9920 }, { "epoch": 0.0838487682337295, "grad_norm": 1.1275172233581543, "learning_rate": 4.192349911340032e-06, "loss": 0.0288, "step": 9930 }, { "epoch": 0.08393320808089337, "grad_norm": 0.9156857132911682, "learning_rate": 4.19657181457401e-06, "loss": 0.031, "step": 9940 }, { "epoch": 0.08401764792805726, "grad_norm": 0.20626384019851685, "learning_rate": 4.200793717807989e-06, "loss": 0.0308, "step": 9950 }, { "epoch": 0.08410208777522113, "grad_norm": 0.9342350363731384, "learning_rate": 4.2050156210419665e-06, "loss": 0.0241, "step": 9960 }, { "epoch": 0.084186527622385, "grad_norm": 1.00840425491333, "learning_rate": 4.2092375242759435e-06, "loss": 0.0298, "step": 9970 }, { "epoch": 0.08427096746954887, "grad_norm": 1.2112725973129272, "learning_rate": 4.213459427509921e-06, "loss": 0.0315, "step": 9980 }, { "epoch": 0.08435540731671276, "grad_norm": 0.8788563013076782, "learning_rate": 4.2176813307439e-06, "loss": 0.0319, "step": 9990 }, { "epoch": 0.08443984716387663, "grad_norm": 4.346916198730469, "learning_rate": 4.221903233977878e-06, "loss": 0.0196, "step": 10000 }, { "epoch": 0.0845242870110405, "grad_norm": 0.7615471482276917, "learning_rate": 4.226125137211856e-06, "loss": 0.0321, "step": 10010 }, { "epoch": 0.08460872685820439, "grad_norm": 2.5091872215270996, "learning_rate": 4.230347040445834e-06, "loss": 0.0179, "step": 10020 }, { "epoch": 0.08469316670536826, "grad_norm": 1.359775185585022, "learning_rate": 4.234568943679811e-06, "loss": 0.0431, "step": 10030 }, { "epoch": 0.08477760655253214, "grad_norm": 0.7384734749794006, "learning_rate": 4.238790846913789e-06, "loss": 0.0335, "step": 10040 }, { "epoch": 0.08486204639969602, "grad_norm": 0.643878698348999, "learning_rate": 4.243012750147767e-06, "loss": 0.0272, "step": 10050 }, { "epoch": 0.0849464862468599, "grad_norm": 0.6757429838180542, "learning_rate": 4.247234653381745e-06, "loss": 0.0313, "step": 10060 }, { "epoch": 0.08503092609402377, "grad_norm": 0.889263391494751, "learning_rate": 4.251456556615723e-06, "loss": 0.0284, "step": 10070 }, { "epoch": 0.08511536594118764, "grad_norm": 1.3034485578536987, "learning_rate": 4.255678459849701e-06, "loss": 0.0408, "step": 10080 }, { "epoch": 0.08519980578835153, "grad_norm": 0.9900567531585693, "learning_rate": 4.259900363083679e-06, "loss": 0.03, "step": 10090 }, { "epoch": 0.0852842456355154, "grad_norm": 1.0319929122924805, "learning_rate": 4.2641222663176565e-06, "loss": 0.0322, "step": 10100 }, { "epoch": 0.08536868548267927, "grad_norm": 1.8282514810562134, "learning_rate": 4.268344169551634e-06, "loss": 0.0371, "step": 10110 }, { "epoch": 0.08545312532984316, "grad_norm": 1.0180093050003052, "learning_rate": 4.272566072785612e-06, "loss": 0.0302, "step": 10120 }, { "epoch": 0.08553756517700703, "grad_norm": 1.2075330018997192, "learning_rate": 4.27678797601959e-06, "loss": 0.0302, "step": 10130 }, { "epoch": 0.0856220050241709, "grad_norm": 1.107509970664978, "learning_rate": 4.281009879253568e-06, "loss": 0.0321, "step": 10140 }, { "epoch": 0.08570644487133479, "grad_norm": 1.3769199848175049, "learning_rate": 4.285231782487546e-06, "loss": 0.0267, "step": 10150 }, { "epoch": 0.08579088471849866, "grad_norm": 1.0291893482208252, "learning_rate": 4.289453685721524e-06, "loss": 0.0341, "step": 10160 }, { "epoch": 0.08587532456566253, "grad_norm": 1.6488666534423828, "learning_rate": 4.2936755889555015e-06, "loss": 0.0407, "step": 10170 }, { "epoch": 0.0859597644128264, "grad_norm": 1.0591243505477905, "learning_rate": 4.297897492189479e-06, "loss": 0.0328, "step": 10180 }, { "epoch": 0.08604420425999029, "grad_norm": 1.2053359746932983, "learning_rate": 4.302119395423457e-06, "loss": 0.0401, "step": 10190 }, { "epoch": 0.08612864410715416, "grad_norm": 1.061281681060791, "learning_rate": 4.306341298657435e-06, "loss": 0.0252, "step": 10200 }, { "epoch": 0.08621308395431804, "grad_norm": 0.7290518283843994, "learning_rate": 4.310563201891413e-06, "loss": 0.0211, "step": 10210 }, { "epoch": 0.08629752380148192, "grad_norm": 1.2211976051330566, "learning_rate": 4.314785105125391e-06, "loss": 0.0403, "step": 10220 }, { "epoch": 0.0863819636486458, "grad_norm": 2.076493740081787, "learning_rate": 4.319007008359369e-06, "loss": 0.0519, "step": 10230 }, { "epoch": 0.08646640349580967, "grad_norm": 0.6931390166282654, "learning_rate": 4.3232289115933465e-06, "loss": 0.0365, "step": 10240 }, { "epoch": 0.08655084334297355, "grad_norm": 0.7801313996315002, "learning_rate": 4.327450814827324e-06, "loss": 0.0439, "step": 10250 }, { "epoch": 0.08663528319013743, "grad_norm": 1.1864776611328125, "learning_rate": 4.331672718061302e-06, "loss": 0.0259, "step": 10260 }, { "epoch": 0.0867197230373013, "grad_norm": 1.3120883703231812, "learning_rate": 4.33589462129528e-06, "loss": 0.0395, "step": 10270 }, { "epoch": 0.08680416288446519, "grad_norm": 1.0833054780960083, "learning_rate": 4.340116524529258e-06, "loss": 0.0241, "step": 10280 }, { "epoch": 0.08688860273162906, "grad_norm": 1.76881742477417, "learning_rate": 4.344338427763236e-06, "loss": 0.0259, "step": 10290 }, { "epoch": 0.08697304257879293, "grad_norm": 1.3271806240081787, "learning_rate": 4.3485603309972145e-06, "loss": 0.0247, "step": 10300 }, { "epoch": 0.0870574824259568, "grad_norm": 1.0792794227600098, "learning_rate": 4.3527822342311915e-06, "loss": 0.0344, "step": 10310 }, { "epoch": 0.08714192227312069, "grad_norm": 0.9667630791664124, "learning_rate": 4.357004137465169e-06, "loss": 0.023, "step": 10320 }, { "epoch": 0.08722636212028456, "grad_norm": 1.847463607788086, "learning_rate": 4.361226040699147e-06, "loss": 0.0363, "step": 10330 }, { "epoch": 0.08731080196744843, "grad_norm": 1.6090646982192993, "learning_rate": 4.365447943933125e-06, "loss": 0.0395, "step": 10340 }, { "epoch": 0.08739524181461232, "grad_norm": 0.9305841326713562, "learning_rate": 4.369669847167104e-06, "loss": 0.0513, "step": 10350 }, { "epoch": 0.08747968166177619, "grad_norm": 0.6380230188369751, "learning_rate": 4.373891750401082e-06, "loss": 0.0222, "step": 10360 }, { "epoch": 0.08756412150894007, "grad_norm": 1.385947585105896, "learning_rate": 4.378113653635059e-06, "loss": 0.0281, "step": 10370 }, { "epoch": 0.08764856135610395, "grad_norm": 1.586627721786499, "learning_rate": 4.3823355568690365e-06, "loss": 0.0326, "step": 10380 }, { "epoch": 0.08773300120326782, "grad_norm": 0.9166606068611145, "learning_rate": 4.386557460103014e-06, "loss": 0.0262, "step": 10390 }, { "epoch": 0.0878174410504317, "grad_norm": 0.710888683795929, "learning_rate": 4.390779363336993e-06, "loss": 0.0269, "step": 10400 }, { "epoch": 0.08790188089759557, "grad_norm": 1.4200416803359985, "learning_rate": 4.395001266570971e-06, "loss": 0.0281, "step": 10410 }, { "epoch": 0.08798632074475946, "grad_norm": 0.568482518196106, "learning_rate": 4.399223169804949e-06, "loss": 0.0359, "step": 10420 }, { "epoch": 0.08807076059192333, "grad_norm": 0.9458418488502502, "learning_rate": 4.403445073038926e-06, "loss": 0.0307, "step": 10430 }, { "epoch": 0.0881552004390872, "grad_norm": 0.8332871198654175, "learning_rate": 4.407666976272904e-06, "loss": 0.0229, "step": 10440 }, { "epoch": 0.08823964028625109, "grad_norm": 0.4158794581890106, "learning_rate": 4.411888879506882e-06, "loss": 0.025, "step": 10450 }, { "epoch": 0.08832408013341496, "grad_norm": 1.010883092880249, "learning_rate": 4.41611078274086e-06, "loss": 0.025, "step": 10460 }, { "epoch": 0.08840851998057883, "grad_norm": 0.9274232387542725, "learning_rate": 4.420332685974838e-06, "loss": 0.026, "step": 10470 }, { "epoch": 0.08849295982774272, "grad_norm": 0.7487928867340088, "learning_rate": 4.424554589208815e-06, "loss": 0.0196, "step": 10480 }, { "epoch": 0.08857739967490659, "grad_norm": 0.8249865174293518, "learning_rate": 4.428776492442794e-06, "loss": 0.0421, "step": 10490 }, { "epoch": 0.08866183952207046, "grad_norm": 1.2269014120101929, "learning_rate": 4.432998395676772e-06, "loss": 0.022, "step": 10500 }, { "epoch": 0.08874627936923433, "grad_norm": 1.1112946271896362, "learning_rate": 4.4372202989107495e-06, "loss": 0.0329, "step": 10510 }, { "epoch": 0.08883071921639822, "grad_norm": 0.8289046287536621, "learning_rate": 4.441442202144727e-06, "loss": 0.0292, "step": 10520 }, { "epoch": 0.0889151590635621, "grad_norm": 1.0705268383026123, "learning_rate": 4.445664105378705e-06, "loss": 0.0297, "step": 10530 }, { "epoch": 0.08899959891072597, "grad_norm": 0.7257004976272583, "learning_rate": 4.449886008612683e-06, "loss": 0.0339, "step": 10540 }, { "epoch": 0.08908403875788985, "grad_norm": 0.9540268182754517, "learning_rate": 4.454107911846661e-06, "loss": 0.0392, "step": 10550 }, { "epoch": 0.08916847860505372, "grad_norm": 1.6594294309616089, "learning_rate": 4.458329815080639e-06, "loss": 0.0328, "step": 10560 }, { "epoch": 0.0892529184522176, "grad_norm": 1.0142652988433838, "learning_rate": 4.462551718314617e-06, "loss": 0.0277, "step": 10570 }, { "epoch": 0.08933735829938148, "grad_norm": 0.7268463373184204, "learning_rate": 4.4667736215485945e-06, "loss": 0.0288, "step": 10580 }, { "epoch": 0.08942179814654536, "grad_norm": 1.6687254905700684, "learning_rate": 4.470995524782572e-06, "loss": 0.0452, "step": 10590 }, { "epoch": 0.08950623799370923, "grad_norm": 1.1632682085037231, "learning_rate": 4.47521742801655e-06, "loss": 0.0273, "step": 10600 }, { "epoch": 0.08959067784087311, "grad_norm": 1.2766759395599365, "learning_rate": 4.479439331250528e-06, "loss": 0.0244, "step": 10610 }, { "epoch": 0.08967511768803699, "grad_norm": 0.8489068746566772, "learning_rate": 4.483661234484506e-06, "loss": 0.0343, "step": 10620 }, { "epoch": 0.08975955753520086, "grad_norm": 0.980724036693573, "learning_rate": 4.487883137718484e-06, "loss": 0.0354, "step": 10630 }, { "epoch": 0.08984399738236473, "grad_norm": 2.0402214527130127, "learning_rate": 4.492105040952462e-06, "loss": 0.0425, "step": 10640 }, { "epoch": 0.08992843722952862, "grad_norm": 0.9290620684623718, "learning_rate": 4.4963269441864395e-06, "loss": 0.0242, "step": 10650 }, { "epoch": 0.09001287707669249, "grad_norm": 1.289878249168396, "learning_rate": 4.500548847420417e-06, "loss": 0.0209, "step": 10660 }, { "epoch": 0.09009731692385636, "grad_norm": 2.0698328018188477, "learning_rate": 4.504770750654395e-06, "loss": 0.0417, "step": 10670 }, { "epoch": 0.09018175677102025, "grad_norm": 1.4921693801879883, "learning_rate": 4.508992653888373e-06, "loss": 0.0277, "step": 10680 }, { "epoch": 0.09026619661818412, "grad_norm": 0.8605479001998901, "learning_rate": 4.513214557122351e-06, "loss": 0.0346, "step": 10690 }, { "epoch": 0.090350636465348, "grad_norm": 1.2852251529693604, "learning_rate": 4.517436460356329e-06, "loss": 0.033, "step": 10700 }, { "epoch": 0.09043507631251188, "grad_norm": 0.9639057517051697, "learning_rate": 4.521658363590307e-06, "loss": 0.0338, "step": 10710 }, { "epoch": 0.09051951615967575, "grad_norm": 1.2124056816101074, "learning_rate": 4.5258802668242845e-06, "loss": 0.022, "step": 10720 }, { "epoch": 0.09060395600683963, "grad_norm": 1.369723916053772, "learning_rate": 4.530102170058262e-06, "loss": 0.0472, "step": 10730 }, { "epoch": 0.0906883958540035, "grad_norm": 0.8712310791015625, "learning_rate": 4.53432407329224e-06, "loss": 0.0332, "step": 10740 }, { "epoch": 0.09077283570116738, "grad_norm": 1.2175159454345703, "learning_rate": 4.538545976526218e-06, "loss": 0.0327, "step": 10750 }, { "epoch": 0.09085727554833126, "grad_norm": 1.418246865272522, "learning_rate": 4.542767879760197e-06, "loss": 0.0293, "step": 10760 }, { "epoch": 0.09094171539549513, "grad_norm": 1.116827130317688, "learning_rate": 4.546989782994174e-06, "loss": 0.0355, "step": 10770 }, { "epoch": 0.09102615524265902, "grad_norm": 0.9367241859436035, "learning_rate": 4.551211686228152e-06, "loss": 0.0344, "step": 10780 }, { "epoch": 0.09111059508982289, "grad_norm": 0.39470547437667847, "learning_rate": 4.5554335894621295e-06, "loss": 0.0187, "step": 10790 }, { "epoch": 0.09119503493698676, "grad_norm": 1.9281331300735474, "learning_rate": 4.559655492696108e-06, "loss": 0.044, "step": 10800 }, { "epoch": 0.09127947478415065, "grad_norm": 0.8667682409286499, "learning_rate": 4.563877395930086e-06, "loss": 0.0342, "step": 10810 }, { "epoch": 0.09136391463131452, "grad_norm": 0.7727762460708618, "learning_rate": 4.568099299164064e-06, "loss": 0.0382, "step": 10820 }, { "epoch": 0.09144835447847839, "grad_norm": 1.2156111001968384, "learning_rate": 4.572321202398041e-06, "loss": 0.0267, "step": 10830 }, { "epoch": 0.09153279432564226, "grad_norm": 0.7788799405097961, "learning_rate": 4.576543105632019e-06, "loss": 0.0294, "step": 10840 }, { "epoch": 0.09161723417280615, "grad_norm": 1.2835838794708252, "learning_rate": 4.5807650088659975e-06, "loss": 0.0388, "step": 10850 }, { "epoch": 0.09170167401997002, "grad_norm": 1.0006293058395386, "learning_rate": 4.584986912099975e-06, "loss": 0.036, "step": 10860 }, { "epoch": 0.0917861138671339, "grad_norm": 1.1320537328720093, "learning_rate": 4.589208815333953e-06, "loss": 0.0421, "step": 10870 }, { "epoch": 0.09187055371429778, "grad_norm": 0.18440942466259003, "learning_rate": 4.59343071856793e-06, "loss": 0.027, "step": 10880 }, { "epoch": 0.09195499356146165, "grad_norm": 0.7702540755271912, "learning_rate": 4.597652621801908e-06, "loss": 0.037, "step": 10890 }, { "epoch": 0.09203943340862553, "grad_norm": 0.6885334849357605, "learning_rate": 4.601874525035887e-06, "loss": 0.0286, "step": 10900 }, { "epoch": 0.09212387325578941, "grad_norm": 1.074554204940796, "learning_rate": 4.606096428269865e-06, "loss": 0.0534, "step": 10910 }, { "epoch": 0.09220831310295328, "grad_norm": 0.88800448179245, "learning_rate": 4.6103183315038426e-06, "loss": 0.0243, "step": 10920 }, { "epoch": 0.09229275295011716, "grad_norm": 0.9595540761947632, "learning_rate": 4.61454023473782e-06, "loss": 0.0207, "step": 10930 }, { "epoch": 0.09237719279728104, "grad_norm": 0.872346043586731, "learning_rate": 4.618762137971797e-06, "loss": 0.0256, "step": 10940 }, { "epoch": 0.09246163264444492, "grad_norm": 0.8001552224159241, "learning_rate": 4.622984041205776e-06, "loss": 0.0162, "step": 10950 }, { "epoch": 0.09254607249160879, "grad_norm": 1.6065597534179688, "learning_rate": 4.627205944439754e-06, "loss": 0.0228, "step": 10960 }, { "epoch": 0.09263051233877266, "grad_norm": 1.9184376001358032, "learning_rate": 4.631427847673732e-06, "loss": 0.0287, "step": 10970 }, { "epoch": 0.09271495218593655, "grad_norm": 2.190236806869507, "learning_rate": 4.63564975090771e-06, "loss": 0.0331, "step": 10980 }, { "epoch": 0.09279939203310042, "grad_norm": 1.6335341930389404, "learning_rate": 4.6398716541416876e-06, "loss": 0.0247, "step": 10990 }, { "epoch": 0.09288383188026429, "grad_norm": 0.4950500726699829, "learning_rate": 4.644093557375665e-06, "loss": 0.0315, "step": 11000 }, { "epoch": 0.09296827172742818, "grad_norm": 1.1463027000427246, "learning_rate": 4.648315460609643e-06, "loss": 0.0217, "step": 11010 }, { "epoch": 0.09305271157459205, "grad_norm": 0.9832736849784851, "learning_rate": 4.652537363843621e-06, "loss": 0.0393, "step": 11020 }, { "epoch": 0.09313715142175592, "grad_norm": 1.0337364673614502, "learning_rate": 4.656759267077599e-06, "loss": 0.0261, "step": 11030 }, { "epoch": 0.09322159126891981, "grad_norm": 1.1954268217086792, "learning_rate": 4.660981170311577e-06, "loss": 0.0405, "step": 11040 }, { "epoch": 0.09330603111608368, "grad_norm": 1.1922470331192017, "learning_rate": 4.665203073545555e-06, "loss": 0.0365, "step": 11050 }, { "epoch": 0.09339047096324755, "grad_norm": 0.8326893448829651, "learning_rate": 4.6694249767795326e-06, "loss": 0.0427, "step": 11060 }, { "epoch": 0.09347491081041143, "grad_norm": 1.123311161994934, "learning_rate": 4.6736468800135104e-06, "loss": 0.0276, "step": 11070 }, { "epoch": 0.09355935065757531, "grad_norm": 1.2775001525878906, "learning_rate": 4.677868783247488e-06, "loss": 0.0279, "step": 11080 }, { "epoch": 0.09364379050473919, "grad_norm": 1.011237621307373, "learning_rate": 4.682090686481466e-06, "loss": 0.037, "step": 11090 }, { "epoch": 0.09372823035190306, "grad_norm": 1.8337252140045166, "learning_rate": 4.686312589715444e-06, "loss": 0.0367, "step": 11100 }, { "epoch": 0.09381267019906694, "grad_norm": 1.4401123523712158, "learning_rate": 4.690534492949422e-06, "loss": 0.0299, "step": 11110 }, { "epoch": 0.09389711004623082, "grad_norm": 0.90699303150177, "learning_rate": 4.6947563961834e-06, "loss": 0.0255, "step": 11120 }, { "epoch": 0.09398154989339469, "grad_norm": 1.1037108898162842, "learning_rate": 4.698978299417378e-06, "loss": 0.0336, "step": 11130 }, { "epoch": 0.09406598974055858, "grad_norm": 0.830750584602356, "learning_rate": 4.7032002026513554e-06, "loss": 0.0167, "step": 11140 }, { "epoch": 0.09415042958772245, "grad_norm": 1.2472606897354126, "learning_rate": 4.707422105885333e-06, "loss": 0.0401, "step": 11150 }, { "epoch": 0.09423486943488632, "grad_norm": 0.9619490504264832, "learning_rate": 4.711644009119312e-06, "loss": 0.0294, "step": 11160 }, { "epoch": 0.09431930928205019, "grad_norm": 0.6588186025619507, "learning_rate": 4.715865912353289e-06, "loss": 0.0221, "step": 11170 }, { "epoch": 0.09440374912921408, "grad_norm": 0.8602689504623413, "learning_rate": 4.720087815587267e-06, "loss": 0.029, "step": 11180 }, { "epoch": 0.09448818897637795, "grad_norm": 1.3580924272537231, "learning_rate": 4.724309718821245e-06, "loss": 0.0287, "step": 11190 }, { "epoch": 0.09457262882354182, "grad_norm": 0.7848594784736633, "learning_rate": 4.728531622055223e-06, "loss": 0.0267, "step": 11200 }, { "epoch": 0.09465706867070571, "grad_norm": 1.5960345268249512, "learning_rate": 4.732753525289201e-06, "loss": 0.0315, "step": 11210 }, { "epoch": 0.09474150851786958, "grad_norm": 0.794005274772644, "learning_rate": 4.736975428523178e-06, "loss": 0.0299, "step": 11220 }, { "epoch": 0.09482594836503346, "grad_norm": 1.3229602575302124, "learning_rate": 4.741197331757156e-06, "loss": 0.0532, "step": 11230 }, { "epoch": 0.09491038821219734, "grad_norm": 2.241579294204712, "learning_rate": 4.745419234991134e-06, "loss": 0.0314, "step": 11240 }, { "epoch": 0.09499482805936121, "grad_norm": 1.472212791442871, "learning_rate": 4.749641138225112e-06, "loss": 0.0395, "step": 11250 }, { "epoch": 0.09507926790652509, "grad_norm": 1.2554235458374023, "learning_rate": 4.753863041459091e-06, "loss": 0.0308, "step": 11260 }, { "epoch": 0.09516370775368897, "grad_norm": 1.0540988445281982, "learning_rate": 4.7580849446930684e-06, "loss": 0.048, "step": 11270 }, { "epoch": 0.09524814760085284, "grad_norm": 1.8605648279190063, "learning_rate": 4.7623068479270455e-06, "loss": 0.0324, "step": 11280 }, { "epoch": 0.09533258744801672, "grad_norm": 1.4153296947479248, "learning_rate": 4.766528751161023e-06, "loss": 0.0313, "step": 11290 }, { "epoch": 0.09541702729518059, "grad_norm": 1.0071780681610107, "learning_rate": 4.770750654395002e-06, "loss": 0.043, "step": 11300 }, { "epoch": 0.09550146714234448, "grad_norm": 1.3805187940597534, "learning_rate": 4.77497255762898e-06, "loss": 0.035, "step": 11310 }, { "epoch": 0.09558590698950835, "grad_norm": 0.8294539451599121, "learning_rate": 4.779194460862958e-06, "loss": 0.023, "step": 11320 }, { "epoch": 0.09567034683667222, "grad_norm": 1.0577280521392822, "learning_rate": 4.783416364096936e-06, "loss": 0.0394, "step": 11330 }, { "epoch": 0.09575478668383611, "grad_norm": 0.5819516181945801, "learning_rate": 4.787638267330913e-06, "loss": 0.0206, "step": 11340 }, { "epoch": 0.09583922653099998, "grad_norm": 2.1717584133148193, "learning_rate": 4.791860170564891e-06, "loss": 0.0266, "step": 11350 }, { "epoch": 0.09592366637816385, "grad_norm": 0.7110533118247986, "learning_rate": 4.796082073798869e-06, "loss": 0.0235, "step": 11360 }, { "epoch": 0.09600810622532774, "grad_norm": 0.43701228499412537, "learning_rate": 4.800303977032847e-06, "loss": 0.018, "step": 11370 }, { "epoch": 0.09609254607249161, "grad_norm": 0.7197879552841187, "learning_rate": 4.804525880266825e-06, "loss": 0.0374, "step": 11380 }, { "epoch": 0.09617698591965548, "grad_norm": 0.914080798625946, "learning_rate": 4.808747783500802e-06, "loss": 0.0415, "step": 11390 }, { "epoch": 0.09626142576681936, "grad_norm": 1.3926634788513184, "learning_rate": 4.812969686734781e-06, "loss": 0.0454, "step": 11400 }, { "epoch": 0.09634586561398324, "grad_norm": 1.7053653001785278, "learning_rate": 4.8171915899687585e-06, "loss": 0.0311, "step": 11410 }, { "epoch": 0.09643030546114711, "grad_norm": 1.2574200630187988, "learning_rate": 4.821413493202736e-06, "loss": 0.029, "step": 11420 }, { "epoch": 0.09651474530831099, "grad_norm": 1.3738868236541748, "learning_rate": 4.825635396436714e-06, "loss": 0.0326, "step": 11430 }, { "epoch": 0.09659918515547487, "grad_norm": 1.4195475578308105, "learning_rate": 4.829857299670692e-06, "loss": 0.0321, "step": 11440 }, { "epoch": 0.09668362500263875, "grad_norm": 0.5949720740318298, "learning_rate": 4.83407920290467e-06, "loss": 0.0214, "step": 11450 }, { "epoch": 0.09676806484980262, "grad_norm": 0.9535499811172485, "learning_rate": 4.838301106138648e-06, "loss": 0.0244, "step": 11460 }, { "epoch": 0.0968525046969665, "grad_norm": 2.884915590286255, "learning_rate": 4.842523009372626e-06, "loss": 0.0332, "step": 11470 }, { "epoch": 0.09693694454413038, "grad_norm": 0.9535365104675293, "learning_rate": 4.8467449126066035e-06, "loss": 0.0321, "step": 11480 }, { "epoch": 0.09702138439129425, "grad_norm": 1.1172243356704712, "learning_rate": 4.850966815840581e-06, "loss": 0.0264, "step": 11490 }, { "epoch": 0.09710582423845812, "grad_norm": 0.5340768098831177, "learning_rate": 4.855188719074559e-06, "loss": 0.0318, "step": 11500 }, { "epoch": 0.09719026408562201, "grad_norm": 1.3799405097961426, "learning_rate": 4.859410622308537e-06, "loss": 0.0269, "step": 11510 }, { "epoch": 0.09727470393278588, "grad_norm": 2.048647165298462, "learning_rate": 4.863632525542515e-06, "loss": 0.0419, "step": 11520 }, { "epoch": 0.09735914377994975, "grad_norm": 0.9429327845573425, "learning_rate": 4.867854428776493e-06, "loss": 0.0378, "step": 11530 }, { "epoch": 0.09744358362711364, "grad_norm": 1.363776445388794, "learning_rate": 4.872076332010471e-06, "loss": 0.0292, "step": 11540 }, { "epoch": 0.09752802347427751, "grad_norm": 0.6471183896064758, "learning_rate": 4.8762982352444485e-06, "loss": 0.0209, "step": 11550 }, { "epoch": 0.09761246332144138, "grad_norm": 1.4211785793304443, "learning_rate": 4.880520138478426e-06, "loss": 0.0288, "step": 11560 }, { "epoch": 0.09769690316860527, "grad_norm": 1.9359774589538574, "learning_rate": 4.884742041712404e-06, "loss": 0.0443, "step": 11570 }, { "epoch": 0.09778134301576914, "grad_norm": 0.29871413111686707, "learning_rate": 4.888963944946382e-06, "loss": 0.0246, "step": 11580 }, { "epoch": 0.09786578286293302, "grad_norm": 1.0155203342437744, "learning_rate": 4.89318584818036e-06, "loss": 0.0192, "step": 11590 }, { "epoch": 0.0979502227100969, "grad_norm": 2.6247718334198, "learning_rate": 4.897407751414338e-06, "loss": 0.0388, "step": 11600 }, { "epoch": 0.09803466255726077, "grad_norm": 0.8061695098876953, "learning_rate": 4.9016296546483165e-06, "loss": 0.0269, "step": 11610 }, { "epoch": 0.09811910240442465, "grad_norm": 0.634352445602417, "learning_rate": 4.9058515578822935e-06, "loss": 0.0299, "step": 11620 }, { "epoch": 0.09820354225158852, "grad_norm": 1.4300183057785034, "learning_rate": 4.910073461116271e-06, "loss": 0.0298, "step": 11630 }, { "epoch": 0.0982879820987524, "grad_norm": 1.1696181297302246, "learning_rate": 4.914295364350249e-06, "loss": 0.0301, "step": 11640 }, { "epoch": 0.09837242194591628, "grad_norm": 1.2023569345474243, "learning_rate": 4.918517267584227e-06, "loss": 0.021, "step": 11650 }, { "epoch": 0.09845686179308015, "grad_norm": 1.5126733779907227, "learning_rate": 4.922739170818206e-06, "loss": 0.029, "step": 11660 }, { "epoch": 0.09854130164024404, "grad_norm": 1.2772140502929688, "learning_rate": 4.926961074052184e-06, "loss": 0.0272, "step": 11670 }, { "epoch": 0.09862574148740791, "grad_norm": 2.4967234134674072, "learning_rate": 4.931182977286161e-06, "loss": 0.0418, "step": 11680 }, { "epoch": 0.09871018133457178, "grad_norm": 1.1039267778396606, "learning_rate": 4.9354048805201385e-06, "loss": 0.0491, "step": 11690 }, { "epoch": 0.09879462118173567, "grad_norm": 0.34115833044052124, "learning_rate": 4.939626783754116e-06, "loss": 0.0293, "step": 11700 }, { "epoch": 0.09887906102889954, "grad_norm": 2.6173222064971924, "learning_rate": 4.943848686988095e-06, "loss": 0.0244, "step": 11710 }, { "epoch": 0.09896350087606341, "grad_norm": 0.559584379196167, "learning_rate": 4.948070590222073e-06, "loss": 0.0362, "step": 11720 }, { "epoch": 0.09904794072322728, "grad_norm": 0.9466559290885925, "learning_rate": 4.95229249345605e-06, "loss": 0.0254, "step": 11730 }, { "epoch": 0.09913238057039117, "grad_norm": 1.1420207023620605, "learning_rate": 4.956514396690028e-06, "loss": 0.0372, "step": 11740 }, { "epoch": 0.09921682041755504, "grad_norm": 1.0479289293289185, "learning_rate": 4.960736299924006e-06, "loss": 0.0442, "step": 11750 }, { "epoch": 0.09930126026471892, "grad_norm": 0.9160886406898499, "learning_rate": 4.964958203157984e-06, "loss": 0.0214, "step": 11760 }, { "epoch": 0.0993857001118828, "grad_norm": 1.5017277002334595, "learning_rate": 4.969180106391962e-06, "loss": 0.0297, "step": 11770 }, { "epoch": 0.09947013995904667, "grad_norm": 1.0794806480407715, "learning_rate": 4.97340200962594e-06, "loss": 0.0223, "step": 11780 }, { "epoch": 0.09955457980621055, "grad_norm": 0.4908881187438965, "learning_rate": 4.977623912859917e-06, "loss": 0.0309, "step": 11790 }, { "epoch": 0.09963901965337443, "grad_norm": 1.276275396347046, "learning_rate": 4.981845816093896e-06, "loss": 0.04, "step": 11800 }, { "epoch": 0.0997234595005383, "grad_norm": 1.5733819007873535, "learning_rate": 4.986067719327874e-06, "loss": 0.0309, "step": 11810 }, { "epoch": 0.09980789934770218, "grad_norm": 1.0462937355041504, "learning_rate": 4.9902896225618515e-06, "loss": 0.0262, "step": 11820 }, { "epoch": 0.09989233919486605, "grad_norm": 1.0812851190567017, "learning_rate": 4.994511525795829e-06, "loss": 0.0213, "step": 11830 }, { "epoch": 0.09997677904202994, "grad_norm": 0.7582193613052368, "learning_rate": 4.998733429029807e-06, "loss": 0.0352, "step": 11840 }, { "epoch": 0.10006121888919381, "grad_norm": 3.5297086238861084, "learning_rate": 5.002955332263785e-06, "loss": 0.0272, "step": 11850 }, { "epoch": 0.10014565873635768, "grad_norm": 0.8643373847007751, "learning_rate": 5.007177235497763e-06, "loss": 0.0202, "step": 11860 }, { "epoch": 0.10023009858352157, "grad_norm": 1.8133844137191772, "learning_rate": 5.011399138731741e-06, "loss": 0.0255, "step": 11870 }, { "epoch": 0.10031453843068544, "grad_norm": 1.3699244260787964, "learning_rate": 5.015621041965718e-06, "loss": 0.0258, "step": 11880 }, { "epoch": 0.10039897827784931, "grad_norm": 0.7298329472541809, "learning_rate": 5.0198429451996965e-06, "loss": 0.0369, "step": 11890 }, { "epoch": 0.1004834181250132, "grad_norm": 0.529822051525116, "learning_rate": 5.024064848433674e-06, "loss": 0.0268, "step": 11900 }, { "epoch": 0.10056785797217707, "grad_norm": 1.1946791410446167, "learning_rate": 5.028286751667652e-06, "loss": 0.0498, "step": 11910 }, { "epoch": 0.10065229781934094, "grad_norm": 0.6205116510391235, "learning_rate": 5.03250865490163e-06, "loss": 0.0295, "step": 11920 }, { "epoch": 0.10073673766650483, "grad_norm": 0.4280230402946472, "learning_rate": 5.036730558135607e-06, "loss": 0.0323, "step": 11930 }, { "epoch": 0.1008211775136687, "grad_norm": 1.1957944631576538, "learning_rate": 5.040952461369586e-06, "loss": 0.0353, "step": 11940 }, { "epoch": 0.10090561736083258, "grad_norm": 0.9614192247390747, "learning_rate": 5.045174364603564e-06, "loss": 0.0338, "step": 11950 }, { "epoch": 0.10099005720799645, "grad_norm": 1.5906105041503906, "learning_rate": 5.0493962678375415e-06, "loss": 0.0232, "step": 11960 }, { "epoch": 0.10107449705516033, "grad_norm": 0.9024080634117126, "learning_rate": 5.053618171071519e-06, "loss": 0.0262, "step": 11970 }, { "epoch": 0.1011589369023242, "grad_norm": 1.5125590562820435, "learning_rate": 5.057840074305498e-06, "loss": 0.0369, "step": 11980 }, { "epoch": 0.10124337674948808, "grad_norm": 0.6326072812080383, "learning_rate": 5.062061977539475e-06, "loss": 0.0167, "step": 11990 }, { "epoch": 0.10132781659665197, "grad_norm": 0.32587483525276184, "learning_rate": 5.066283880773453e-06, "loss": 0.023, "step": 12000 }, { "epoch": 0.10141225644381584, "grad_norm": 1.5242167711257935, "learning_rate": 5.070505784007431e-06, "loss": 0.0266, "step": 12010 }, { "epoch": 0.10149669629097971, "grad_norm": 2.272656202316284, "learning_rate": 5.074727687241409e-06, "loss": 0.0244, "step": 12020 }, { "epoch": 0.1015811361381436, "grad_norm": 1.3279024362564087, "learning_rate": 5.078949590475387e-06, "loss": 0.0266, "step": 12030 }, { "epoch": 0.10166557598530747, "grad_norm": 0.701909065246582, "learning_rate": 5.083171493709364e-06, "loss": 0.0321, "step": 12040 }, { "epoch": 0.10175001583247134, "grad_norm": 0.7873117923736572, "learning_rate": 5.087393396943342e-06, "loss": 0.0224, "step": 12050 }, { "epoch": 0.10183445567963521, "grad_norm": 0.7820058465003967, "learning_rate": 5.091615300177321e-06, "loss": 0.0233, "step": 12060 }, { "epoch": 0.1019188955267991, "grad_norm": 0.9653288722038269, "learning_rate": 5.095837203411298e-06, "loss": 0.0231, "step": 12070 }, { "epoch": 0.10200333537396297, "grad_norm": 1.138051986694336, "learning_rate": 5.100059106645277e-06, "loss": 0.0224, "step": 12080 }, { "epoch": 0.10208777522112684, "grad_norm": 0.6211355924606323, "learning_rate": 5.104281009879254e-06, "loss": 0.0299, "step": 12090 }, { "epoch": 0.10217221506829073, "grad_norm": 0.7945693135261536, "learning_rate": 5.1085029131132315e-06, "loss": 0.0296, "step": 12100 }, { "epoch": 0.1022566549154546, "grad_norm": 1.4380532503128052, "learning_rate": 5.11272481634721e-06, "loss": 0.0254, "step": 12110 }, { "epoch": 0.10234109476261848, "grad_norm": 1.0973221063613892, "learning_rate": 5.116946719581187e-06, "loss": 0.0252, "step": 12120 }, { "epoch": 0.10242553460978236, "grad_norm": 1.061102271080017, "learning_rate": 5.121168622815166e-06, "loss": 0.03, "step": 12130 }, { "epoch": 0.10250997445694623, "grad_norm": 0.5955126881599426, "learning_rate": 5.125390526049143e-06, "loss": 0.024, "step": 12140 }, { "epoch": 0.10259441430411011, "grad_norm": 0.3357559144496918, "learning_rate": 5.129612429283122e-06, "loss": 0.0221, "step": 12150 }, { "epoch": 0.10267885415127398, "grad_norm": 1.0481810569763184, "learning_rate": 5.1338343325170995e-06, "loss": 0.0367, "step": 12160 }, { "epoch": 0.10276329399843787, "grad_norm": 1.5738970041275024, "learning_rate": 5.1380562357510765e-06, "loss": 0.0292, "step": 12170 }, { "epoch": 0.10284773384560174, "grad_norm": 1.386911153793335, "learning_rate": 5.142278138985055e-06, "loss": 0.0315, "step": 12180 }, { "epoch": 0.10293217369276561, "grad_norm": 1.3201855421066284, "learning_rate": 5.146500042219032e-06, "loss": 0.038, "step": 12190 }, { "epoch": 0.1030166135399295, "grad_norm": 1.383278489112854, "learning_rate": 5.150721945453011e-06, "loss": 0.0451, "step": 12200 }, { "epoch": 0.10310105338709337, "grad_norm": 1.4374507665634155, "learning_rate": 5.154943848686989e-06, "loss": 0.0481, "step": 12210 }, { "epoch": 0.10318549323425724, "grad_norm": 1.3889034986495972, "learning_rate": 5.159165751920966e-06, "loss": 0.0367, "step": 12220 }, { "epoch": 0.10326993308142113, "grad_norm": 1.1625601053237915, "learning_rate": 5.1633876551549445e-06, "loss": 0.0427, "step": 12230 }, { "epoch": 0.103354372928585, "grad_norm": 0.9176104664802551, "learning_rate": 5.1676095583889216e-06, "loss": 0.0299, "step": 12240 }, { "epoch": 0.10343881277574887, "grad_norm": 1.1743849515914917, "learning_rate": 5.1718314616229e-06, "loss": 0.0234, "step": 12250 }, { "epoch": 0.10352325262291276, "grad_norm": 0.8403007388114929, "learning_rate": 5.176053364856878e-06, "loss": 0.0244, "step": 12260 }, { "epoch": 0.10360769247007663, "grad_norm": 1.5533603429794312, "learning_rate": 5.180275268090855e-06, "loss": 0.0314, "step": 12270 }, { "epoch": 0.1036921323172405, "grad_norm": 1.0903252363204956, "learning_rate": 5.184497171324834e-06, "loss": 0.0293, "step": 12280 }, { "epoch": 0.10377657216440438, "grad_norm": 1.9065686464309692, "learning_rate": 5.188719074558811e-06, "loss": 0.0339, "step": 12290 }, { "epoch": 0.10386101201156826, "grad_norm": 3.085045099258423, "learning_rate": 5.1929409777927896e-06, "loss": 0.0245, "step": 12300 }, { "epoch": 0.10394545185873214, "grad_norm": 1.0307116508483887, "learning_rate": 5.197162881026767e-06, "loss": 0.0314, "step": 12310 }, { "epoch": 0.10402989170589601, "grad_norm": 0.9048333168029785, "learning_rate": 5.201384784260745e-06, "loss": 0.0272, "step": 12320 }, { "epoch": 0.1041143315530599, "grad_norm": 1.141571283340454, "learning_rate": 5.205606687494723e-06, "loss": 0.0252, "step": 12330 }, { "epoch": 0.10419877140022377, "grad_norm": 1.3066354990005493, "learning_rate": 5.2098285907287e-06, "loss": 0.0297, "step": 12340 }, { "epoch": 0.10428321124738764, "grad_norm": 0.8762083053588867, "learning_rate": 5.214050493962679e-06, "loss": 0.033, "step": 12350 }, { "epoch": 0.10436765109455153, "grad_norm": 0.5939778089523315, "learning_rate": 5.218272397196657e-06, "loss": 0.0354, "step": 12360 }, { "epoch": 0.1044520909417154, "grad_norm": 1.0119094848632812, "learning_rate": 5.222494300430635e-06, "loss": 0.0202, "step": 12370 }, { "epoch": 0.10453653078887927, "grad_norm": 1.0291643142700195, "learning_rate": 5.226716203664612e-06, "loss": 0.0338, "step": 12380 }, { "epoch": 0.10462097063604314, "grad_norm": 0.783135175704956, "learning_rate": 5.2309381068985894e-06, "loss": 0.0298, "step": 12390 }, { "epoch": 0.10470541048320703, "grad_norm": 2.1016581058502197, "learning_rate": 5.235160010132568e-06, "loss": 0.0283, "step": 12400 }, { "epoch": 0.1047898503303709, "grad_norm": 2.4883337020874023, "learning_rate": 5.239381913366546e-06, "loss": 0.04, "step": 12410 }, { "epoch": 0.10487429017753477, "grad_norm": 1.0630910396575928, "learning_rate": 5.243603816600525e-06, "loss": 0.0278, "step": 12420 }, { "epoch": 0.10495873002469866, "grad_norm": 0.6995007991790771, "learning_rate": 5.247825719834502e-06, "loss": 0.0171, "step": 12430 }, { "epoch": 0.10504316987186253, "grad_norm": 0.9369008541107178, "learning_rate": 5.2520476230684796e-06, "loss": 0.0301, "step": 12440 }, { "epoch": 0.1051276097190264, "grad_norm": 0.8999984860420227, "learning_rate": 5.2562695263024574e-06, "loss": 0.0253, "step": 12450 }, { "epoch": 0.10521204956619029, "grad_norm": 0.865902304649353, "learning_rate": 5.260491429536435e-06, "loss": 0.0173, "step": 12460 }, { "epoch": 0.10529648941335416, "grad_norm": 0.7742636799812317, "learning_rate": 5.264713332770414e-06, "loss": 0.0284, "step": 12470 }, { "epoch": 0.10538092926051804, "grad_norm": 1.3132450580596924, "learning_rate": 5.268935236004391e-06, "loss": 0.0279, "step": 12480 }, { "epoch": 0.10546536910768191, "grad_norm": 0.7006683349609375, "learning_rate": 5.27315713923837e-06, "loss": 0.0258, "step": 12490 }, { "epoch": 0.1055498089548458, "grad_norm": 1.2200804948806763, "learning_rate": 5.277379042472347e-06, "loss": 0.0415, "step": 12500 }, { "epoch": 0.10563424880200967, "grad_norm": 2.0168120861053467, "learning_rate": 5.281600945706325e-06, "loss": 0.0237, "step": 12510 }, { "epoch": 0.10571868864917354, "grad_norm": 1.4212146997451782, "learning_rate": 5.285822848940303e-06, "loss": 0.0325, "step": 12520 }, { "epoch": 0.10580312849633743, "grad_norm": 1.0848870277404785, "learning_rate": 5.29004475217428e-06, "loss": 0.0144, "step": 12530 }, { "epoch": 0.1058875683435013, "grad_norm": 1.0814542770385742, "learning_rate": 5.294266655408259e-06, "loss": 0.0283, "step": 12540 }, { "epoch": 0.10597200819066517, "grad_norm": 0.9629197716712952, "learning_rate": 5.298488558642236e-06, "loss": 0.0217, "step": 12550 }, { "epoch": 0.10605644803782906, "grad_norm": 1.081850528717041, "learning_rate": 5.302710461876214e-06, "loss": 0.0296, "step": 12560 }, { "epoch": 0.10614088788499293, "grad_norm": 1.3140769004821777, "learning_rate": 5.306932365110193e-06, "loss": 0.0308, "step": 12570 }, { "epoch": 0.1062253277321568, "grad_norm": 1.059982419013977, "learning_rate": 5.31115426834417e-06, "loss": 0.0227, "step": 12580 }, { "epoch": 0.10630976757932069, "grad_norm": 1.2478437423706055, "learning_rate": 5.315376171578148e-06, "loss": 0.0327, "step": 12590 }, { "epoch": 0.10639420742648456, "grad_norm": 0.42909982800483704, "learning_rate": 5.319598074812125e-06, "loss": 0.0184, "step": 12600 }, { "epoch": 0.10647864727364843, "grad_norm": 1.5398246049880981, "learning_rate": 5.323819978046103e-06, "loss": 0.0288, "step": 12610 }, { "epoch": 0.1065630871208123, "grad_norm": 0.5509019494056702, "learning_rate": 5.328041881280082e-06, "loss": 0.0316, "step": 12620 }, { "epoch": 0.10664752696797619, "grad_norm": 1.1159168481826782, "learning_rate": 5.332263784514059e-06, "loss": 0.0275, "step": 12630 }, { "epoch": 0.10673196681514006, "grad_norm": 0.7874082326889038, "learning_rate": 5.336485687748038e-06, "loss": 0.0397, "step": 12640 }, { "epoch": 0.10681640666230394, "grad_norm": 0.6179842948913574, "learning_rate": 5.340707590982015e-06, "loss": 0.0196, "step": 12650 }, { "epoch": 0.10690084650946782, "grad_norm": 0.9184685349464417, "learning_rate": 5.344929494215993e-06, "loss": 0.0334, "step": 12660 }, { "epoch": 0.1069852863566317, "grad_norm": 0.6495174765586853, "learning_rate": 5.349151397449971e-06, "loss": 0.024, "step": 12670 }, { "epoch": 0.10706972620379557, "grad_norm": 0.4782678186893463, "learning_rate": 5.353373300683948e-06, "loss": 0.0239, "step": 12680 }, { "epoch": 0.10715416605095945, "grad_norm": 1.0100173950195312, "learning_rate": 5.357595203917927e-06, "loss": 0.0302, "step": 12690 }, { "epoch": 0.10723860589812333, "grad_norm": 0.6996117234230042, "learning_rate": 5.361817107151904e-06, "loss": 0.0146, "step": 12700 }, { "epoch": 0.1073230457452872, "grad_norm": 0.7763306498527527, "learning_rate": 5.366039010385883e-06, "loss": 0.0231, "step": 12710 }, { "epoch": 0.10740748559245107, "grad_norm": 1.3442621231079102, "learning_rate": 5.3702609136198605e-06, "loss": 0.0394, "step": 12720 }, { "epoch": 0.10749192543961496, "grad_norm": 0.6300420761108398, "learning_rate": 5.3744828168538375e-06, "loss": 0.0369, "step": 12730 }, { "epoch": 0.10757636528677883, "grad_norm": 0.7412264347076416, "learning_rate": 5.378704720087816e-06, "loss": 0.0258, "step": 12740 }, { "epoch": 0.1076608051339427, "grad_norm": 1.7320868968963623, "learning_rate": 5.382926623321794e-06, "loss": 0.0431, "step": 12750 }, { "epoch": 0.10774524498110659, "grad_norm": 0.9424993395805359, "learning_rate": 5.387148526555772e-06, "loss": 0.0384, "step": 12760 }, { "epoch": 0.10782968482827046, "grad_norm": 0.879253089427948, "learning_rate": 5.39137042978975e-06, "loss": 0.0183, "step": 12770 }, { "epoch": 0.10791412467543433, "grad_norm": 0.6988537907600403, "learning_rate": 5.3955923330237285e-06, "loss": 0.0193, "step": 12780 }, { "epoch": 0.10799856452259822, "grad_norm": 1.0300732851028442, "learning_rate": 5.3998142362577055e-06, "loss": 0.0322, "step": 12790 }, { "epoch": 0.10808300436976209, "grad_norm": 0.6340742707252502, "learning_rate": 5.404036139491683e-06, "loss": 0.0287, "step": 12800 }, { "epoch": 0.10816744421692596, "grad_norm": 2.4094371795654297, "learning_rate": 5.408258042725661e-06, "loss": 0.0306, "step": 12810 }, { "epoch": 0.10825188406408984, "grad_norm": 0.8663190603256226, "learning_rate": 5.412479945959639e-06, "loss": 0.0365, "step": 12820 }, { "epoch": 0.10833632391125372, "grad_norm": 1.1962352991104126, "learning_rate": 5.416701849193618e-06, "loss": 0.0171, "step": 12830 }, { "epoch": 0.1084207637584176, "grad_norm": 1.2467020750045776, "learning_rate": 5.420923752427595e-06, "loss": 0.0408, "step": 12840 }, { "epoch": 0.10850520360558147, "grad_norm": 1.2257318496704102, "learning_rate": 5.425145655661573e-06, "loss": 0.0259, "step": 12850 }, { "epoch": 0.10858964345274535, "grad_norm": 0.4645140767097473, "learning_rate": 5.4293675588955505e-06, "loss": 0.0263, "step": 12860 }, { "epoch": 0.10867408329990923, "grad_norm": 0.6885578036308289, "learning_rate": 5.433589462129528e-06, "loss": 0.0197, "step": 12870 }, { "epoch": 0.1087585231470731, "grad_norm": 1.3101087808609009, "learning_rate": 5.437811365363507e-06, "loss": 0.0264, "step": 12880 }, { "epoch": 0.10884296299423699, "grad_norm": 0.8228420615196228, "learning_rate": 5.442033268597484e-06, "loss": 0.0276, "step": 12890 }, { "epoch": 0.10892740284140086, "grad_norm": 1.497761845588684, "learning_rate": 5.446255171831462e-06, "loss": 0.0519, "step": 12900 }, { "epoch": 0.10901184268856473, "grad_norm": 1.356361985206604, "learning_rate": 5.45047707506544e-06, "loss": 0.0201, "step": 12910 }, { "epoch": 0.10909628253572862, "grad_norm": 1.0773228406906128, "learning_rate": 5.454698978299418e-06, "loss": 0.0367, "step": 12920 }, { "epoch": 0.10918072238289249, "grad_norm": 1.0675737857818604, "learning_rate": 5.458920881533396e-06, "loss": 0.0247, "step": 12930 }, { "epoch": 0.10926516223005636, "grad_norm": 1.011941909790039, "learning_rate": 5.463142784767373e-06, "loss": 0.0295, "step": 12940 }, { "epoch": 0.10934960207722023, "grad_norm": 2.410801649093628, "learning_rate": 5.467364688001352e-06, "loss": 0.0259, "step": 12950 }, { "epoch": 0.10943404192438412, "grad_norm": 0.7879287004470825, "learning_rate": 5.471586591235329e-06, "loss": 0.0204, "step": 12960 }, { "epoch": 0.10951848177154799, "grad_norm": 1.1133893728256226, "learning_rate": 5.475808494469307e-06, "loss": 0.0271, "step": 12970 }, { "epoch": 0.10960292161871187, "grad_norm": 1.5902358293533325, "learning_rate": 5.480030397703286e-06, "loss": 0.0338, "step": 12980 }, { "epoch": 0.10968736146587575, "grad_norm": 1.2498546838760376, "learning_rate": 5.484252300937263e-06, "loss": 0.0275, "step": 12990 }, { "epoch": 0.10977180131303962, "grad_norm": 0.8307667970657349, "learning_rate": 5.488474204171241e-06, "loss": 0.0227, "step": 13000 }, { "epoch": 0.1098562411602035, "grad_norm": 0.7846692800521851, "learning_rate": 5.492696107405218e-06, "loss": 0.0225, "step": 13010 }, { "epoch": 0.10994068100736738, "grad_norm": 1.6573212146759033, "learning_rate": 5.496918010639196e-06, "loss": 0.0293, "step": 13020 }, { "epoch": 0.11002512085453126, "grad_norm": 0.6472325921058655, "learning_rate": 5.501139913873175e-06, "loss": 0.0183, "step": 13030 }, { "epoch": 0.11010956070169513, "grad_norm": 1.0205951929092407, "learning_rate": 5.505361817107152e-06, "loss": 0.0395, "step": 13040 }, { "epoch": 0.110194000548859, "grad_norm": 0.9254760146141052, "learning_rate": 5.509583720341131e-06, "loss": 0.0302, "step": 13050 }, { "epoch": 0.11027844039602289, "grad_norm": 0.6171848773956299, "learning_rate": 5.5138056235751085e-06, "loss": 0.0221, "step": 13060 }, { "epoch": 0.11036288024318676, "grad_norm": 0.7784627676010132, "learning_rate": 5.5180275268090855e-06, "loss": 0.0314, "step": 13070 }, { "epoch": 0.11044732009035063, "grad_norm": 0.8037761449813843, "learning_rate": 5.522249430043064e-06, "loss": 0.0281, "step": 13080 }, { "epoch": 0.11053175993751452, "grad_norm": 0.8673291802406311, "learning_rate": 5.526471333277041e-06, "loss": 0.0174, "step": 13090 }, { "epoch": 0.11061619978467839, "grad_norm": 1.298331618309021, "learning_rate": 5.53069323651102e-06, "loss": 0.0423, "step": 13100 }, { "epoch": 0.11070063963184226, "grad_norm": 1.0970591306686401, "learning_rate": 5.534915139744998e-06, "loss": 0.0439, "step": 13110 }, { "epoch": 0.11078507947900615, "grad_norm": 1.3340789079666138, "learning_rate": 5.539137042978976e-06, "loss": 0.0332, "step": 13120 }, { "epoch": 0.11086951932617002, "grad_norm": 0.2912949323654175, "learning_rate": 5.5433589462129535e-06, "loss": 0.0208, "step": 13130 }, { "epoch": 0.1109539591733339, "grad_norm": 1.0312498807907104, "learning_rate": 5.5475808494469305e-06, "loss": 0.0319, "step": 13140 }, { "epoch": 0.11103839902049777, "grad_norm": 1.1094084978103638, "learning_rate": 5.551802752680909e-06, "loss": 0.0214, "step": 13150 }, { "epoch": 0.11112283886766165, "grad_norm": 1.2788500785827637, "learning_rate": 5.556024655914887e-06, "loss": 0.033, "step": 13160 }, { "epoch": 0.11120727871482552, "grad_norm": 0.9310007691383362, "learning_rate": 5.560246559148865e-06, "loss": 0.0246, "step": 13170 }, { "epoch": 0.1112917185619894, "grad_norm": 0.7339730262756348, "learning_rate": 5.564468462382843e-06, "loss": 0.0405, "step": 13180 }, { "epoch": 0.11137615840915328, "grad_norm": 0.9753920435905457, "learning_rate": 5.56869036561682e-06, "loss": 0.0324, "step": 13190 }, { "epoch": 0.11146059825631716, "grad_norm": 0.5561636686325073, "learning_rate": 5.5729122688507985e-06, "loss": 0.0201, "step": 13200 }, { "epoch": 0.11154503810348103, "grad_norm": 1.1478428840637207, "learning_rate": 5.577134172084776e-06, "loss": 0.0245, "step": 13210 }, { "epoch": 0.11162947795064491, "grad_norm": 0.739615261554718, "learning_rate": 5.581356075318754e-06, "loss": 0.0284, "step": 13220 }, { "epoch": 0.11171391779780879, "grad_norm": 0.8407266139984131, "learning_rate": 5.585577978552732e-06, "loss": 0.0252, "step": 13230 }, { "epoch": 0.11179835764497266, "grad_norm": 1.1495952606201172, "learning_rate": 5.589799881786709e-06, "loss": 0.031, "step": 13240 }, { "epoch": 0.11188279749213655, "grad_norm": 0.9282408356666565, "learning_rate": 5.594021785020688e-06, "loss": 0.0222, "step": 13250 }, { "epoch": 0.11196723733930042, "grad_norm": 0.8735226988792419, "learning_rate": 5.598243688254666e-06, "loss": 0.02, "step": 13260 }, { "epoch": 0.11205167718646429, "grad_norm": 0.9970313310623169, "learning_rate": 5.6024655914886435e-06, "loss": 0.031, "step": 13270 }, { "epoch": 0.11213611703362816, "grad_norm": 1.0965423583984375, "learning_rate": 5.606687494722621e-06, "loss": 0.0308, "step": 13280 }, { "epoch": 0.11222055688079205, "grad_norm": 1.1154756546020508, "learning_rate": 5.6109093979566e-06, "loss": 0.023, "step": 13290 }, { "epoch": 0.11230499672795592, "grad_norm": 1.257583498954773, "learning_rate": 5.615131301190577e-06, "loss": 0.0292, "step": 13300 }, { "epoch": 0.1123894365751198, "grad_norm": 0.8066310882568359, "learning_rate": 5.619353204424555e-06, "loss": 0.0233, "step": 13310 }, { "epoch": 0.11247387642228368, "grad_norm": 0.4756326377391815, "learning_rate": 5.623575107658533e-06, "loss": 0.017, "step": 13320 }, { "epoch": 0.11255831626944755, "grad_norm": 0.847089409828186, "learning_rate": 5.627797010892511e-06, "loss": 0.0286, "step": 13330 }, { "epoch": 0.11264275611661143, "grad_norm": 0.3670573830604553, "learning_rate": 5.632018914126489e-06, "loss": 0.0205, "step": 13340 }, { "epoch": 0.11272719596377531, "grad_norm": 0.7486038208007812, "learning_rate": 5.636240817360466e-06, "loss": 0.0241, "step": 13350 }, { "epoch": 0.11281163581093918, "grad_norm": 0.4744364023208618, "learning_rate": 5.640462720594444e-06, "loss": 0.0301, "step": 13360 }, { "epoch": 0.11289607565810306, "grad_norm": 0.911292552947998, "learning_rate": 5.644684623828423e-06, "loss": 0.0194, "step": 13370 }, { "epoch": 0.11298051550526693, "grad_norm": 1.0206451416015625, "learning_rate": 5.6489065270624e-06, "loss": 0.0198, "step": 13380 }, { "epoch": 0.11306495535243082, "grad_norm": 1.092210292816162, "learning_rate": 5.653128430296379e-06, "loss": 0.0241, "step": 13390 }, { "epoch": 0.11314939519959469, "grad_norm": 1.0069886445999146, "learning_rate": 5.657350333530356e-06, "loss": 0.0243, "step": 13400 }, { "epoch": 0.11323383504675856, "grad_norm": 2.092388391494751, "learning_rate": 5.6615722367643335e-06, "loss": 0.0244, "step": 13410 }, { "epoch": 0.11331827489392245, "grad_norm": 0.3190837800502777, "learning_rate": 5.665794139998312e-06, "loss": 0.0262, "step": 13420 }, { "epoch": 0.11340271474108632, "grad_norm": 1.170495867729187, "learning_rate": 5.670016043232289e-06, "loss": 0.0256, "step": 13430 }, { "epoch": 0.11348715458825019, "grad_norm": 0.8036184310913086, "learning_rate": 5.674237946466268e-06, "loss": 0.0249, "step": 13440 }, { "epoch": 0.11357159443541408, "grad_norm": 0.9094191789627075, "learning_rate": 5.678459849700245e-06, "loss": 0.0203, "step": 13450 }, { "epoch": 0.11365603428257795, "grad_norm": 0.004978427663445473, "learning_rate": 5.682681752934224e-06, "loss": 0.0272, "step": 13460 }, { "epoch": 0.11374047412974182, "grad_norm": 1.023194432258606, "learning_rate": 5.6869036561682015e-06, "loss": 0.0211, "step": 13470 }, { "epoch": 0.1138249139769057, "grad_norm": 1.1570470333099365, "learning_rate": 5.6911255594021785e-06, "loss": 0.0552, "step": 13480 }, { "epoch": 0.11390935382406958, "grad_norm": 0.7424198389053345, "learning_rate": 5.695347462636157e-06, "loss": 0.0434, "step": 13490 }, { "epoch": 0.11399379367123345, "grad_norm": 0.5878762602806091, "learning_rate": 5.699569365870134e-06, "loss": 0.0222, "step": 13500 }, { "epoch": 0.11407823351839733, "grad_norm": 0.7746303081512451, "learning_rate": 5.703791269104113e-06, "loss": 0.0284, "step": 13510 }, { "epoch": 0.11416267336556121, "grad_norm": 0.9709267020225525, "learning_rate": 5.708013172338091e-06, "loss": 0.0204, "step": 13520 }, { "epoch": 0.11424711321272508, "grad_norm": 0.44745564460754395, "learning_rate": 5.712235075572068e-06, "loss": 0.0221, "step": 13530 }, { "epoch": 0.11433155305988896, "grad_norm": 1.8365914821624756, "learning_rate": 5.7164569788060465e-06, "loss": 0.0228, "step": 13540 }, { "epoch": 0.11441599290705284, "grad_norm": 0.39755386114120483, "learning_rate": 5.7206788820400235e-06, "loss": 0.0248, "step": 13550 }, { "epoch": 0.11450043275421672, "grad_norm": 1.3068764209747314, "learning_rate": 5.724900785274002e-06, "loss": 0.036, "step": 13560 }, { "epoch": 0.11458487260138059, "grad_norm": 0.45841023325920105, "learning_rate": 5.72912268850798e-06, "loss": 0.0206, "step": 13570 }, { "epoch": 0.11466931244854447, "grad_norm": 0.9400277137756348, "learning_rate": 5.733344591741957e-06, "loss": 0.0246, "step": 13580 }, { "epoch": 0.11475375229570835, "grad_norm": 1.3385361433029175, "learning_rate": 5.737566494975936e-06, "loss": 0.0317, "step": 13590 }, { "epoch": 0.11483819214287222, "grad_norm": 0.9344369173049927, "learning_rate": 5.741788398209913e-06, "loss": 0.0347, "step": 13600 }, { "epoch": 0.11492263199003609, "grad_norm": 1.2418123483657837, "learning_rate": 5.7460103014438915e-06, "loss": 0.0234, "step": 13610 }, { "epoch": 0.11500707183719998, "grad_norm": 0.7223513126373291, "learning_rate": 5.750232204677869e-06, "loss": 0.017, "step": 13620 }, { "epoch": 0.11509151168436385, "grad_norm": 0.8356301188468933, "learning_rate": 5.754454107911847e-06, "loss": 0.0222, "step": 13630 }, { "epoch": 0.11517595153152772, "grad_norm": 0.6065970063209534, "learning_rate": 5.758676011145825e-06, "loss": 0.033, "step": 13640 }, { "epoch": 0.11526039137869161, "grad_norm": 0.834016740322113, "learning_rate": 5.762897914379802e-06, "loss": 0.0333, "step": 13650 }, { "epoch": 0.11534483122585548, "grad_norm": 0.8789721727371216, "learning_rate": 5.767119817613781e-06, "loss": 0.0269, "step": 13660 }, { "epoch": 0.11542927107301935, "grad_norm": 1.1039059162139893, "learning_rate": 5.771341720847759e-06, "loss": 0.0317, "step": 13670 }, { "epoch": 0.11551371092018324, "grad_norm": 1.5006086826324463, "learning_rate": 5.775563624081737e-06, "loss": 0.0192, "step": 13680 }, { "epoch": 0.11559815076734711, "grad_norm": 1.600651502609253, "learning_rate": 5.779785527315714e-06, "loss": 0.0341, "step": 13690 }, { "epoch": 0.11568259061451099, "grad_norm": 1.1748347282409668, "learning_rate": 5.7840074305496914e-06, "loss": 0.0283, "step": 13700 }, { "epoch": 0.11576703046167486, "grad_norm": 1.3449984788894653, "learning_rate": 5.78822933378367e-06, "loss": 0.0277, "step": 13710 }, { "epoch": 0.11585147030883874, "grad_norm": 0.728096604347229, "learning_rate": 5.792451237017648e-06, "loss": 0.0249, "step": 13720 }, { "epoch": 0.11593591015600262, "grad_norm": 2.161609172821045, "learning_rate": 5.796673140251627e-06, "loss": 0.0316, "step": 13730 }, { "epoch": 0.11602035000316649, "grad_norm": 0.40777260065078735, "learning_rate": 5.800895043485604e-06, "loss": 0.0306, "step": 13740 }, { "epoch": 0.11610478985033038, "grad_norm": 1.1313971281051636, "learning_rate": 5.8051169467195816e-06, "loss": 0.02, "step": 13750 }, { "epoch": 0.11618922969749425, "grad_norm": 0.5384504199028015, "learning_rate": 5.809338849953559e-06, "loss": 0.0248, "step": 13760 }, { "epoch": 0.11627366954465812, "grad_norm": 1.4360653162002563, "learning_rate": 5.813560753187537e-06, "loss": 0.0336, "step": 13770 }, { "epoch": 0.116358109391822, "grad_norm": 0.8517219424247742, "learning_rate": 5.817782656421516e-06, "loss": 0.0222, "step": 13780 }, { "epoch": 0.11644254923898588, "grad_norm": 1.149146318435669, "learning_rate": 5.822004559655493e-06, "loss": 0.0367, "step": 13790 }, { "epoch": 0.11652698908614975, "grad_norm": 0.8681339025497437, "learning_rate": 5.826226462889472e-06, "loss": 0.0364, "step": 13800 }, { "epoch": 0.11661142893331362, "grad_norm": 0.1470610797405243, "learning_rate": 5.830448366123449e-06, "loss": 0.0145, "step": 13810 }, { "epoch": 0.11669586878047751, "grad_norm": 1.1809313297271729, "learning_rate": 5.8346702693574266e-06, "loss": 0.0268, "step": 13820 }, { "epoch": 0.11678030862764138, "grad_norm": 0.5673054456710815, "learning_rate": 5.838892172591405e-06, "loss": 0.025, "step": 13830 }, { "epoch": 0.11686474847480526, "grad_norm": 2.266599416732788, "learning_rate": 5.843114075825382e-06, "loss": 0.0213, "step": 13840 }, { "epoch": 0.11694918832196914, "grad_norm": 1.079797387123108, "learning_rate": 5.847335979059361e-06, "loss": 0.0321, "step": 13850 }, { "epoch": 0.11703362816913301, "grad_norm": 1.1622332334518433, "learning_rate": 5.851557882293338e-06, "loss": 0.027, "step": 13860 }, { "epoch": 0.11711806801629689, "grad_norm": 0.8627147674560547, "learning_rate": 5.855779785527316e-06, "loss": 0.0239, "step": 13870 }, { "epoch": 0.11720250786346077, "grad_norm": 1.537564992904663, "learning_rate": 5.8600016887612946e-06, "loss": 0.0268, "step": 13880 }, { "epoch": 0.11728694771062464, "grad_norm": 0.7784967422485352, "learning_rate": 5.864223591995272e-06, "loss": 0.0296, "step": 13890 }, { "epoch": 0.11737138755778852, "grad_norm": 1.0392358303070068, "learning_rate": 5.86844549522925e-06, "loss": 0.0311, "step": 13900 }, { "epoch": 0.1174558274049524, "grad_norm": 0.5216428637504578, "learning_rate": 5.872667398463227e-06, "loss": 0.0316, "step": 13910 }, { "epoch": 0.11754026725211628, "grad_norm": 1.33235764503479, "learning_rate": 5.876889301697205e-06, "loss": 0.0258, "step": 13920 }, { "epoch": 0.11762470709928015, "grad_norm": 1.083234429359436, "learning_rate": 5.881111204931184e-06, "loss": 0.0382, "step": 13930 }, { "epoch": 0.11770914694644402, "grad_norm": 0.7743604779243469, "learning_rate": 5.885333108165161e-06, "loss": 0.0323, "step": 13940 }, { "epoch": 0.11779358679360791, "grad_norm": 0.46372854709625244, "learning_rate": 5.88955501139914e-06, "loss": 0.0247, "step": 13950 }, { "epoch": 0.11787802664077178, "grad_norm": 0.9428807497024536, "learning_rate": 5.893776914633117e-06, "loss": 0.0191, "step": 13960 }, { "epoch": 0.11796246648793565, "grad_norm": 0.9953282475471497, "learning_rate": 5.897998817867095e-06, "loss": 0.0243, "step": 13970 }, { "epoch": 0.11804690633509954, "grad_norm": 0.8097124695777893, "learning_rate": 5.902220721101073e-06, "loss": 0.025, "step": 13980 }, { "epoch": 0.11813134618226341, "grad_norm": 0.8752173185348511, "learning_rate": 5.90644262433505e-06, "loss": 0.0301, "step": 13990 }, { "epoch": 0.11821578602942728, "grad_norm": 0.9432576894760132, "learning_rate": 5.910664527569029e-06, "loss": 0.0308, "step": 14000 }, { "epoch": 0.11830022587659117, "grad_norm": 0.8057315945625305, "learning_rate": 5.914886430803006e-06, "loss": 0.025, "step": 14010 }, { "epoch": 0.11838466572375504, "grad_norm": 1.1431537866592407, "learning_rate": 5.919108334036985e-06, "loss": 0.0301, "step": 14020 }, { "epoch": 0.11846910557091891, "grad_norm": 2.051326274871826, "learning_rate": 5.9233302372709624e-06, "loss": 0.0292, "step": 14030 }, { "epoch": 0.11855354541808279, "grad_norm": 0.5306580066680908, "learning_rate": 5.9275521405049395e-06, "loss": 0.0157, "step": 14040 }, { "epoch": 0.11863798526524667, "grad_norm": 0.6362818479537964, "learning_rate": 5.931774043738918e-06, "loss": 0.0207, "step": 14050 }, { "epoch": 0.11872242511241055, "grad_norm": 2.030358076095581, "learning_rate": 5.935995946972896e-06, "loss": 0.038, "step": 14060 }, { "epoch": 0.11880686495957442, "grad_norm": 0.6415475010871887, "learning_rate": 5.940217850206874e-06, "loss": 0.018, "step": 14070 }, { "epoch": 0.1188913048067383, "grad_norm": 1.0431835651397705, "learning_rate": 5.944439753440852e-06, "loss": 0.0249, "step": 14080 }, { "epoch": 0.11897574465390218, "grad_norm": 0.7738913893699646, "learning_rate": 5.948661656674829e-06, "loss": 0.0276, "step": 14090 }, { "epoch": 0.11906018450106605, "grad_norm": 1.0759636163711548, "learning_rate": 5.9528835599088075e-06, "loss": 0.036, "step": 14100 }, { "epoch": 0.11914462434822994, "grad_norm": 0.5098883509635925, "learning_rate": 5.957105463142785e-06, "loss": 0.0211, "step": 14110 }, { "epoch": 0.11922906419539381, "grad_norm": 1.4840002059936523, "learning_rate": 5.961327366376763e-06, "loss": 0.0172, "step": 14120 }, { "epoch": 0.11931350404255768, "grad_norm": 0.7709274291992188, "learning_rate": 5.965549269610741e-06, "loss": 0.0145, "step": 14130 }, { "epoch": 0.11939794388972155, "grad_norm": 0.6766738295555115, "learning_rate": 5.96977117284472e-06, "loss": 0.0414, "step": 14140 }, { "epoch": 0.11948238373688544, "grad_norm": 0.8492491841316223, "learning_rate": 5.973993076078697e-06, "loss": 0.0391, "step": 14150 }, { "epoch": 0.11956682358404931, "grad_norm": 1.383046269416809, "learning_rate": 5.978214979312675e-06, "loss": 0.0265, "step": 14160 }, { "epoch": 0.11965126343121318, "grad_norm": 0.9409967660903931, "learning_rate": 5.9824368825466525e-06, "loss": 0.0255, "step": 14170 }, { "epoch": 0.11973570327837707, "grad_norm": 0.4393687844276428, "learning_rate": 5.98665878578063e-06, "loss": 0.0427, "step": 14180 }, { "epoch": 0.11982014312554094, "grad_norm": 0.538382351398468, "learning_rate": 5.990880689014609e-06, "loss": 0.018, "step": 14190 }, { "epoch": 0.11990458297270482, "grad_norm": 1.3814271688461304, "learning_rate": 5.995102592248586e-06, "loss": 0.0196, "step": 14200 }, { "epoch": 0.1199890228198687, "grad_norm": 1.1220121383666992, "learning_rate": 5.999324495482564e-06, "loss": 0.025, "step": 14210 }, { "epoch": 0.12007346266703257, "grad_norm": 2.338578701019287, "learning_rate": 6.003546398716542e-06, "loss": 0.0138, "step": 14220 }, { "epoch": 0.12015790251419645, "grad_norm": 1.6417797803878784, "learning_rate": 6.00776830195052e-06, "loss": 0.0292, "step": 14230 }, { "epoch": 0.12024234236136033, "grad_norm": 0.6505575776100159, "learning_rate": 6.011990205184498e-06, "loss": 0.0151, "step": 14240 }, { "epoch": 0.1203267822085242, "grad_norm": 0.7035506963729858, "learning_rate": 6.016212108418475e-06, "loss": 0.0297, "step": 14250 }, { "epoch": 0.12041122205568808, "grad_norm": 0.23945331573486328, "learning_rate": 6.020434011652453e-06, "loss": 0.0375, "step": 14260 }, { "epoch": 0.12049566190285195, "grad_norm": 1.3349950313568115, "learning_rate": 6.024655914886431e-06, "loss": 0.0329, "step": 14270 }, { "epoch": 0.12058010175001584, "grad_norm": 1.0170304775238037, "learning_rate": 6.028877818120409e-06, "loss": 0.0323, "step": 14280 }, { "epoch": 0.12066454159717971, "grad_norm": 0.45360085368156433, "learning_rate": 6.033099721354388e-06, "loss": 0.0203, "step": 14290 }, { "epoch": 0.12074898144434358, "grad_norm": 1.1860790252685547, "learning_rate": 6.037321624588365e-06, "loss": 0.0218, "step": 14300 }, { "epoch": 0.12083342129150747, "grad_norm": 0.4265297055244446, "learning_rate": 6.041543527822343e-06, "loss": 0.0321, "step": 14310 }, { "epoch": 0.12091786113867134, "grad_norm": 0.1946972906589508, "learning_rate": 6.04576543105632e-06, "loss": 0.0257, "step": 14320 }, { "epoch": 0.12100230098583521, "grad_norm": 0.9351276159286499, "learning_rate": 6.049987334290298e-06, "loss": 0.0151, "step": 14330 }, { "epoch": 0.1210867408329991, "grad_norm": 1.0286377668380737, "learning_rate": 6.054209237524277e-06, "loss": 0.0298, "step": 14340 }, { "epoch": 0.12117118068016297, "grad_norm": 0.8019536137580872, "learning_rate": 6.058431140758254e-06, "loss": 0.026, "step": 14350 }, { "epoch": 0.12125562052732684, "grad_norm": 0.9517760872840881, "learning_rate": 6.062653043992233e-06, "loss": 0.0257, "step": 14360 }, { "epoch": 0.12134006037449072, "grad_norm": 1.1364011764526367, "learning_rate": 6.0668749472262105e-06, "loss": 0.0355, "step": 14370 }, { "epoch": 0.1214245002216546, "grad_norm": 0.8307782411575317, "learning_rate": 6.0710968504601875e-06, "loss": 0.0305, "step": 14380 }, { "epoch": 0.12150894006881847, "grad_norm": 1.1062270402908325, "learning_rate": 6.075318753694166e-06, "loss": 0.0204, "step": 14390 }, { "epoch": 0.12159337991598235, "grad_norm": 0.6359397172927856, "learning_rate": 6.079540656928143e-06, "loss": 0.0272, "step": 14400 }, { "epoch": 0.12167781976314623, "grad_norm": 0.48236021399497986, "learning_rate": 6.083762560162122e-06, "loss": 0.0215, "step": 14410 }, { "epoch": 0.1217622596103101, "grad_norm": 1.0032992362976074, "learning_rate": 6.0879844633961e-06, "loss": 0.0307, "step": 14420 }, { "epoch": 0.12184669945747398, "grad_norm": 1.2716050148010254, "learning_rate": 6.092206366630077e-06, "loss": 0.036, "step": 14430 }, { "epoch": 0.12193113930463786, "grad_norm": 1.2149336338043213, "learning_rate": 6.0964282698640555e-06, "loss": 0.0386, "step": 14440 }, { "epoch": 0.12201557915180174, "grad_norm": 0.6697990894317627, "learning_rate": 6.1006501730980325e-06, "loss": 0.021, "step": 14450 }, { "epoch": 0.12210001899896561, "grad_norm": 0.44871336221694946, "learning_rate": 6.104872076332011e-06, "loss": 0.0179, "step": 14460 }, { "epoch": 0.12218445884612948, "grad_norm": 1.7641772031784058, "learning_rate": 6.109093979565989e-06, "loss": 0.0214, "step": 14470 }, { "epoch": 0.12226889869329337, "grad_norm": 1.129635214805603, "learning_rate": 6.113315882799967e-06, "loss": 0.0285, "step": 14480 }, { "epoch": 0.12235333854045724, "grad_norm": 0.8485088348388672, "learning_rate": 6.117537786033945e-06, "loss": 0.0319, "step": 14490 }, { "epoch": 0.12243777838762111, "grad_norm": 0.4314082860946655, "learning_rate": 6.121759689267922e-06, "loss": 0.0186, "step": 14500 }, { "epoch": 0.122522218234785, "grad_norm": 0.4912186861038208, "learning_rate": 6.1259815925019005e-06, "loss": 0.0327, "step": 14510 }, { "epoch": 0.12260665808194887, "grad_norm": 0.8230891227722168, "learning_rate": 6.130203495735878e-06, "loss": 0.0279, "step": 14520 }, { "epoch": 0.12269109792911274, "grad_norm": 1.3763080835342407, "learning_rate": 6.134425398969856e-06, "loss": 0.0293, "step": 14530 }, { "epoch": 0.12277553777627663, "grad_norm": 0.5460295677185059, "learning_rate": 6.138647302203834e-06, "loss": 0.0291, "step": 14540 }, { "epoch": 0.1228599776234405, "grad_norm": 0.601218581199646, "learning_rate": 6.142869205437811e-06, "loss": 0.024, "step": 14550 }, { "epoch": 0.12294441747060438, "grad_norm": 1.0468716621398926, "learning_rate": 6.14709110867179e-06, "loss": 0.0219, "step": 14560 }, { "epoch": 0.12302885731776826, "grad_norm": 0.6243321299552917, "learning_rate": 6.151313011905768e-06, "loss": 0.014, "step": 14570 }, { "epoch": 0.12311329716493213, "grad_norm": 0.9221895337104797, "learning_rate": 6.1555349151397455e-06, "loss": 0.0184, "step": 14580 }, { "epoch": 0.123197737012096, "grad_norm": 0.4063107669353485, "learning_rate": 6.159756818373723e-06, "loss": 0.0155, "step": 14590 }, { "epoch": 0.12328217685925988, "grad_norm": 1.5385714769363403, "learning_rate": 6.1639787216077e-06, "loss": 0.0274, "step": 14600 }, { "epoch": 0.12336661670642377, "grad_norm": 0.5480573773384094, "learning_rate": 6.168200624841679e-06, "loss": 0.0154, "step": 14610 }, { "epoch": 0.12345105655358764, "grad_norm": 1.0190738439559937, "learning_rate": 6.172422528075657e-06, "loss": 0.0244, "step": 14620 }, { "epoch": 0.12353549640075151, "grad_norm": 0.5075932741165161, "learning_rate": 6.176644431309635e-06, "loss": 0.0333, "step": 14630 }, { "epoch": 0.1236199362479154, "grad_norm": 0.8672965168952942, "learning_rate": 6.180866334543613e-06, "loss": 0.0147, "step": 14640 }, { "epoch": 0.12370437609507927, "grad_norm": 2.3978517055511475, "learning_rate": 6.185088237777591e-06, "loss": 0.0327, "step": 14650 }, { "epoch": 0.12378881594224314, "grad_norm": 1.2845544815063477, "learning_rate": 6.189310141011568e-06, "loss": 0.0273, "step": 14660 }, { "epoch": 0.12387325578940703, "grad_norm": 0.9315706491470337, "learning_rate": 6.193532044245546e-06, "loss": 0.022, "step": 14670 }, { "epoch": 0.1239576956365709, "grad_norm": 0.36666229367256165, "learning_rate": 6.197753947479525e-06, "loss": 0.0173, "step": 14680 }, { "epoch": 0.12404213548373477, "grad_norm": 0.8570045232772827, "learning_rate": 6.201975850713502e-06, "loss": 0.0308, "step": 14690 }, { "epoch": 0.12412657533089864, "grad_norm": 1.3574438095092773, "learning_rate": 6.206197753947481e-06, "loss": 0.0226, "step": 14700 }, { "epoch": 0.12421101517806253, "grad_norm": 0.5759990215301514, "learning_rate": 6.210419657181458e-06, "loss": 0.0297, "step": 14710 }, { "epoch": 0.1242954550252264, "grad_norm": 1.9799742698669434, "learning_rate": 6.2146415604154355e-06, "loss": 0.0438, "step": 14720 }, { "epoch": 0.12437989487239028, "grad_norm": 0.979000449180603, "learning_rate": 6.218863463649414e-06, "loss": 0.0249, "step": 14730 }, { "epoch": 0.12446433471955416, "grad_norm": 1.9119248390197754, "learning_rate": 6.223085366883391e-06, "loss": 0.0222, "step": 14740 }, { "epoch": 0.12454877456671803, "grad_norm": 1.4570412635803223, "learning_rate": 6.22730727011737e-06, "loss": 0.0341, "step": 14750 }, { "epoch": 0.12463321441388191, "grad_norm": 0.6650915741920471, "learning_rate": 6.231529173351347e-06, "loss": 0.0249, "step": 14760 }, { "epoch": 0.1247176542610458, "grad_norm": 1.0876481533050537, "learning_rate": 6.235751076585325e-06, "loss": 0.0172, "step": 14770 }, { "epoch": 0.12480209410820967, "grad_norm": 1.9661192893981934, "learning_rate": 6.2399729798193035e-06, "loss": 0.0333, "step": 14780 }, { "epoch": 0.12488653395537354, "grad_norm": 1.4169869422912598, "learning_rate": 6.2441948830532805e-06, "loss": 0.0255, "step": 14790 }, { "epoch": 0.12497097380253741, "grad_norm": 0.8858640193939209, "learning_rate": 6.248416786287259e-06, "loss": 0.0291, "step": 14800 }, { "epoch": 0.1250554136497013, "grad_norm": 0.680128276348114, "learning_rate": 6.252638689521236e-06, "loss": 0.0303, "step": 14810 }, { "epoch": 0.12513985349686518, "grad_norm": 0.8858612179756165, "learning_rate": 6.256860592755215e-06, "loss": 0.0297, "step": 14820 }, { "epoch": 0.12522429334402904, "grad_norm": 1.0086337327957153, "learning_rate": 6.261082495989193e-06, "loss": 0.0287, "step": 14830 }, { "epoch": 0.12530873319119293, "grad_norm": 0.757635235786438, "learning_rate": 6.26530439922317e-06, "loss": 0.019, "step": 14840 }, { "epoch": 0.12539317303835681, "grad_norm": 0.5932726263999939, "learning_rate": 6.2695263024571485e-06, "loss": 0.0247, "step": 14850 }, { "epoch": 0.12547761288552067, "grad_norm": 0.6255943775177002, "learning_rate": 6.2737482056911255e-06, "loss": 0.0198, "step": 14860 }, { "epoch": 0.12556205273268456, "grad_norm": 0.44270214438438416, "learning_rate": 6.277970108925104e-06, "loss": 0.0158, "step": 14870 }, { "epoch": 0.12564649257984842, "grad_norm": 0.6326145529747009, "learning_rate": 6.282192012159082e-06, "loss": 0.0256, "step": 14880 }, { "epoch": 0.1257309324270123, "grad_norm": 0.5347554087638855, "learning_rate": 6.286413915393059e-06, "loss": 0.0391, "step": 14890 }, { "epoch": 0.1258153722741762, "grad_norm": 0.4736630916595459, "learning_rate": 6.290635818627038e-06, "loss": 0.0294, "step": 14900 }, { "epoch": 0.12589981212134005, "grad_norm": 1.6640856266021729, "learning_rate": 6.294857721861015e-06, "loss": 0.0298, "step": 14910 }, { "epoch": 0.12598425196850394, "grad_norm": 0.8186252117156982, "learning_rate": 6.2990796250949935e-06, "loss": 0.0358, "step": 14920 }, { "epoch": 0.12606869181566782, "grad_norm": 0.5540655255317688, "learning_rate": 6.303301528328971e-06, "loss": 0.0243, "step": 14930 }, { "epoch": 0.12615313166283168, "grad_norm": 1.1384425163269043, "learning_rate": 6.307523431562948e-06, "loss": 0.0255, "step": 14940 }, { "epoch": 0.12623757150999557, "grad_norm": 0.7222641706466675, "learning_rate": 6.311745334796927e-06, "loss": 0.0245, "step": 14950 }, { "epoch": 0.12632201135715945, "grad_norm": 0.977242112159729, "learning_rate": 6.315967238030904e-06, "loss": 0.0281, "step": 14960 }, { "epoch": 0.1264064512043233, "grad_norm": 0.8860167264938354, "learning_rate": 6.320189141264883e-06, "loss": 0.0408, "step": 14970 }, { "epoch": 0.1264908910514872, "grad_norm": 0.5630953311920166, "learning_rate": 6.324411044498861e-06, "loss": 0.0278, "step": 14980 }, { "epoch": 0.12657533089865108, "grad_norm": 0.4162220060825348, "learning_rate": 6.328632947732839e-06, "loss": 0.0388, "step": 14990 }, { "epoch": 0.12665977074581494, "grad_norm": 0.4576854407787323, "learning_rate": 6.332854850966816e-06, "loss": 0.0191, "step": 15000 }, { "epoch": 0.12674421059297883, "grad_norm": 0.9155135154724121, "learning_rate": 6.337076754200793e-06, "loss": 0.0207, "step": 15010 }, { "epoch": 0.12682865044014272, "grad_norm": 0.9990374445915222, "learning_rate": 6.341298657434772e-06, "loss": 0.0357, "step": 15020 }, { "epoch": 0.12691309028730657, "grad_norm": 1.1222634315490723, "learning_rate": 6.34552056066875e-06, "loss": 0.0237, "step": 15030 }, { "epoch": 0.12699753013447046, "grad_norm": 0.9157246351242065, "learning_rate": 6.349742463902729e-06, "loss": 0.0171, "step": 15040 }, { "epoch": 0.12708196998163435, "grad_norm": 0.42009469866752625, "learning_rate": 6.353964367136706e-06, "loss": 0.0251, "step": 15050 }, { "epoch": 0.1271664098287982, "grad_norm": 0.11369547247886658, "learning_rate": 6.3581862703706836e-06, "loss": 0.0136, "step": 15060 }, { "epoch": 0.1272508496759621, "grad_norm": 1.0705848932266235, "learning_rate": 6.362408173604661e-06, "loss": 0.0546, "step": 15070 }, { "epoch": 0.12733528952312595, "grad_norm": 1.5872175693511963, "learning_rate": 6.366630076838639e-06, "loss": 0.0265, "step": 15080 }, { "epoch": 0.12741972937028984, "grad_norm": 0.7375980615615845, "learning_rate": 6.370851980072618e-06, "loss": 0.0244, "step": 15090 }, { "epoch": 0.12750416921745372, "grad_norm": 0.3772319257259369, "learning_rate": 6.375073883306595e-06, "loss": 0.0264, "step": 15100 }, { "epoch": 0.12758860906461758, "grad_norm": 0.8125713467597961, "learning_rate": 6.379295786540573e-06, "loss": 0.0247, "step": 15110 }, { "epoch": 0.12767304891178147, "grad_norm": 0.4811934232711792, "learning_rate": 6.383517689774551e-06, "loss": 0.0272, "step": 15120 }, { "epoch": 0.12775748875894535, "grad_norm": 0.8360493183135986, "learning_rate": 6.3877395930085286e-06, "loss": 0.0264, "step": 15130 }, { "epoch": 0.1278419286061092, "grad_norm": 0.5337676405906677, "learning_rate": 6.391961496242507e-06, "loss": 0.0248, "step": 15140 }, { "epoch": 0.1279263684532731, "grad_norm": 0.6734366416931152, "learning_rate": 6.396183399476484e-06, "loss": 0.0287, "step": 15150 }, { "epoch": 0.12801080830043698, "grad_norm": 0.4091334640979767, "learning_rate": 6.400405302710463e-06, "loss": 0.0192, "step": 15160 }, { "epoch": 0.12809524814760084, "grad_norm": 0.6357341408729553, "learning_rate": 6.40462720594444e-06, "loss": 0.0289, "step": 15170 }, { "epoch": 0.12817968799476473, "grad_norm": 1.0971680879592896, "learning_rate": 6.408849109178418e-06, "loss": 0.0239, "step": 15180 }, { "epoch": 0.12826412784192862, "grad_norm": 1.0084707736968994, "learning_rate": 6.4130710124123966e-06, "loss": 0.0246, "step": 15190 }, { "epoch": 0.12834856768909247, "grad_norm": 1.2603260278701782, "learning_rate": 6.4172929156463736e-06, "loss": 0.0377, "step": 15200 }, { "epoch": 0.12843300753625636, "grad_norm": 0.9810188412666321, "learning_rate": 6.421514818880352e-06, "loss": 0.0213, "step": 15210 }, { "epoch": 0.12851744738342025, "grad_norm": 1.4669829607009888, "learning_rate": 6.425736722114329e-06, "loss": 0.0291, "step": 15220 }, { "epoch": 0.1286018872305841, "grad_norm": 0.8786041140556335, "learning_rate": 6.429958625348307e-06, "loss": 0.0304, "step": 15230 }, { "epoch": 0.128686327077748, "grad_norm": 0.729134738445282, "learning_rate": 6.434180528582286e-06, "loss": 0.0259, "step": 15240 }, { "epoch": 0.12877076692491188, "grad_norm": 0.9497475624084473, "learning_rate": 6.438402431816263e-06, "loss": 0.0347, "step": 15250 }, { "epoch": 0.12885520677207574, "grad_norm": 1.987231731414795, "learning_rate": 6.4426243350502416e-06, "loss": 0.0284, "step": 15260 }, { "epoch": 0.12893964661923962, "grad_norm": 0.6359442472457886, "learning_rate": 6.446846238284219e-06, "loss": 0.0166, "step": 15270 }, { "epoch": 0.1290240864664035, "grad_norm": 0.5549911856651306, "learning_rate": 6.4510681415181964e-06, "loss": 0.0279, "step": 15280 }, { "epoch": 0.12910852631356737, "grad_norm": 0.8991914391517639, "learning_rate": 6.455290044752175e-06, "loss": 0.0499, "step": 15290 }, { "epoch": 0.12919296616073125, "grad_norm": 0.41402584314346313, "learning_rate": 6.459511947986152e-06, "loss": 0.0322, "step": 15300 }, { "epoch": 0.1292774060078951, "grad_norm": 0.9361251592636108, "learning_rate": 6.463733851220131e-06, "loss": 0.0243, "step": 15310 }, { "epoch": 0.129361845855059, "grad_norm": 0.7049379944801331, "learning_rate": 6.467955754454108e-06, "loss": 0.023, "step": 15320 }, { "epoch": 0.12944628570222289, "grad_norm": 0.6026835441589355, "learning_rate": 6.472177657688087e-06, "loss": 0.0223, "step": 15330 }, { "epoch": 0.12953072554938674, "grad_norm": 0.9600390195846558, "learning_rate": 6.4763995609220644e-06, "loss": 0.0345, "step": 15340 }, { "epoch": 0.12961516539655063, "grad_norm": 0.9808129668235779, "learning_rate": 6.4806214641560415e-06, "loss": 0.0227, "step": 15350 }, { "epoch": 0.12969960524371452, "grad_norm": 1.3226628303527832, "learning_rate": 6.48484336739002e-06, "loss": 0.0277, "step": 15360 }, { "epoch": 0.12978404509087837, "grad_norm": 0.7106572389602661, "learning_rate": 6.489065270623998e-06, "loss": 0.0278, "step": 15370 }, { "epoch": 0.12986848493804226, "grad_norm": 0.9590808749198914, "learning_rate": 6.493287173857976e-06, "loss": 0.0256, "step": 15380 }, { "epoch": 0.12995292478520615, "grad_norm": 0.7618299126625061, "learning_rate": 6.497509077091954e-06, "loss": 0.0199, "step": 15390 }, { "epoch": 0.13003736463237, "grad_norm": 0.39566996693611145, "learning_rate": 6.501730980325931e-06, "loss": 0.0207, "step": 15400 }, { "epoch": 0.1301218044795339, "grad_norm": 0.5007640719413757, "learning_rate": 6.5059528835599094e-06, "loss": 0.0387, "step": 15410 }, { "epoch": 0.13020624432669778, "grad_norm": 0.7645143866539001, "learning_rate": 6.510174786793887e-06, "loss": 0.0191, "step": 15420 }, { "epoch": 0.13029068417386164, "grad_norm": 1.826019287109375, "learning_rate": 6.514396690027865e-06, "loss": 0.0269, "step": 15430 }, { "epoch": 0.13037512402102552, "grad_norm": 2.3512861728668213, "learning_rate": 6.518618593261843e-06, "loss": 0.0202, "step": 15440 }, { "epoch": 0.1304595638681894, "grad_norm": 0.4556906819343567, "learning_rate": 6.52284049649582e-06, "loss": 0.0211, "step": 15450 }, { "epoch": 0.13054400371535327, "grad_norm": 2.4845831394195557, "learning_rate": 6.527062399729799e-06, "loss": 0.0237, "step": 15460 }, { "epoch": 0.13062844356251715, "grad_norm": 0.7886413931846619, "learning_rate": 6.531284302963777e-06, "loss": 0.0301, "step": 15470 }, { "epoch": 0.13071288340968104, "grad_norm": 1.622550129890442, "learning_rate": 6.5355062061977545e-06, "loss": 0.0345, "step": 15480 }, { "epoch": 0.1307973232568449, "grad_norm": 1.2570345401763916, "learning_rate": 6.539728109431732e-06, "loss": 0.0328, "step": 15490 }, { "epoch": 0.13088176310400879, "grad_norm": 0.938630998134613, "learning_rate": 6.543950012665711e-06, "loss": 0.0178, "step": 15500 }, { "epoch": 0.13096620295117267, "grad_norm": 1.4210177659988403, "learning_rate": 6.548171915899688e-06, "loss": 0.032, "step": 15510 }, { "epoch": 0.13105064279833653, "grad_norm": 0.6003727316856384, "learning_rate": 6.552393819133666e-06, "loss": 0.0249, "step": 15520 }, { "epoch": 0.13113508264550042, "grad_norm": 1.3762062788009644, "learning_rate": 6.556615722367644e-06, "loss": 0.0308, "step": 15530 }, { "epoch": 0.13121952249266428, "grad_norm": 0.6540197730064392, "learning_rate": 6.560837625601622e-06, "loss": 0.0291, "step": 15540 }, { "epoch": 0.13130396233982816, "grad_norm": 1.444808840751648, "learning_rate": 6.5650595288356e-06, "loss": 0.0334, "step": 15550 }, { "epoch": 0.13138840218699205, "grad_norm": 0.7549641132354736, "learning_rate": 6.569281432069577e-06, "loss": 0.0241, "step": 15560 }, { "epoch": 0.1314728420341559, "grad_norm": 0.48670047521591187, "learning_rate": 6.573503335303555e-06, "loss": 0.03, "step": 15570 }, { "epoch": 0.1315572818813198, "grad_norm": 0.7065364718437195, "learning_rate": 6.577725238537533e-06, "loss": 0.0284, "step": 15580 }, { "epoch": 0.13164172172848368, "grad_norm": 0.6644801497459412, "learning_rate": 6.581947141771511e-06, "loss": 0.0302, "step": 15590 }, { "epoch": 0.13172616157564754, "grad_norm": 0.22666087746620178, "learning_rate": 6.58616904500549e-06, "loss": 0.0287, "step": 15600 }, { "epoch": 0.13181060142281142, "grad_norm": 0.9244785308837891, "learning_rate": 6.590390948239467e-06, "loss": 0.022, "step": 15610 }, { "epoch": 0.1318950412699753, "grad_norm": 0.49571970105171204, "learning_rate": 6.594612851473445e-06, "loss": 0.0281, "step": 15620 }, { "epoch": 0.13197948111713917, "grad_norm": 0.9493920207023621, "learning_rate": 6.598834754707422e-06, "loss": 0.0307, "step": 15630 }, { "epoch": 0.13206392096430306, "grad_norm": 0.4997584819793701, "learning_rate": 6.6030566579414e-06, "loss": 0.031, "step": 15640 }, { "epoch": 0.13214836081146694, "grad_norm": 1.3092278242111206, "learning_rate": 6.607278561175379e-06, "loss": 0.0286, "step": 15650 }, { "epoch": 0.1322328006586308, "grad_norm": 0.7838383913040161, "learning_rate": 6.611500464409356e-06, "loss": 0.0219, "step": 15660 }, { "epoch": 0.1323172405057947, "grad_norm": 0.5542532205581665, "learning_rate": 6.615722367643335e-06, "loss": 0.0198, "step": 15670 }, { "epoch": 0.13240168035295857, "grad_norm": 0.9320794343948364, "learning_rate": 6.6199442708773125e-06, "loss": 0.0213, "step": 15680 }, { "epoch": 0.13248612020012243, "grad_norm": 0.33859819173812866, "learning_rate": 6.6241661741112895e-06, "loss": 0.0364, "step": 15690 }, { "epoch": 0.13257056004728632, "grad_norm": 0.7018003463745117, "learning_rate": 6.628388077345268e-06, "loss": 0.0209, "step": 15700 }, { "epoch": 0.1326549998944502, "grad_norm": 0.7666375041007996, "learning_rate": 6.632609980579245e-06, "loss": 0.021, "step": 15710 }, { "epoch": 0.13273943974161406, "grad_norm": 1.0364779233932495, "learning_rate": 6.636831883813224e-06, "loss": 0.0398, "step": 15720 }, { "epoch": 0.13282387958877795, "grad_norm": 0.8690795302391052, "learning_rate": 6.641053787047202e-06, "loss": 0.021, "step": 15730 }, { "epoch": 0.1329083194359418, "grad_norm": 0.5160264372825623, "learning_rate": 6.645275690281179e-06, "loss": 0.0155, "step": 15740 }, { "epoch": 0.1329927592831057, "grad_norm": 1.0986318588256836, "learning_rate": 6.6494975935151575e-06, "loss": 0.0251, "step": 15750 }, { "epoch": 0.13307719913026958, "grad_norm": 0.7017724514007568, "learning_rate": 6.6537194967491345e-06, "loss": 0.029, "step": 15760 }, { "epoch": 0.13316163897743344, "grad_norm": 0.7088260650634766, "learning_rate": 6.657941399983113e-06, "loss": 0.0274, "step": 15770 }, { "epoch": 0.13324607882459732, "grad_norm": 0.5978497266769409, "learning_rate": 6.662163303217091e-06, "loss": 0.0363, "step": 15780 }, { "epoch": 0.1333305186717612, "grad_norm": 0.610503077507019, "learning_rate": 6.666385206451069e-06, "loss": 0.0316, "step": 15790 }, { "epoch": 0.13341495851892507, "grad_norm": 0.16387026011943817, "learning_rate": 6.670607109685047e-06, "loss": 0.0159, "step": 15800 }, { "epoch": 0.13349939836608896, "grad_norm": 0.9383697509765625, "learning_rate": 6.674829012919024e-06, "loss": 0.0261, "step": 15810 }, { "epoch": 0.13358383821325284, "grad_norm": 0.4542647898197174, "learning_rate": 6.6790509161530025e-06, "loss": 0.0262, "step": 15820 }, { "epoch": 0.1336682780604167, "grad_norm": 0.9123470187187195, "learning_rate": 6.68327281938698e-06, "loss": 0.023, "step": 15830 }, { "epoch": 0.1337527179075806, "grad_norm": 1.2878137826919556, "learning_rate": 6.687494722620958e-06, "loss": 0.0262, "step": 15840 }, { "epoch": 0.13383715775474447, "grad_norm": 0.7430747747421265, "learning_rate": 6.691716625854936e-06, "loss": 0.0261, "step": 15850 }, { "epoch": 0.13392159760190833, "grad_norm": 0.7316389679908752, "learning_rate": 6.695938529088913e-06, "loss": 0.0327, "step": 15860 }, { "epoch": 0.13400603744907222, "grad_norm": 0.9163026809692383, "learning_rate": 6.700160432322892e-06, "loss": 0.0377, "step": 15870 }, { "epoch": 0.1340904772962361, "grad_norm": 0.42657244205474854, "learning_rate": 6.70438233555687e-06, "loss": 0.0206, "step": 15880 }, { "epoch": 0.13417491714339996, "grad_norm": 0.8899572491645813, "learning_rate": 6.7086042387908475e-06, "loss": 0.0333, "step": 15890 }, { "epoch": 0.13425935699056385, "grad_norm": 0.494885116815567, "learning_rate": 6.712826142024825e-06, "loss": 0.0226, "step": 15900 }, { "epoch": 0.13434379683772774, "grad_norm": 1.417901873588562, "learning_rate": 6.717048045258802e-06, "loss": 0.033, "step": 15910 }, { "epoch": 0.1344282366848916, "grad_norm": 1.1154241561889648, "learning_rate": 6.721269948492781e-06, "loss": 0.0262, "step": 15920 }, { "epoch": 0.13451267653205548, "grad_norm": 0.9707631468772888, "learning_rate": 6.725491851726759e-06, "loss": 0.0264, "step": 15930 }, { "epoch": 0.13459711637921937, "grad_norm": 1.4940481185913086, "learning_rate": 6.729713754960737e-06, "loss": 0.0238, "step": 15940 }, { "epoch": 0.13468155622638323, "grad_norm": 2.742016553878784, "learning_rate": 6.733935658194715e-06, "loss": 0.0625, "step": 15950 }, { "epoch": 0.1347659960735471, "grad_norm": 0.7398288249969482, "learning_rate": 6.738157561428693e-06, "loss": 0.0293, "step": 15960 }, { "epoch": 0.13485043592071097, "grad_norm": 0.9113770723342896, "learning_rate": 6.74237946466267e-06, "loss": 0.0364, "step": 15970 }, { "epoch": 0.13493487576787486, "grad_norm": 0.9463467597961426, "learning_rate": 6.746601367896648e-06, "loss": 0.0361, "step": 15980 }, { "epoch": 0.13501931561503874, "grad_norm": 1.1047667264938354, "learning_rate": 6.750823271130627e-06, "loss": 0.0287, "step": 15990 }, { "epoch": 0.1351037554622026, "grad_norm": 1.1979058980941772, "learning_rate": 6.755045174364604e-06, "loss": 0.0413, "step": 16000 }, { "epoch": 0.1351881953093665, "grad_norm": 0.7172194123268127, "learning_rate": 6.759267077598583e-06, "loss": 0.0243, "step": 16010 }, { "epoch": 0.13527263515653037, "grad_norm": 1.2005350589752197, "learning_rate": 6.76348898083256e-06, "loss": 0.0372, "step": 16020 }, { "epoch": 0.13535707500369423, "grad_norm": 1.1579266786575317, "learning_rate": 6.7677108840665375e-06, "loss": 0.0217, "step": 16030 }, { "epoch": 0.13544151485085812, "grad_norm": 0.8484644889831543, "learning_rate": 6.771932787300516e-06, "loss": 0.0222, "step": 16040 }, { "epoch": 0.135525954698022, "grad_norm": 1.0831055641174316, "learning_rate": 6.776154690534493e-06, "loss": 0.0244, "step": 16050 }, { "epoch": 0.13561039454518586, "grad_norm": 0.4132275879383087, "learning_rate": 6.780376593768472e-06, "loss": 0.0349, "step": 16060 }, { "epoch": 0.13569483439234975, "grad_norm": 1.300898551940918, "learning_rate": 6.784598497002449e-06, "loss": 0.0378, "step": 16070 }, { "epoch": 0.13577927423951364, "grad_norm": 1.1157641410827637, "learning_rate": 6.788820400236427e-06, "loss": 0.0494, "step": 16080 }, { "epoch": 0.1358637140866775, "grad_norm": 0.7862768173217773, "learning_rate": 6.7930423034704055e-06, "loss": 0.0167, "step": 16090 }, { "epoch": 0.13594815393384138, "grad_norm": 1.306864619255066, "learning_rate": 6.7972642067043825e-06, "loss": 0.0281, "step": 16100 }, { "epoch": 0.13603259378100527, "grad_norm": 1.6393890380859375, "learning_rate": 6.801486109938361e-06, "loss": 0.0503, "step": 16110 }, { "epoch": 0.13611703362816913, "grad_norm": 0.9184459447860718, "learning_rate": 6.805708013172338e-06, "loss": 0.0168, "step": 16120 }, { "epoch": 0.136201473475333, "grad_norm": 0.5478464961051941, "learning_rate": 6.809929916406317e-06, "loss": 0.0218, "step": 16130 }, { "epoch": 0.1362859133224969, "grad_norm": 0.6469419002532959, "learning_rate": 6.814151819640295e-06, "loss": 0.0403, "step": 16140 }, { "epoch": 0.13637035316966076, "grad_norm": 1.1825060844421387, "learning_rate": 6.818373722874272e-06, "loss": 0.0218, "step": 16150 }, { "epoch": 0.13645479301682464, "grad_norm": 0.16258588433265686, "learning_rate": 6.8225956261082505e-06, "loss": 0.0305, "step": 16160 }, { "epoch": 0.13653923286398853, "grad_norm": 0.8396166563034058, "learning_rate": 6.8268175293422275e-06, "loss": 0.0166, "step": 16170 }, { "epoch": 0.1366236727111524, "grad_norm": 1.0872628688812256, "learning_rate": 6.831039432576206e-06, "loss": 0.0323, "step": 16180 }, { "epoch": 0.13670811255831627, "grad_norm": 1.2692753076553345, "learning_rate": 6.835261335810184e-06, "loss": 0.0264, "step": 16190 }, { "epoch": 0.13679255240548013, "grad_norm": 0.7945291996002197, "learning_rate": 6.839483239044161e-06, "loss": 0.0229, "step": 16200 }, { "epoch": 0.13687699225264402, "grad_norm": 1.176315426826477, "learning_rate": 6.84370514227814e-06, "loss": 0.0477, "step": 16210 }, { "epoch": 0.1369614320998079, "grad_norm": 0.26465627551078796, "learning_rate": 6.847927045512117e-06, "loss": 0.0221, "step": 16220 }, { "epoch": 0.13704587194697176, "grad_norm": 0.6280030608177185, "learning_rate": 6.8521489487460955e-06, "loss": 0.0135, "step": 16230 }, { "epoch": 0.13713031179413565, "grad_norm": 0.7856820225715637, "learning_rate": 6.856370851980073e-06, "loss": 0.0244, "step": 16240 }, { "epoch": 0.13721475164129954, "grad_norm": 1.110695242881775, "learning_rate": 6.86059275521405e-06, "loss": 0.0385, "step": 16250 }, { "epoch": 0.1372991914884634, "grad_norm": 0.9092965722084045, "learning_rate": 6.864814658448029e-06, "loss": 0.0329, "step": 16260 }, { "epoch": 0.13738363133562728, "grad_norm": 0.5450162291526794, "learning_rate": 6.869036561682006e-06, "loss": 0.0289, "step": 16270 }, { "epoch": 0.13746807118279117, "grad_norm": 0.16027839481830597, "learning_rate": 6.873258464915985e-06, "loss": 0.0196, "step": 16280 }, { "epoch": 0.13755251102995503, "grad_norm": 0.7047494649887085, "learning_rate": 6.877480368149963e-06, "loss": 0.0311, "step": 16290 }, { "epoch": 0.1376369508771189, "grad_norm": 0.7148143649101257, "learning_rate": 6.881702271383941e-06, "loss": 0.0218, "step": 16300 }, { "epoch": 0.1377213907242828, "grad_norm": 0.7493844032287598, "learning_rate": 6.885924174617918e-06, "loss": 0.0209, "step": 16310 }, { "epoch": 0.13780583057144666, "grad_norm": 0.9049124717712402, "learning_rate": 6.890146077851895e-06, "loss": 0.0301, "step": 16320 }, { "epoch": 0.13789027041861054, "grad_norm": 1.0683902502059937, "learning_rate": 6.894367981085874e-06, "loss": 0.0214, "step": 16330 }, { "epoch": 0.13797471026577443, "grad_norm": 0.7696762681007385, "learning_rate": 6.898589884319852e-06, "loss": 0.0242, "step": 16340 }, { "epoch": 0.1380591501129383, "grad_norm": 0.555850088596344, "learning_rate": 6.902811787553831e-06, "loss": 0.0172, "step": 16350 }, { "epoch": 0.13814358996010218, "grad_norm": 0.6180983781814575, "learning_rate": 6.907033690787808e-06, "loss": 0.0183, "step": 16360 }, { "epoch": 0.13822802980726606, "grad_norm": 0.7653218507766724, "learning_rate": 6.9112555940217855e-06, "loss": 0.0323, "step": 16370 }, { "epoch": 0.13831246965442992, "grad_norm": 0.7628645300865173, "learning_rate": 6.915477497255763e-06, "loss": 0.0207, "step": 16380 }, { "epoch": 0.1383969095015938, "grad_norm": 0.8007361888885498, "learning_rate": 6.919699400489741e-06, "loss": 0.0215, "step": 16390 }, { "epoch": 0.13848134934875767, "grad_norm": 0.40342453122138977, "learning_rate": 6.92392130372372e-06, "loss": 0.0345, "step": 16400 }, { "epoch": 0.13856578919592155, "grad_norm": 0.26439565420150757, "learning_rate": 6.928143206957697e-06, "loss": 0.0235, "step": 16410 }, { "epoch": 0.13865022904308544, "grad_norm": 0.6485360860824585, "learning_rate": 6.932365110191675e-06, "loss": 0.0284, "step": 16420 }, { "epoch": 0.1387346688902493, "grad_norm": 0.18667587637901306, "learning_rate": 6.936587013425653e-06, "loss": 0.015, "step": 16430 }, { "epoch": 0.13881910873741318, "grad_norm": 0.32011356949806213, "learning_rate": 6.9408089166596306e-06, "loss": 0.0219, "step": 16440 }, { "epoch": 0.13890354858457707, "grad_norm": 0.5616617202758789, "learning_rate": 6.945030819893609e-06, "loss": 0.0266, "step": 16450 }, { "epoch": 0.13898798843174093, "grad_norm": 1.3478642702102661, "learning_rate": 6.949252723127586e-06, "loss": 0.0205, "step": 16460 }, { "epoch": 0.1390724282789048, "grad_norm": 0.767581582069397, "learning_rate": 6.953474626361565e-06, "loss": 0.028, "step": 16470 }, { "epoch": 0.1391568681260687, "grad_norm": 1.1472853422164917, "learning_rate": 6.957696529595542e-06, "loss": 0.0245, "step": 16480 }, { "epoch": 0.13924130797323256, "grad_norm": 1.1301432847976685, "learning_rate": 6.96191843282952e-06, "loss": 0.0371, "step": 16490 }, { "epoch": 0.13932574782039645, "grad_norm": 0.5188471078872681, "learning_rate": 6.9661403360634986e-06, "loss": 0.0186, "step": 16500 }, { "epoch": 0.13941018766756033, "grad_norm": 1.652117371559143, "learning_rate": 6.9703622392974756e-06, "loss": 0.0178, "step": 16510 }, { "epoch": 0.1394946275147242, "grad_norm": 0.5299106240272522, "learning_rate": 6.974584142531454e-06, "loss": 0.0313, "step": 16520 }, { "epoch": 0.13957906736188808, "grad_norm": 0.5246085524559021, "learning_rate": 6.978806045765431e-06, "loss": 0.0297, "step": 16530 }, { "epoch": 0.13966350720905196, "grad_norm": 1.0791010856628418, "learning_rate": 6.983027948999409e-06, "loss": 0.024, "step": 16540 }, { "epoch": 0.13974794705621582, "grad_norm": 0.7878665328025818, "learning_rate": 6.987249852233388e-06, "loss": 0.0203, "step": 16550 }, { "epoch": 0.1398323869033797, "grad_norm": 0.49127596616744995, "learning_rate": 6.991471755467365e-06, "loss": 0.0258, "step": 16560 }, { "epoch": 0.1399168267505436, "grad_norm": 0.9012401103973389, "learning_rate": 6.9956936587013436e-06, "loss": 0.0294, "step": 16570 }, { "epoch": 0.14000126659770745, "grad_norm": 0.9564355611801147, "learning_rate": 6.9999155619353206e-06, "loss": 0.0271, "step": 16580 }, { "epoch": 0.14008570644487134, "grad_norm": 1.3682494163513184, "learning_rate": 7.0041374651692984e-06, "loss": 0.0265, "step": 16590 }, { "epoch": 0.14017014629203522, "grad_norm": 0.8562265634536743, "learning_rate": 7.008359368403277e-06, "loss": 0.0207, "step": 16600 }, { "epoch": 0.14025458613919908, "grad_norm": 1.1039142608642578, "learning_rate": 7.012581271637254e-06, "loss": 0.0223, "step": 16610 }, { "epoch": 0.14033902598636297, "grad_norm": 0.6161004900932312, "learning_rate": 7.016803174871233e-06, "loss": 0.0209, "step": 16620 }, { "epoch": 0.14042346583352683, "grad_norm": 1.3864187002182007, "learning_rate": 7.02102507810521e-06, "loss": 0.0255, "step": 16630 }, { "epoch": 0.14050790568069071, "grad_norm": 1.0706815719604492, "learning_rate": 7.0252469813391886e-06, "loss": 0.0252, "step": 16640 }, { "epoch": 0.1405923455278546, "grad_norm": 1.2011888027191162, "learning_rate": 7.0294688845731664e-06, "loss": 0.0272, "step": 16650 }, { "epoch": 0.14067678537501846, "grad_norm": 0.3629244267940521, "learning_rate": 7.0336907878071434e-06, "loss": 0.0204, "step": 16660 }, { "epoch": 0.14076122522218235, "grad_norm": 1.155235767364502, "learning_rate": 7.037912691041122e-06, "loss": 0.0225, "step": 16670 }, { "epoch": 0.14084566506934623, "grad_norm": 0.621454656124115, "learning_rate": 7.0421345942751e-06, "loss": 0.0233, "step": 16680 }, { "epoch": 0.1409301049165101, "grad_norm": 0.4253043532371521, "learning_rate": 7.046356497509078e-06, "loss": 0.0262, "step": 16690 }, { "epoch": 0.14101454476367398, "grad_norm": 0.3048534095287323, "learning_rate": 7.050578400743056e-06, "loss": 0.0151, "step": 16700 }, { "epoch": 0.14109898461083786, "grad_norm": 0.3650418817996979, "learning_rate": 7.054800303977033e-06, "loss": 0.0251, "step": 16710 }, { "epoch": 0.14118342445800172, "grad_norm": 1.3182097673416138, "learning_rate": 7.0590222072110114e-06, "loss": 0.034, "step": 16720 }, { "epoch": 0.1412678643051656, "grad_norm": 1.9556939601898193, "learning_rate": 7.063244110444989e-06, "loss": 0.0188, "step": 16730 }, { "epoch": 0.1413523041523295, "grad_norm": 1.0927531719207764, "learning_rate": 7.067466013678967e-06, "loss": 0.0315, "step": 16740 }, { "epoch": 0.14143674399949335, "grad_norm": 0.8898778557777405, "learning_rate": 7.071687916912945e-06, "loss": 0.0225, "step": 16750 }, { "epoch": 0.14152118384665724, "grad_norm": 1.4461491107940674, "learning_rate": 7.075909820146922e-06, "loss": 0.0327, "step": 16760 }, { "epoch": 0.14160562369382113, "grad_norm": 0.4611034691333771, "learning_rate": 7.080131723380901e-06, "loss": 0.0257, "step": 16770 }, { "epoch": 0.14169006354098498, "grad_norm": 0.7587218880653381, "learning_rate": 7.084353626614879e-06, "loss": 0.0271, "step": 16780 }, { "epoch": 0.14177450338814887, "grad_norm": 0.9091900587081909, "learning_rate": 7.0885755298488564e-06, "loss": 0.0243, "step": 16790 }, { "epoch": 0.14185894323531276, "grad_norm": 1.3094395399093628, "learning_rate": 7.092797433082834e-06, "loss": 0.0299, "step": 16800 }, { "epoch": 0.14194338308247662, "grad_norm": 0.5225780010223389, "learning_rate": 7.097019336316813e-06, "loss": 0.0174, "step": 16810 }, { "epoch": 0.1420278229296405, "grad_norm": 0.9296630024909973, "learning_rate": 7.10124123955079e-06, "loss": 0.0246, "step": 16820 }, { "epoch": 0.1421122627768044, "grad_norm": 0.5103289484977722, "learning_rate": 7.105463142784768e-06, "loss": 0.0305, "step": 16830 }, { "epoch": 0.14219670262396825, "grad_norm": 0.500582218170166, "learning_rate": 7.109685046018746e-06, "loss": 0.0233, "step": 16840 }, { "epoch": 0.14228114247113213, "grad_norm": 0.7843692898750305, "learning_rate": 7.113906949252724e-06, "loss": 0.031, "step": 16850 }, { "epoch": 0.142365582318296, "grad_norm": 0.6765689849853516, "learning_rate": 7.118128852486702e-06, "loss": 0.021, "step": 16860 }, { "epoch": 0.14245002216545988, "grad_norm": 0.5467396378517151, "learning_rate": 7.122350755720679e-06, "loss": 0.0277, "step": 16870 }, { "epoch": 0.14253446201262376, "grad_norm": 1.1342296600341797, "learning_rate": 7.126572658954657e-06, "loss": 0.0261, "step": 16880 }, { "epoch": 0.14261890185978762, "grad_norm": 1.085603952407837, "learning_rate": 7.130794562188635e-06, "loss": 0.0297, "step": 16890 }, { "epoch": 0.1427033417069515, "grad_norm": 0.1375785768032074, "learning_rate": 7.135016465422613e-06, "loss": 0.0249, "step": 16900 }, { "epoch": 0.1427877815541154, "grad_norm": 0.5405246019363403, "learning_rate": 7.139238368656592e-06, "loss": 0.0239, "step": 16910 }, { "epoch": 0.14287222140127925, "grad_norm": 1.2729734182357788, "learning_rate": 7.143460271890569e-06, "loss": 0.0291, "step": 16920 }, { "epoch": 0.14295666124844314, "grad_norm": 1.0273692607879639, "learning_rate": 7.1476821751245465e-06, "loss": 0.0374, "step": 16930 }, { "epoch": 0.14304110109560703, "grad_norm": 0.6082038283348083, "learning_rate": 7.151904078358524e-06, "loss": 0.0252, "step": 16940 }, { "epoch": 0.14312554094277088, "grad_norm": 0.9218189716339111, "learning_rate": 7.156125981592502e-06, "loss": 0.0313, "step": 16950 }, { "epoch": 0.14320998078993477, "grad_norm": 0.566478431224823, "learning_rate": 7.160347884826481e-06, "loss": 0.0141, "step": 16960 }, { "epoch": 0.14329442063709866, "grad_norm": 0.48440155386924744, "learning_rate": 7.164569788060458e-06, "loss": 0.0207, "step": 16970 }, { "epoch": 0.14337886048426252, "grad_norm": 0.6211758852005005, "learning_rate": 7.168791691294437e-06, "loss": 0.0282, "step": 16980 }, { "epoch": 0.1434633003314264, "grad_norm": 0.7482503056526184, "learning_rate": 7.1730135945284145e-06, "loss": 0.0174, "step": 16990 }, { "epoch": 0.1435477401785903, "grad_norm": 0.5871439576148987, "learning_rate": 7.1772354977623915e-06, "loss": 0.0225, "step": 17000 }, { "epoch": 0.14363218002575415, "grad_norm": 0.5148375630378723, "learning_rate": 7.18145740099637e-06, "loss": 0.0194, "step": 17010 }, { "epoch": 0.14371661987291803, "grad_norm": 1.420873999595642, "learning_rate": 7.185679304230347e-06, "loss": 0.0187, "step": 17020 }, { "epoch": 0.14380105972008192, "grad_norm": 0.4894660413265228, "learning_rate": 7.189901207464326e-06, "loss": 0.0398, "step": 17030 }, { "epoch": 0.14388549956724578, "grad_norm": 0.5916805267333984, "learning_rate": 7.194123110698304e-06, "loss": 0.028, "step": 17040 }, { "epoch": 0.14396993941440966, "grad_norm": 0.8152384161949158, "learning_rate": 7.198345013932281e-06, "loss": 0.0251, "step": 17050 }, { "epoch": 0.14405437926157352, "grad_norm": 0.42847442626953125, "learning_rate": 7.2025669171662595e-06, "loss": 0.0221, "step": 17060 }, { "epoch": 0.1441388191087374, "grad_norm": 1.375949740409851, "learning_rate": 7.2067888204002365e-06, "loss": 0.0324, "step": 17070 }, { "epoch": 0.1442232589559013, "grad_norm": 0.6246162056922913, "learning_rate": 7.211010723634215e-06, "loss": 0.0168, "step": 17080 }, { "epoch": 0.14430769880306515, "grad_norm": 0.4282462000846863, "learning_rate": 7.215232626868193e-06, "loss": 0.0141, "step": 17090 }, { "epoch": 0.14439213865022904, "grad_norm": 0.9864509701728821, "learning_rate": 7.21945453010217e-06, "loss": 0.0305, "step": 17100 }, { "epoch": 0.14447657849739293, "grad_norm": 1.2863620519638062, "learning_rate": 7.223676433336149e-06, "loss": 0.0362, "step": 17110 }, { "epoch": 0.14456101834455679, "grad_norm": 1.1640506982803345, "learning_rate": 7.227898336570126e-06, "loss": 0.0193, "step": 17120 }, { "epoch": 0.14464545819172067, "grad_norm": 0.6666225790977478, "learning_rate": 7.2321202398041045e-06, "loss": 0.0143, "step": 17130 }, { "epoch": 0.14472989803888456, "grad_norm": 1.7881792783737183, "learning_rate": 7.236342143038082e-06, "loss": 0.0296, "step": 17140 }, { "epoch": 0.14481433788604842, "grad_norm": 1.1400673389434814, "learning_rate": 7.24056404627206e-06, "loss": 0.0217, "step": 17150 }, { "epoch": 0.1448987777332123, "grad_norm": 0.5915045142173767, "learning_rate": 7.244785949506038e-06, "loss": 0.0285, "step": 17160 }, { "epoch": 0.1449832175803762, "grad_norm": 1.3259762525558472, "learning_rate": 7.249007852740015e-06, "loss": 0.0446, "step": 17170 }, { "epoch": 0.14506765742754005, "grad_norm": 0.7801637649536133, "learning_rate": 7.253229755973994e-06, "loss": 0.0227, "step": 17180 }, { "epoch": 0.14515209727470393, "grad_norm": 1.0714577436447144, "learning_rate": 7.257451659207972e-06, "loss": 0.0204, "step": 17190 }, { "epoch": 0.14523653712186782, "grad_norm": 0.9477888941764832, "learning_rate": 7.2616735624419495e-06, "loss": 0.0214, "step": 17200 }, { "epoch": 0.14532097696903168, "grad_norm": 0.736675500869751, "learning_rate": 7.265895465675927e-06, "loss": 0.0299, "step": 17210 }, { "epoch": 0.14540541681619557, "grad_norm": 1.1067068576812744, "learning_rate": 7.270117368909904e-06, "loss": 0.0324, "step": 17220 }, { "epoch": 0.14548985666335945, "grad_norm": 0.903555691242218, "learning_rate": 7.274339272143883e-06, "loss": 0.0231, "step": 17230 }, { "epoch": 0.1455742965105233, "grad_norm": 1.1105679273605347, "learning_rate": 7.278561175377861e-06, "loss": 0.0357, "step": 17240 }, { "epoch": 0.1456587363576872, "grad_norm": 1.0410410165786743, "learning_rate": 7.282783078611839e-06, "loss": 0.0339, "step": 17250 }, { "epoch": 0.14574317620485108, "grad_norm": 0.673792839050293, "learning_rate": 7.287004981845817e-06, "loss": 0.0261, "step": 17260 }, { "epoch": 0.14582761605201494, "grad_norm": 0.7014194130897522, "learning_rate": 7.291226885079794e-06, "loss": 0.0243, "step": 17270 }, { "epoch": 0.14591205589917883, "grad_norm": 0.6873523592948914, "learning_rate": 7.295448788313772e-06, "loss": 0.0241, "step": 17280 }, { "epoch": 0.14599649574634269, "grad_norm": 0.6755647659301758, "learning_rate": 7.29967069154775e-06, "loss": 0.0195, "step": 17290 }, { "epoch": 0.14608093559350657, "grad_norm": 0.568419337272644, "learning_rate": 7.303892594781729e-06, "loss": 0.0229, "step": 17300 }, { "epoch": 0.14616537544067046, "grad_norm": 0.7231701612472534, "learning_rate": 7.308114498015706e-06, "loss": 0.0259, "step": 17310 }, { "epoch": 0.14624981528783432, "grad_norm": 0.6038322448730469, "learning_rate": 7.312336401249685e-06, "loss": 0.0167, "step": 17320 }, { "epoch": 0.1463342551349982, "grad_norm": 0.7714376449584961, "learning_rate": 7.316558304483662e-06, "loss": 0.019, "step": 17330 }, { "epoch": 0.1464186949821621, "grad_norm": 0.12846112251281738, "learning_rate": 7.3207802077176395e-06, "loss": 0.0186, "step": 17340 }, { "epoch": 0.14650313482932595, "grad_norm": 1.13346266746521, "learning_rate": 7.325002110951618e-06, "loss": 0.0224, "step": 17350 }, { "epoch": 0.14658757467648983, "grad_norm": 0.558281660079956, "learning_rate": 7.329224014185595e-06, "loss": 0.0227, "step": 17360 }, { "epoch": 0.14667201452365372, "grad_norm": 0.10718414187431335, "learning_rate": 7.333445917419574e-06, "loss": 0.0208, "step": 17370 }, { "epoch": 0.14675645437081758, "grad_norm": 0.6334197521209717, "learning_rate": 7.337667820653551e-06, "loss": 0.0349, "step": 17380 }, { "epoch": 0.14684089421798147, "grad_norm": 1.3383078575134277, "learning_rate": 7.341889723887529e-06, "loss": 0.0258, "step": 17390 }, { "epoch": 0.14692533406514535, "grad_norm": 0.9768407344818115, "learning_rate": 7.3461116271215075e-06, "loss": 0.0146, "step": 17400 }, { "epoch": 0.1470097739123092, "grad_norm": 0.8180995583534241, "learning_rate": 7.3503335303554845e-06, "loss": 0.0293, "step": 17410 }, { "epoch": 0.1470942137594731, "grad_norm": 0.4597821533679962, "learning_rate": 7.354555433589463e-06, "loss": 0.0179, "step": 17420 }, { "epoch": 0.14717865360663698, "grad_norm": 0.893372654914856, "learning_rate": 7.35877733682344e-06, "loss": 0.0288, "step": 17430 }, { "epoch": 0.14726309345380084, "grad_norm": 0.5068398714065552, "learning_rate": 7.362999240057418e-06, "loss": 0.0337, "step": 17440 }, { "epoch": 0.14734753330096473, "grad_norm": 0.8866484761238098, "learning_rate": 7.367221143291397e-06, "loss": 0.0212, "step": 17450 }, { "epoch": 0.14743197314812861, "grad_norm": 1.2878484725952148, "learning_rate": 7.371443046525374e-06, "loss": 0.0291, "step": 17460 }, { "epoch": 0.14751641299529247, "grad_norm": 0.823606550693512, "learning_rate": 7.3756649497593525e-06, "loss": 0.0245, "step": 17470 }, { "epoch": 0.14760085284245636, "grad_norm": 1.1895920038223267, "learning_rate": 7.3798868529933295e-06, "loss": 0.0406, "step": 17480 }, { "epoch": 0.14768529268962025, "grad_norm": 0.9803257584571838, "learning_rate": 7.384108756227308e-06, "loss": 0.0148, "step": 17490 }, { "epoch": 0.1477697325367841, "grad_norm": 1.1106904745101929, "learning_rate": 7.388330659461286e-06, "loss": 0.0277, "step": 17500 }, { "epoch": 0.147854172383948, "grad_norm": 0.10995715856552124, "learning_rate": 7.392552562695263e-06, "loss": 0.0169, "step": 17510 }, { "epoch": 0.14793861223111185, "grad_norm": 1.3582680225372314, "learning_rate": 7.396774465929242e-06, "loss": 0.0239, "step": 17520 }, { "epoch": 0.14802305207827574, "grad_norm": 0.7911913394927979, "learning_rate": 7.400996369163219e-06, "loss": 0.0266, "step": 17530 }, { "epoch": 0.14810749192543962, "grad_norm": 0.9430488348007202, "learning_rate": 7.4052182723971975e-06, "loss": 0.0204, "step": 17540 }, { "epoch": 0.14819193177260348, "grad_norm": 1.6596649885177612, "learning_rate": 7.409440175631175e-06, "loss": 0.032, "step": 17550 }, { "epoch": 0.14827637161976737, "grad_norm": 0.5810958743095398, "learning_rate": 7.413662078865152e-06, "loss": 0.0315, "step": 17560 }, { "epoch": 0.14836081146693125, "grad_norm": 0.6171731948852539, "learning_rate": 7.417883982099131e-06, "loss": 0.0188, "step": 17570 }, { "epoch": 0.1484452513140951, "grad_norm": 0.5586444735527039, "learning_rate": 7.422105885333108e-06, "loss": 0.0217, "step": 17580 }, { "epoch": 0.148529691161259, "grad_norm": 0.8644912242889404, "learning_rate": 7.426327788567087e-06, "loss": 0.024, "step": 17590 }, { "epoch": 0.14861413100842288, "grad_norm": 0.8249645829200745, "learning_rate": 7.430549691801065e-06, "loss": 0.0265, "step": 17600 }, { "epoch": 0.14869857085558674, "grad_norm": 0.7975590229034424, "learning_rate": 7.434771595035042e-06, "loss": 0.0167, "step": 17610 }, { "epoch": 0.14878301070275063, "grad_norm": 1.662542700767517, "learning_rate": 7.43899349826902e-06, "loss": 0.028, "step": 17620 }, { "epoch": 0.14886745054991452, "grad_norm": 0.6725811958312988, "learning_rate": 7.443215401502997e-06, "loss": 0.0199, "step": 17630 }, { "epoch": 0.14895189039707837, "grad_norm": 1.0559889078140259, "learning_rate": 7.447437304736976e-06, "loss": 0.0312, "step": 17640 }, { "epoch": 0.14903633024424226, "grad_norm": 0.2986188232898712, "learning_rate": 7.451659207970954e-06, "loss": 0.0235, "step": 17650 }, { "epoch": 0.14912077009140615, "grad_norm": 0.6499559879302979, "learning_rate": 7.455881111204933e-06, "loss": 0.0297, "step": 17660 }, { "epoch": 0.14920520993857, "grad_norm": 0.9796914458274841, "learning_rate": 7.46010301443891e-06, "loss": 0.0251, "step": 17670 }, { "epoch": 0.1492896497857339, "grad_norm": 0.9658421874046326, "learning_rate": 7.4643249176728875e-06, "loss": 0.0348, "step": 17680 }, { "epoch": 0.14937408963289778, "grad_norm": 0.6565117835998535, "learning_rate": 7.468546820906865e-06, "loss": 0.0197, "step": 17690 }, { "epoch": 0.14945852948006164, "grad_norm": 1.0379927158355713, "learning_rate": 7.472768724140843e-06, "loss": 0.0307, "step": 17700 }, { "epoch": 0.14954296932722552, "grad_norm": 0.4907655417919159, "learning_rate": 7.476990627374822e-06, "loss": 0.0269, "step": 17710 }, { "epoch": 0.14962740917438938, "grad_norm": 0.8533995747566223, "learning_rate": 7.481212530608799e-06, "loss": 0.0153, "step": 17720 }, { "epoch": 0.14971184902155327, "grad_norm": 1.2323578596115112, "learning_rate": 7.485434433842777e-06, "loss": 0.0195, "step": 17730 }, { "epoch": 0.14979628886871715, "grad_norm": 2.0895707607269287, "learning_rate": 7.489656337076755e-06, "loss": 0.0183, "step": 17740 }, { "epoch": 0.149880728715881, "grad_norm": 0.5203273296356201, "learning_rate": 7.4938782403107325e-06, "loss": 0.0216, "step": 17750 }, { "epoch": 0.1499651685630449, "grad_norm": 0.4012795090675354, "learning_rate": 7.498100143544711e-06, "loss": 0.0173, "step": 17760 }, { "epoch": 0.15004960841020878, "grad_norm": 0.789699912071228, "learning_rate": 7.502322046778688e-06, "loss": 0.0215, "step": 17770 }, { "epoch": 0.15013404825737264, "grad_norm": 1.0971298217773438, "learning_rate": 7.506543950012666e-06, "loss": 0.0406, "step": 17780 }, { "epoch": 0.15021848810453653, "grad_norm": 0.339201420545578, "learning_rate": 7.510765853246644e-06, "loss": 0.0133, "step": 17790 }, { "epoch": 0.15030292795170042, "grad_norm": 1.063773274421692, "learning_rate": 7.514987756480622e-06, "loss": 0.0232, "step": 17800 }, { "epoch": 0.15038736779886427, "grad_norm": 0.7994635701179504, "learning_rate": 7.5192096597146005e-06, "loss": 0.0242, "step": 17810 }, { "epoch": 0.15047180764602816, "grad_norm": 2.0278961658477783, "learning_rate": 7.5234315629485776e-06, "loss": 0.022, "step": 17820 }, { "epoch": 0.15055624749319205, "grad_norm": 0.7159832715988159, "learning_rate": 7.527653466182556e-06, "loss": 0.0401, "step": 17830 }, { "epoch": 0.1506406873403559, "grad_norm": 0.5196929574012756, "learning_rate": 7.531875369416533e-06, "loss": 0.0172, "step": 17840 }, { "epoch": 0.1507251271875198, "grad_norm": 1.1367143392562866, "learning_rate": 7.536097272650511e-06, "loss": 0.0369, "step": 17850 }, { "epoch": 0.15080956703468368, "grad_norm": 1.1037548780441284, "learning_rate": 7.54031917588449e-06, "loss": 0.0283, "step": 17860 }, { "epoch": 0.15089400688184754, "grad_norm": 0.8916688561439514, "learning_rate": 7.544541079118467e-06, "loss": 0.0216, "step": 17870 }, { "epoch": 0.15097844672901142, "grad_norm": 0.6192010045051575, "learning_rate": 7.5487629823524456e-06, "loss": 0.0255, "step": 17880 }, { "epoch": 0.1510628865761753, "grad_norm": 0.6381406784057617, "learning_rate": 7.5529848855864226e-06, "loss": 0.0179, "step": 17890 }, { "epoch": 0.15114732642333917, "grad_norm": 0.12822486460208893, "learning_rate": 7.5572067888204004e-06, "loss": 0.0213, "step": 17900 }, { "epoch": 0.15123176627050305, "grad_norm": 1.0982495546340942, "learning_rate": 7.561428692054379e-06, "loss": 0.0215, "step": 17910 }, { "epoch": 0.15131620611766694, "grad_norm": 1.4021823406219482, "learning_rate": 7.565650595288356e-06, "loss": 0.0256, "step": 17920 }, { "epoch": 0.1514006459648308, "grad_norm": 0.9561692476272583, "learning_rate": 7.569872498522335e-06, "loss": 0.0155, "step": 17930 }, { "epoch": 0.15148508581199469, "grad_norm": 1.586890459060669, "learning_rate": 7.574094401756312e-06, "loss": 0.0193, "step": 17940 }, { "epoch": 0.15156952565915854, "grad_norm": 0.9648007750511169, "learning_rate": 7.57831630499029e-06, "loss": 0.0233, "step": 17950 }, { "epoch": 0.15165396550632243, "grad_norm": 0.9344497919082642, "learning_rate": 7.582538208224268e-06, "loss": 0.0205, "step": 17960 }, { "epoch": 0.15173840535348632, "grad_norm": 0.5521931052207947, "learning_rate": 7.5867601114582454e-06, "loss": 0.0238, "step": 17970 }, { "epoch": 0.15182284520065017, "grad_norm": 0.6787450313568115, "learning_rate": 7.590982014692224e-06, "loss": 0.0275, "step": 17980 }, { "epoch": 0.15190728504781406, "grad_norm": 0.9550923705101013, "learning_rate": 7.595203917926202e-06, "loss": 0.0259, "step": 17990 }, { "epoch": 0.15199172489497795, "grad_norm": 1.4860354661941528, "learning_rate": 7.59942582116018e-06, "loss": 0.0287, "step": 18000 }, { "epoch": 0.1520761647421418, "grad_norm": 1.4900497198104858, "learning_rate": 7.603647724394158e-06, "loss": 0.0368, "step": 18010 }, { "epoch": 0.1521606045893057, "grad_norm": 0.8343244791030884, "learning_rate": 7.607869627628135e-06, "loss": 0.031, "step": 18020 }, { "epoch": 0.15224504443646958, "grad_norm": 0.43790456652641296, "learning_rate": 7.6120915308621134e-06, "loss": 0.027, "step": 18030 }, { "epoch": 0.15232948428363344, "grad_norm": 1.3273173570632935, "learning_rate": 7.616313434096091e-06, "loss": 0.0262, "step": 18040 }, { "epoch": 0.15241392413079732, "grad_norm": 0.3533484637737274, "learning_rate": 7.620535337330069e-06, "loss": 0.017, "step": 18050 }, { "epoch": 0.1524983639779612, "grad_norm": 0.525574803352356, "learning_rate": 7.624757240564047e-06, "loss": 0.0378, "step": 18060 }, { "epoch": 0.15258280382512507, "grad_norm": 1.3714412450790405, "learning_rate": 7.628979143798024e-06, "loss": 0.021, "step": 18070 }, { "epoch": 0.15266724367228895, "grad_norm": 0.7319872379302979, "learning_rate": 7.633201047032004e-06, "loss": 0.0233, "step": 18080 }, { "epoch": 0.15275168351945284, "grad_norm": 1.191005825996399, "learning_rate": 7.63742295026598e-06, "loss": 0.0366, "step": 18090 }, { "epoch": 0.1528361233666167, "grad_norm": 1.0213900804519653, "learning_rate": 7.64164485349996e-06, "loss": 0.0247, "step": 18100 }, { "epoch": 0.15292056321378059, "grad_norm": 1.2527483701705933, "learning_rate": 7.645866756733935e-06, "loss": 0.0325, "step": 18110 }, { "epoch": 0.15300500306094447, "grad_norm": 0.5067441463470459, "learning_rate": 7.650088659967913e-06, "loss": 0.0208, "step": 18120 }, { "epoch": 0.15308944290810833, "grad_norm": 0.7192997336387634, "learning_rate": 7.654310563201893e-06, "loss": 0.0148, "step": 18130 }, { "epoch": 0.15317388275527222, "grad_norm": 0.7585585117340088, "learning_rate": 7.658532466435869e-06, "loss": 0.0288, "step": 18140 }, { "epoch": 0.1532583226024361, "grad_norm": 0.4669145941734314, "learning_rate": 7.662754369669849e-06, "loss": 0.0273, "step": 18150 }, { "epoch": 0.15334276244959996, "grad_norm": 0.4873228073120117, "learning_rate": 7.666976272903825e-06, "loss": 0.0193, "step": 18160 }, { "epoch": 0.15342720229676385, "grad_norm": 0.9884337186813354, "learning_rate": 7.671198176137804e-06, "loss": 0.0197, "step": 18170 }, { "epoch": 0.1535116421439277, "grad_norm": 0.24142898619174957, "learning_rate": 7.675420079371782e-06, "loss": 0.0201, "step": 18180 }, { "epoch": 0.1535960819910916, "grad_norm": 1.100033164024353, "learning_rate": 7.679641982605758e-06, "loss": 0.0277, "step": 18190 }, { "epoch": 0.15368052183825548, "grad_norm": 0.678693950176239, "learning_rate": 7.683863885839738e-06, "loss": 0.0173, "step": 18200 }, { "epoch": 0.15376496168541934, "grad_norm": 1.0167897939682007, "learning_rate": 7.688085789073714e-06, "loss": 0.0282, "step": 18210 }, { "epoch": 0.15384940153258322, "grad_norm": 1.67361581325531, "learning_rate": 7.692307692307694e-06, "loss": 0.0338, "step": 18220 }, { "epoch": 0.1539338413797471, "grad_norm": 0.31713396310806274, "learning_rate": 7.696529595541671e-06, "loss": 0.0205, "step": 18230 }, { "epoch": 0.15401828122691097, "grad_norm": 0.8490947484970093, "learning_rate": 7.700751498775648e-06, "loss": 0.0392, "step": 18240 }, { "epoch": 0.15410272107407486, "grad_norm": 0.9748955368995667, "learning_rate": 7.704973402009627e-06, "loss": 0.0201, "step": 18250 }, { "epoch": 0.15418716092123874, "grad_norm": 0.5102134346961975, "learning_rate": 7.709195305243603e-06, "loss": 0.0238, "step": 18260 }, { "epoch": 0.1542716007684026, "grad_norm": 0.8037154078483582, "learning_rate": 7.713417208477583e-06, "loss": 0.015, "step": 18270 }, { "epoch": 0.1543560406155665, "grad_norm": 0.9099264740943909, "learning_rate": 7.71763911171156e-06, "loss": 0.0265, "step": 18280 }, { "epoch": 0.15444048046273037, "grad_norm": 0.5010583996772766, "learning_rate": 7.721861014945537e-06, "loss": 0.0196, "step": 18290 }, { "epoch": 0.15452492030989423, "grad_norm": 0.6165549755096436, "learning_rate": 7.726082918179516e-06, "loss": 0.0248, "step": 18300 }, { "epoch": 0.15460936015705812, "grad_norm": 1.5286715030670166, "learning_rate": 7.730304821413494e-06, "loss": 0.0348, "step": 18310 }, { "epoch": 0.154693800004222, "grad_norm": 0.693348228931427, "learning_rate": 7.734526724647472e-06, "loss": 0.0201, "step": 18320 }, { "epoch": 0.15477823985138586, "grad_norm": 0.5327478051185608, "learning_rate": 7.73874862788145e-06, "loss": 0.0171, "step": 18330 }, { "epoch": 0.15486267969854975, "grad_norm": 2.4216132164001465, "learning_rate": 7.742970531115428e-06, "loss": 0.0205, "step": 18340 }, { "epoch": 0.15494711954571364, "grad_norm": 0.4470646381378174, "learning_rate": 7.747192434349406e-06, "loss": 0.0367, "step": 18350 }, { "epoch": 0.1550315593928775, "grad_norm": 0.8960773348808289, "learning_rate": 7.751414337583384e-06, "loss": 0.0163, "step": 18360 }, { "epoch": 0.15511599924004138, "grad_norm": 0.9917023181915283, "learning_rate": 7.755636240817361e-06, "loss": 0.0233, "step": 18370 }, { "epoch": 0.15520043908720524, "grad_norm": 1.7014126777648926, "learning_rate": 7.75985814405134e-06, "loss": 0.0231, "step": 18380 }, { "epoch": 0.15528487893436912, "grad_norm": 0.6884021162986755, "learning_rate": 7.764080047285317e-06, "loss": 0.0272, "step": 18390 }, { "epoch": 0.155369318781533, "grad_norm": 1.5862574577331543, "learning_rate": 7.768301950519295e-06, "loss": 0.03, "step": 18400 }, { "epoch": 0.15545375862869687, "grad_norm": 0.4740973114967346, "learning_rate": 7.772523853753273e-06, "loss": 0.0314, "step": 18410 }, { "epoch": 0.15553819847586076, "grad_norm": 0.7941863536834717, "learning_rate": 7.77674575698725e-06, "loss": 0.0256, "step": 18420 }, { "epoch": 0.15562263832302464, "grad_norm": 1.3320714235305786, "learning_rate": 7.780967660221229e-06, "loss": 0.0254, "step": 18430 }, { "epoch": 0.1557070781701885, "grad_norm": 1.312302589416504, "learning_rate": 7.785189563455206e-06, "loss": 0.0313, "step": 18440 }, { "epoch": 0.1557915180173524, "grad_norm": 0.45516452193260193, "learning_rate": 7.789411466689184e-06, "loss": 0.0359, "step": 18450 }, { "epoch": 0.15587595786451627, "grad_norm": 0.743920087814331, "learning_rate": 7.793633369923162e-06, "loss": 0.0382, "step": 18460 }, { "epoch": 0.15596039771168013, "grad_norm": 1.12250554561615, "learning_rate": 7.79785527315714e-06, "loss": 0.0285, "step": 18470 }, { "epoch": 0.15604483755884402, "grad_norm": 0.996316134929657, "learning_rate": 7.802077176391118e-06, "loss": 0.0165, "step": 18480 }, { "epoch": 0.1561292774060079, "grad_norm": 1.1476751565933228, "learning_rate": 7.806299079625096e-06, "loss": 0.0318, "step": 18490 }, { "epoch": 0.15621371725317176, "grad_norm": 0.735664427280426, "learning_rate": 7.810520982859074e-06, "loss": 0.0226, "step": 18500 }, { "epoch": 0.15629815710033565, "grad_norm": 1.0471007823944092, "learning_rate": 7.814742886093051e-06, "loss": 0.0233, "step": 18510 }, { "epoch": 0.15638259694749954, "grad_norm": 0.9847665429115295, "learning_rate": 7.81896478932703e-06, "loss": 0.0118, "step": 18520 }, { "epoch": 0.1564670367946634, "grad_norm": 0.7352040410041809, "learning_rate": 7.823186692561007e-06, "loss": 0.026, "step": 18530 }, { "epoch": 0.15655147664182728, "grad_norm": 0.889775276184082, "learning_rate": 7.827408595794985e-06, "loss": 0.0297, "step": 18540 }, { "epoch": 0.15663591648899117, "grad_norm": 0.3841188848018646, "learning_rate": 7.831630499028963e-06, "loss": 0.0366, "step": 18550 }, { "epoch": 0.15672035633615503, "grad_norm": 0.8514705300331116, "learning_rate": 7.83585240226294e-06, "loss": 0.0289, "step": 18560 }, { "epoch": 0.1568047961833189, "grad_norm": 0.7975107431411743, "learning_rate": 7.840074305496919e-06, "loss": 0.0146, "step": 18570 }, { "epoch": 0.1568892360304828, "grad_norm": 0.8247429728507996, "learning_rate": 7.844296208730896e-06, "loss": 0.0217, "step": 18580 }, { "epoch": 0.15697367587764666, "grad_norm": 1.2800408601760864, "learning_rate": 7.848518111964874e-06, "loss": 0.0227, "step": 18590 }, { "epoch": 0.15705811572481054, "grad_norm": 0.5229666233062744, "learning_rate": 7.852740015198852e-06, "loss": 0.0212, "step": 18600 }, { "epoch": 0.1571425555719744, "grad_norm": 0.9822630882263184, "learning_rate": 7.85696191843283e-06, "loss": 0.0225, "step": 18610 }, { "epoch": 0.1572269954191383, "grad_norm": 0.7213184833526611, "learning_rate": 7.861183821666808e-06, "loss": 0.0234, "step": 18620 }, { "epoch": 0.15731143526630217, "grad_norm": 0.9168914556503296, "learning_rate": 7.865405724900786e-06, "loss": 0.0222, "step": 18630 }, { "epoch": 0.15739587511346603, "grad_norm": 1.2739872932434082, "learning_rate": 7.869627628134764e-06, "loss": 0.0279, "step": 18640 }, { "epoch": 0.15748031496062992, "grad_norm": 0.2975366711616516, "learning_rate": 7.873849531368741e-06, "loss": 0.0171, "step": 18650 }, { "epoch": 0.1575647548077938, "grad_norm": 0.741293728351593, "learning_rate": 7.87807143460272e-06, "loss": 0.0231, "step": 18660 }, { "epoch": 0.15764919465495766, "grad_norm": 0.4670557379722595, "learning_rate": 7.882293337836697e-06, "loss": 0.0243, "step": 18670 }, { "epoch": 0.15773363450212155, "grad_norm": 0.7920486927032471, "learning_rate": 7.886515241070675e-06, "loss": 0.028, "step": 18680 }, { "epoch": 0.15781807434928544, "grad_norm": 0.6799213886260986, "learning_rate": 7.890737144304653e-06, "loss": 0.0262, "step": 18690 }, { "epoch": 0.1579025141964493, "grad_norm": 1.1782416105270386, "learning_rate": 7.89495904753863e-06, "loss": 0.0227, "step": 18700 }, { "epoch": 0.15798695404361318, "grad_norm": 0.9231999516487122, "learning_rate": 7.899180950772609e-06, "loss": 0.0162, "step": 18710 }, { "epoch": 0.15807139389077707, "grad_norm": 0.8963977098464966, "learning_rate": 7.903402854006587e-06, "loss": 0.02, "step": 18720 }, { "epoch": 0.15815583373794093, "grad_norm": 0.47817277908325195, "learning_rate": 7.907624757240564e-06, "loss": 0.0227, "step": 18730 }, { "epoch": 0.1582402735851048, "grad_norm": 0.6572131514549255, "learning_rate": 7.911846660474542e-06, "loss": 0.0177, "step": 18740 }, { "epoch": 0.1583247134322687, "grad_norm": 0.46339648962020874, "learning_rate": 7.91606856370852e-06, "loss": 0.0293, "step": 18750 }, { "epoch": 0.15840915327943256, "grad_norm": 0.47824209928512573, "learning_rate": 7.920290466942498e-06, "loss": 0.0275, "step": 18760 }, { "epoch": 0.15849359312659644, "grad_norm": 0.34905990958213806, "learning_rate": 7.924512370176476e-06, "loss": 0.0236, "step": 18770 }, { "epoch": 0.15857803297376033, "grad_norm": 0.7845252156257629, "learning_rate": 7.928734273410454e-06, "loss": 0.0127, "step": 18780 }, { "epoch": 0.1586624728209242, "grad_norm": 0.6565216183662415, "learning_rate": 7.932956176644432e-06, "loss": 0.0403, "step": 18790 }, { "epoch": 0.15874691266808807, "grad_norm": 0.6413242816925049, "learning_rate": 7.937178079878411e-06, "loss": 0.0254, "step": 18800 }, { "epoch": 0.15883135251525196, "grad_norm": 0.5237478613853455, "learning_rate": 7.941399983112387e-06, "loss": 0.0237, "step": 18810 }, { "epoch": 0.15891579236241582, "grad_norm": 1.4816397428512573, "learning_rate": 7.945621886346365e-06, "loss": 0.0356, "step": 18820 }, { "epoch": 0.1590002322095797, "grad_norm": 1.0183550119400024, "learning_rate": 7.949843789580343e-06, "loss": 0.0408, "step": 18830 }, { "epoch": 0.15908467205674356, "grad_norm": 1.7455177307128906, "learning_rate": 7.95406569281432e-06, "loss": 0.0212, "step": 18840 }, { "epoch": 0.15916911190390745, "grad_norm": 0.397863507270813, "learning_rate": 7.9582875960483e-06, "loss": 0.0211, "step": 18850 }, { "epoch": 0.15925355175107134, "grad_norm": 0.504611074924469, "learning_rate": 7.962509499282277e-06, "loss": 0.0133, "step": 18860 }, { "epoch": 0.1593379915982352, "grad_norm": 0.31764400005340576, "learning_rate": 7.966731402516254e-06, "loss": 0.0172, "step": 18870 }, { "epoch": 0.15942243144539908, "grad_norm": 0.6867719888687134, "learning_rate": 7.970953305750232e-06, "loss": 0.0231, "step": 18880 }, { "epoch": 0.15950687129256297, "grad_norm": 0.1367802768945694, "learning_rate": 7.97517520898421e-06, "loss": 0.0251, "step": 18890 }, { "epoch": 0.15959131113972683, "grad_norm": 0.4428565800189972, "learning_rate": 7.97939711221819e-06, "loss": 0.0326, "step": 18900 }, { "epoch": 0.1596757509868907, "grad_norm": 0.25672075152397156, "learning_rate": 7.983619015452166e-06, "loss": 0.0245, "step": 18910 }, { "epoch": 0.1597601908340546, "grad_norm": 0.615278422832489, "learning_rate": 7.987840918686144e-06, "loss": 0.0178, "step": 18920 }, { "epoch": 0.15984463068121846, "grad_norm": 0.27693748474121094, "learning_rate": 7.992062821920123e-06, "loss": 0.0183, "step": 18930 }, { "epoch": 0.15992907052838234, "grad_norm": 0.8483620285987854, "learning_rate": 7.9962847251541e-06, "loss": 0.0222, "step": 18940 }, { "epoch": 0.16001351037554623, "grad_norm": 0.6355902552604675, "learning_rate": 8.000506628388079e-06, "loss": 0.0234, "step": 18950 }, { "epoch": 0.1600979502227101, "grad_norm": 1.1622638702392578, "learning_rate": 8.004728531622055e-06, "loss": 0.0155, "step": 18960 }, { "epoch": 0.16018239006987398, "grad_norm": 0.14388242363929749, "learning_rate": 8.008950434856035e-06, "loss": 0.0168, "step": 18970 }, { "epoch": 0.16026682991703786, "grad_norm": 1.3778141736984253, "learning_rate": 8.013172338090013e-06, "loss": 0.0232, "step": 18980 }, { "epoch": 0.16035126976420172, "grad_norm": 0.6891886591911316, "learning_rate": 8.017394241323989e-06, "loss": 0.0321, "step": 18990 }, { "epoch": 0.1604357096113656, "grad_norm": 0.6477354764938354, "learning_rate": 8.021616144557968e-06, "loss": 0.0127, "step": 19000 }, { "epoch": 0.1605201494585295, "grad_norm": 0.6898015737533569, "learning_rate": 8.025838047791944e-06, "loss": 0.0217, "step": 19010 }, { "epoch": 0.16060458930569335, "grad_norm": 1.409182071685791, "learning_rate": 8.030059951025924e-06, "loss": 0.0212, "step": 19020 }, { "epoch": 0.16068902915285724, "grad_norm": 0.5589684844017029, "learning_rate": 8.034281854259902e-06, "loss": 0.0223, "step": 19030 }, { "epoch": 0.1607734690000211, "grad_norm": 0.48344823718070984, "learning_rate": 8.038503757493878e-06, "loss": 0.0245, "step": 19040 }, { "epoch": 0.16085790884718498, "grad_norm": 0.6267610192298889, "learning_rate": 8.042725660727858e-06, "loss": 0.0166, "step": 19050 }, { "epoch": 0.16094234869434887, "grad_norm": 0.14168541133403778, "learning_rate": 8.046947563961834e-06, "loss": 0.0231, "step": 19060 }, { "epoch": 0.16102678854151273, "grad_norm": 1.3044264316558838, "learning_rate": 8.051169467195813e-06, "loss": 0.0201, "step": 19070 }, { "epoch": 0.16111122838867661, "grad_norm": 0.739151120185852, "learning_rate": 8.055391370429791e-06, "loss": 0.027, "step": 19080 }, { "epoch": 0.1611956682358405, "grad_norm": 0.6748553514480591, "learning_rate": 8.059613273663767e-06, "loss": 0.0175, "step": 19090 }, { "epoch": 0.16128010808300436, "grad_norm": 2.032522201538086, "learning_rate": 8.063835176897747e-06, "loss": 0.036, "step": 19100 }, { "epoch": 0.16136454793016825, "grad_norm": 0.7156232595443726, "learning_rate": 8.068057080131723e-06, "loss": 0.0162, "step": 19110 }, { "epoch": 0.16144898777733213, "grad_norm": 0.630846381187439, "learning_rate": 8.072278983365703e-06, "loss": 0.0178, "step": 19120 }, { "epoch": 0.161533427624496, "grad_norm": 1.3785903453826904, "learning_rate": 8.07650088659968e-06, "loss": 0.0282, "step": 19130 }, { "epoch": 0.16161786747165988, "grad_norm": 0.6879985928535461, "learning_rate": 8.080722789833658e-06, "loss": 0.0189, "step": 19140 }, { "epoch": 0.16170230731882376, "grad_norm": 0.6965945959091187, "learning_rate": 8.084944693067636e-06, "loss": 0.0326, "step": 19150 }, { "epoch": 0.16178674716598762, "grad_norm": 0.6370919942855835, "learning_rate": 8.089166596301612e-06, "loss": 0.0229, "step": 19160 }, { "epoch": 0.1618711870131515, "grad_norm": 0.752675473690033, "learning_rate": 8.093388499535592e-06, "loss": 0.0279, "step": 19170 }, { "epoch": 0.1619556268603154, "grad_norm": 0.9060198664665222, "learning_rate": 8.09761040276957e-06, "loss": 0.0145, "step": 19180 }, { "epoch": 0.16204006670747925, "grad_norm": 0.892031192779541, "learning_rate": 8.101832306003548e-06, "loss": 0.0175, "step": 19190 }, { "epoch": 0.16212450655464314, "grad_norm": 0.5730953812599182, "learning_rate": 8.106054209237525e-06, "loss": 0.0257, "step": 19200 }, { "epoch": 0.16220894640180702, "grad_norm": 0.8330877423286438, "learning_rate": 8.110276112471502e-06, "loss": 0.0276, "step": 19210 }, { "epoch": 0.16229338624897088, "grad_norm": 0.8787133097648621, "learning_rate": 8.114498015705481e-06, "loss": 0.0331, "step": 19220 }, { "epoch": 0.16237782609613477, "grad_norm": 0.41748911142349243, "learning_rate": 8.118719918939459e-06, "loss": 0.021, "step": 19230 }, { "epoch": 0.16246226594329866, "grad_norm": 0.577864408493042, "learning_rate": 8.122941822173437e-06, "loss": 0.0179, "step": 19240 }, { "epoch": 0.16254670579046251, "grad_norm": 0.5905469059944153, "learning_rate": 8.127163725407415e-06, "loss": 0.0184, "step": 19250 }, { "epoch": 0.1626311456376264, "grad_norm": 0.11228826642036438, "learning_rate": 8.131385628641391e-06, "loss": 0.0241, "step": 19260 }, { "epoch": 0.16271558548479026, "grad_norm": 0.7430187463760376, "learning_rate": 8.13560753187537e-06, "loss": 0.0242, "step": 19270 }, { "epoch": 0.16280002533195415, "grad_norm": 0.8222396373748779, "learning_rate": 8.139829435109348e-06, "loss": 0.0199, "step": 19280 }, { "epoch": 0.16288446517911803, "grad_norm": 1.2936477661132812, "learning_rate": 8.144051338343326e-06, "loss": 0.0114, "step": 19290 }, { "epoch": 0.1629689050262819, "grad_norm": 1.1596851348876953, "learning_rate": 8.148273241577304e-06, "loss": 0.0183, "step": 19300 }, { "epoch": 0.16305334487344578, "grad_norm": 0.96759033203125, "learning_rate": 8.152495144811282e-06, "loss": 0.0286, "step": 19310 }, { "epoch": 0.16313778472060966, "grad_norm": 0.6403232216835022, "learning_rate": 8.15671704804526e-06, "loss": 0.0122, "step": 19320 }, { "epoch": 0.16322222456777352, "grad_norm": 1.0062187910079956, "learning_rate": 8.160938951279238e-06, "loss": 0.0254, "step": 19330 }, { "epoch": 0.1633066644149374, "grad_norm": 0.6448637843132019, "learning_rate": 8.165160854513215e-06, "loss": 0.0274, "step": 19340 }, { "epoch": 0.1633911042621013, "grad_norm": 0.8233175873756409, "learning_rate": 8.169382757747193e-06, "loss": 0.0283, "step": 19350 }, { "epoch": 0.16347554410926515, "grad_norm": 1.1410486698150635, "learning_rate": 8.173604660981171e-06, "loss": 0.0354, "step": 19360 }, { "epoch": 0.16355998395642904, "grad_norm": 0.7661970257759094, "learning_rate": 8.177826564215149e-06, "loss": 0.0285, "step": 19370 }, { "epoch": 0.16364442380359293, "grad_norm": 0.2689370810985565, "learning_rate": 8.182048467449127e-06, "loss": 0.0165, "step": 19380 }, { "epoch": 0.16372886365075678, "grad_norm": 0.8232908844947815, "learning_rate": 8.186270370683105e-06, "loss": 0.0094, "step": 19390 }, { "epoch": 0.16381330349792067, "grad_norm": 1.268480658531189, "learning_rate": 8.190492273917083e-06, "loss": 0.0271, "step": 19400 }, { "epoch": 0.16389774334508456, "grad_norm": 1.1623715162277222, "learning_rate": 8.19471417715106e-06, "loss": 0.0254, "step": 19410 }, { "epoch": 0.16398218319224842, "grad_norm": 0.5675643086433411, "learning_rate": 8.198936080385038e-06, "loss": 0.0259, "step": 19420 }, { "epoch": 0.1640666230394123, "grad_norm": 0.8722912669181824, "learning_rate": 8.203157983619016e-06, "loss": 0.026, "step": 19430 }, { "epoch": 0.1641510628865762, "grad_norm": 0.8234683871269226, "learning_rate": 8.207379886852994e-06, "loss": 0.0211, "step": 19440 }, { "epoch": 0.16423550273374005, "grad_norm": 1.2127537727355957, "learning_rate": 8.211601790086972e-06, "loss": 0.0305, "step": 19450 }, { "epoch": 0.16431994258090393, "grad_norm": 0.870840847492218, "learning_rate": 8.21582369332095e-06, "loss": 0.0262, "step": 19460 }, { "epoch": 0.16440438242806782, "grad_norm": 0.7105597257614136, "learning_rate": 8.220045596554928e-06, "loss": 0.0202, "step": 19470 }, { "epoch": 0.16448882227523168, "grad_norm": 0.8713875412940979, "learning_rate": 8.224267499788905e-06, "loss": 0.0242, "step": 19480 }, { "epoch": 0.16457326212239556, "grad_norm": 0.8503458499908447, "learning_rate": 8.228489403022883e-06, "loss": 0.0355, "step": 19490 }, { "epoch": 0.16465770196955942, "grad_norm": 0.31307321786880493, "learning_rate": 8.232711306256861e-06, "loss": 0.0278, "step": 19500 }, { "epoch": 0.1647421418167233, "grad_norm": 0.3209267258644104, "learning_rate": 8.236933209490839e-06, "loss": 0.0327, "step": 19510 }, { "epoch": 0.1648265816638872, "grad_norm": 0.523529589176178, "learning_rate": 8.241155112724817e-06, "loss": 0.0166, "step": 19520 }, { "epoch": 0.16491102151105105, "grad_norm": 0.6811987161636353, "learning_rate": 8.245377015958795e-06, "loss": 0.0222, "step": 19530 }, { "epoch": 0.16499546135821494, "grad_norm": 0.5465698838233948, "learning_rate": 8.249598919192773e-06, "loss": 0.0201, "step": 19540 }, { "epoch": 0.16507990120537883, "grad_norm": 0.7691783308982849, "learning_rate": 8.25382082242675e-06, "loss": 0.0207, "step": 19550 }, { "epoch": 0.16516434105254268, "grad_norm": 0.8387575149536133, "learning_rate": 8.258042725660728e-06, "loss": 0.0221, "step": 19560 }, { "epoch": 0.16524878089970657, "grad_norm": 0.6076646447181702, "learning_rate": 8.262264628894706e-06, "loss": 0.0265, "step": 19570 }, { "epoch": 0.16533322074687046, "grad_norm": 0.15085478127002716, "learning_rate": 8.266486532128684e-06, "loss": 0.0308, "step": 19580 }, { "epoch": 0.16541766059403432, "grad_norm": 0.8239607810974121, "learning_rate": 8.270708435362662e-06, "loss": 0.0207, "step": 19590 }, { "epoch": 0.1655021004411982, "grad_norm": 1.2017537355422974, "learning_rate": 8.27493033859664e-06, "loss": 0.0262, "step": 19600 }, { "epoch": 0.1655865402883621, "grad_norm": 1.0507252216339111, "learning_rate": 8.279152241830618e-06, "loss": 0.0204, "step": 19610 }, { "epoch": 0.16567098013552595, "grad_norm": 0.6523212790489197, "learning_rate": 8.283374145064595e-06, "loss": 0.0196, "step": 19620 }, { "epoch": 0.16575541998268983, "grad_norm": 0.013760029338300228, "learning_rate": 8.287596048298573e-06, "loss": 0.0284, "step": 19630 }, { "epoch": 0.16583985982985372, "grad_norm": 0.09394026547670364, "learning_rate": 8.291817951532551e-06, "loss": 0.0206, "step": 19640 }, { "epoch": 0.16592429967701758, "grad_norm": 0.8365838527679443, "learning_rate": 8.29603985476653e-06, "loss": 0.0216, "step": 19650 }, { "epoch": 0.16600873952418146, "grad_norm": 0.7437459826469421, "learning_rate": 8.300261758000507e-06, "loss": 0.0305, "step": 19660 }, { "epoch": 0.16609317937134535, "grad_norm": 0.639014482498169, "learning_rate": 8.304483661234485e-06, "loss": 0.0176, "step": 19670 }, { "epoch": 0.1661776192185092, "grad_norm": 0.3736830949783325, "learning_rate": 8.308705564468463e-06, "loss": 0.0154, "step": 19680 }, { "epoch": 0.1662620590656731, "grad_norm": 1.3203850984573364, "learning_rate": 8.31292746770244e-06, "loss": 0.0159, "step": 19690 }, { "epoch": 0.16634649891283695, "grad_norm": 0.6148375868797302, "learning_rate": 8.31714937093642e-06, "loss": 0.0258, "step": 19700 }, { "epoch": 0.16643093876000084, "grad_norm": 0.4961124062538147, "learning_rate": 8.321371274170396e-06, "loss": 0.0143, "step": 19710 }, { "epoch": 0.16651537860716473, "grad_norm": 0.7715726494789124, "learning_rate": 8.325593177404374e-06, "loss": 0.0318, "step": 19720 }, { "epoch": 0.16659981845432859, "grad_norm": 0.9022248387336731, "learning_rate": 8.329815080638352e-06, "loss": 0.0239, "step": 19730 }, { "epoch": 0.16668425830149247, "grad_norm": 0.9371006488800049, "learning_rate": 8.33403698387233e-06, "loss": 0.037, "step": 19740 }, { "epoch": 0.16676869814865636, "grad_norm": 0.5915957093238831, "learning_rate": 8.33825888710631e-06, "loss": 0.0334, "step": 19750 }, { "epoch": 0.16685313799582022, "grad_norm": 1.2907952070236206, "learning_rate": 8.342480790340285e-06, "loss": 0.0281, "step": 19760 }, { "epoch": 0.1669375778429841, "grad_norm": 0.665012776851654, "learning_rate": 8.346702693574263e-06, "loss": 0.019, "step": 19770 }, { "epoch": 0.167022017690148, "grad_norm": 1.2732185125350952, "learning_rate": 8.350924596808241e-06, "loss": 0.0258, "step": 19780 }, { "epoch": 0.16710645753731185, "grad_norm": 0.9768775105476379, "learning_rate": 8.355146500042219e-06, "loss": 0.0187, "step": 19790 }, { "epoch": 0.16719089738447573, "grad_norm": 1.5952789783477783, "learning_rate": 8.359368403276199e-06, "loss": 0.0426, "step": 19800 }, { "epoch": 0.16727533723163962, "grad_norm": 0.23056384921073914, "learning_rate": 8.363590306510175e-06, "loss": 0.0134, "step": 19810 }, { "epoch": 0.16735977707880348, "grad_norm": 1.7438503503799438, "learning_rate": 8.367812209744154e-06, "loss": 0.02, "step": 19820 }, { "epoch": 0.16744421692596737, "grad_norm": 0.666573703289032, "learning_rate": 8.37203411297813e-06, "loss": 0.0179, "step": 19830 }, { "epoch": 0.16752865677313125, "grad_norm": 0.1926988959312439, "learning_rate": 8.376256016212108e-06, "loss": 0.0157, "step": 19840 }, { "epoch": 0.1676130966202951, "grad_norm": 0.5578415989875793, "learning_rate": 8.380477919446088e-06, "loss": 0.0202, "step": 19850 }, { "epoch": 0.167697536467459, "grad_norm": 2.190394401550293, "learning_rate": 8.384699822680064e-06, "loss": 0.037, "step": 19860 }, { "epoch": 0.16778197631462288, "grad_norm": 1.0391769409179688, "learning_rate": 8.388921725914044e-06, "loss": 0.0195, "step": 19870 }, { "epoch": 0.16786641616178674, "grad_norm": 0.955602765083313, "learning_rate": 8.39314362914802e-06, "loss": 0.0165, "step": 19880 }, { "epoch": 0.16795085600895063, "grad_norm": 1.3650152683258057, "learning_rate": 8.397365532381998e-06, "loss": 0.0179, "step": 19890 }, { "epoch": 0.1680352958561145, "grad_norm": 0.8405512571334839, "learning_rate": 8.401587435615977e-06, "loss": 0.0195, "step": 19900 }, { "epoch": 0.16811973570327837, "grad_norm": 0.34363847970962524, "learning_rate": 8.405809338849953e-06, "loss": 0.0188, "step": 19910 }, { "epoch": 0.16820417555044226, "grad_norm": 1.649635910987854, "learning_rate": 8.410031242083933e-06, "loss": 0.0248, "step": 19920 }, { "epoch": 0.16828861539760612, "grad_norm": 0.5615369081497192, "learning_rate": 8.41425314531791e-06, "loss": 0.0233, "step": 19930 }, { "epoch": 0.16837305524477, "grad_norm": 0.8982528448104858, "learning_rate": 8.418475048551887e-06, "loss": 0.0196, "step": 19940 }, { "epoch": 0.1684574950919339, "grad_norm": 0.44011473655700684, "learning_rate": 8.422696951785866e-06, "loss": 0.0214, "step": 19950 }, { "epoch": 0.16854193493909775, "grad_norm": 0.6065938472747803, "learning_rate": 8.426918855019843e-06, "loss": 0.0207, "step": 19960 }, { "epoch": 0.16862637478626163, "grad_norm": 0.9221178293228149, "learning_rate": 8.431140758253822e-06, "loss": 0.0325, "step": 19970 }, { "epoch": 0.16871081463342552, "grad_norm": 0.899339497089386, "learning_rate": 8.4353626614878e-06, "loss": 0.0271, "step": 19980 }, { "epoch": 0.16879525448058938, "grad_norm": 0.7450941205024719, "learning_rate": 8.439584564721778e-06, "loss": 0.0265, "step": 19990 }, { "epoch": 0.16887969432775327, "grad_norm": 0.6880413293838501, "learning_rate": 8.443806467955756e-06, "loss": 0.0261, "step": 20000 }, { "epoch": 0.16896413417491715, "grad_norm": 0.471720814704895, "learning_rate": 8.448028371189732e-06, "loss": 0.0204, "step": 20010 }, { "epoch": 0.169048574022081, "grad_norm": 1.1357702016830444, "learning_rate": 8.452250274423711e-06, "loss": 0.0384, "step": 20020 }, { "epoch": 0.1691330138692449, "grad_norm": 0.8670855760574341, "learning_rate": 8.45647217765769e-06, "loss": 0.024, "step": 20030 }, { "epoch": 0.16921745371640878, "grad_norm": 0.7537679076194763, "learning_rate": 8.460694080891667e-06, "loss": 0.0314, "step": 20040 }, { "epoch": 0.16930189356357264, "grad_norm": 0.2639882266521454, "learning_rate": 8.464915984125645e-06, "loss": 0.0183, "step": 20050 }, { "epoch": 0.16938633341073653, "grad_norm": 0.5188485980033875, "learning_rate": 8.469137887359621e-06, "loss": 0.0214, "step": 20060 }, { "epoch": 0.16947077325790041, "grad_norm": 1.0124999284744263, "learning_rate": 8.4733597905936e-06, "loss": 0.0375, "step": 20070 }, { "epoch": 0.16955521310506427, "grad_norm": 0.7410033345222473, "learning_rate": 8.477581693827579e-06, "loss": 0.0213, "step": 20080 }, { "epoch": 0.16963965295222816, "grad_norm": 0.612280547618866, "learning_rate": 8.481803597061556e-06, "loss": 0.0145, "step": 20090 }, { "epoch": 0.16972409279939205, "grad_norm": 0.22376209497451782, "learning_rate": 8.486025500295534e-06, "loss": 0.0181, "step": 20100 }, { "epoch": 0.1698085326465559, "grad_norm": 1.5183559656143188, "learning_rate": 8.49024740352951e-06, "loss": 0.0329, "step": 20110 }, { "epoch": 0.1698929724937198, "grad_norm": 0.8817469477653503, "learning_rate": 8.49446930676349e-06, "loss": 0.0207, "step": 20120 }, { "epoch": 0.16997741234088368, "grad_norm": 0.5784724950790405, "learning_rate": 8.498691209997468e-06, "loss": 0.0246, "step": 20130 }, { "epoch": 0.17006185218804754, "grad_norm": 0.6791893839836121, "learning_rate": 8.502913113231446e-06, "loss": 0.0161, "step": 20140 }, { "epoch": 0.17014629203521142, "grad_norm": 0.45977598428726196, "learning_rate": 8.507135016465424e-06, "loss": 0.0177, "step": 20150 }, { "epoch": 0.17023073188237528, "grad_norm": 0.8380937576293945, "learning_rate": 8.511356919699402e-06, "loss": 0.0242, "step": 20160 }, { "epoch": 0.17031517172953917, "grad_norm": 0.8813897967338562, "learning_rate": 8.51557882293338e-06, "loss": 0.0408, "step": 20170 }, { "epoch": 0.17039961157670305, "grad_norm": 0.5659587383270264, "learning_rate": 8.519800726167357e-06, "loss": 0.0368, "step": 20180 }, { "epoch": 0.1704840514238669, "grad_norm": 0.5255667567253113, "learning_rate": 8.524022629401335e-06, "loss": 0.0178, "step": 20190 }, { "epoch": 0.1705684912710308, "grad_norm": 0.9254892468452454, "learning_rate": 8.528244532635313e-06, "loss": 0.0218, "step": 20200 }, { "epoch": 0.17065293111819468, "grad_norm": 0.4980028569698334, "learning_rate": 8.53246643586929e-06, "loss": 0.0257, "step": 20210 }, { "epoch": 0.17073737096535854, "grad_norm": 1.5653847455978394, "learning_rate": 8.536688339103269e-06, "loss": 0.0271, "step": 20220 }, { "epoch": 0.17082181081252243, "grad_norm": 0.7834160923957825, "learning_rate": 8.540910242337247e-06, "loss": 0.0248, "step": 20230 }, { "epoch": 0.17090625065968632, "grad_norm": 0.8238977789878845, "learning_rate": 8.545132145571224e-06, "loss": 0.0183, "step": 20240 }, { "epoch": 0.17099069050685017, "grad_norm": 0.578898012638092, "learning_rate": 8.549354048805202e-06, "loss": 0.0131, "step": 20250 }, { "epoch": 0.17107513035401406, "grad_norm": 0.33324772119522095, "learning_rate": 8.55357595203918e-06, "loss": 0.0213, "step": 20260 }, { "epoch": 0.17115957020117795, "grad_norm": 0.31812095642089844, "learning_rate": 8.557797855273158e-06, "loss": 0.0222, "step": 20270 }, { "epoch": 0.1712440100483418, "grad_norm": 0.8923805356025696, "learning_rate": 8.562019758507136e-06, "loss": 0.0237, "step": 20280 }, { "epoch": 0.1713284498955057, "grad_norm": 0.5599892139434814, "learning_rate": 8.566241661741114e-06, "loss": 0.0229, "step": 20290 }, { "epoch": 0.17141288974266958, "grad_norm": 0.7676408886909485, "learning_rate": 8.570463564975092e-06, "loss": 0.0313, "step": 20300 }, { "epoch": 0.17149732958983344, "grad_norm": 0.775499165058136, "learning_rate": 8.57468546820907e-06, "loss": 0.0288, "step": 20310 }, { "epoch": 0.17158176943699732, "grad_norm": 0.0854470282793045, "learning_rate": 8.578907371443047e-06, "loss": 0.0119, "step": 20320 }, { "epoch": 0.1716662092841612, "grad_norm": 0.560004472732544, "learning_rate": 8.583129274677025e-06, "loss": 0.0183, "step": 20330 }, { "epoch": 0.17175064913132507, "grad_norm": 0.3713245093822479, "learning_rate": 8.587351177911003e-06, "loss": 0.0285, "step": 20340 }, { "epoch": 0.17183508897848895, "grad_norm": 1.2319215536117554, "learning_rate": 8.59157308114498e-06, "loss": 0.0215, "step": 20350 }, { "epoch": 0.1719195288256528, "grad_norm": 0.5062403678894043, "learning_rate": 8.595794984378959e-06, "loss": 0.019, "step": 20360 }, { "epoch": 0.1720039686728167, "grad_norm": 0.89622563123703, "learning_rate": 8.600016887612937e-06, "loss": 0.0196, "step": 20370 }, { "epoch": 0.17208840851998058, "grad_norm": 0.7703160643577576, "learning_rate": 8.604238790846914e-06, "loss": 0.0314, "step": 20380 }, { "epoch": 0.17217284836714444, "grad_norm": 0.7227667570114136, "learning_rate": 8.608460694080892e-06, "loss": 0.0131, "step": 20390 }, { "epoch": 0.17225728821430833, "grad_norm": 0.6670889854431152, "learning_rate": 8.61268259731487e-06, "loss": 0.022, "step": 20400 }, { "epoch": 0.17234172806147222, "grad_norm": 1.2192223072052002, "learning_rate": 8.616904500548848e-06, "loss": 0.0241, "step": 20410 }, { "epoch": 0.17242616790863607, "grad_norm": 0.2992931306362152, "learning_rate": 8.621126403782826e-06, "loss": 0.0271, "step": 20420 }, { "epoch": 0.17251060775579996, "grad_norm": 0.885554850101471, "learning_rate": 8.625348307016804e-06, "loss": 0.0196, "step": 20430 }, { "epoch": 0.17259504760296385, "grad_norm": 0.6696837544441223, "learning_rate": 8.629570210250782e-06, "loss": 0.0253, "step": 20440 }, { "epoch": 0.1726794874501277, "grad_norm": 0.7844366431236267, "learning_rate": 8.63379211348476e-06, "loss": 0.0306, "step": 20450 }, { "epoch": 0.1727639272972916, "grad_norm": 0.7399176359176636, "learning_rate": 8.638014016718737e-06, "loss": 0.0181, "step": 20460 }, { "epoch": 0.17284836714445548, "grad_norm": 1.0288081169128418, "learning_rate": 8.642235919952715e-06, "loss": 0.0262, "step": 20470 }, { "epoch": 0.17293280699161934, "grad_norm": 0.838527262210846, "learning_rate": 8.646457823186693e-06, "loss": 0.0202, "step": 20480 }, { "epoch": 0.17301724683878322, "grad_norm": 0.5378268361091614, "learning_rate": 8.65067972642067e-06, "loss": 0.0306, "step": 20490 }, { "epoch": 0.1731016866859471, "grad_norm": 0.1623738557100296, "learning_rate": 8.654901629654649e-06, "loss": 0.0195, "step": 20500 }, { "epoch": 0.17318612653311097, "grad_norm": 0.31767430901527405, "learning_rate": 8.659123532888627e-06, "loss": 0.0168, "step": 20510 }, { "epoch": 0.17327056638027485, "grad_norm": 0.7049438953399658, "learning_rate": 8.663345436122604e-06, "loss": 0.0378, "step": 20520 }, { "epoch": 0.17335500622743874, "grad_norm": 0.5588487982749939, "learning_rate": 8.667567339356582e-06, "loss": 0.0251, "step": 20530 }, { "epoch": 0.1734394460746026, "grad_norm": 0.7255685329437256, "learning_rate": 8.67178924259056e-06, "loss": 0.0317, "step": 20540 }, { "epoch": 0.17352388592176649, "grad_norm": 0.9708834886550903, "learning_rate": 8.67601114582454e-06, "loss": 0.0258, "step": 20550 }, { "epoch": 0.17360832576893037, "grad_norm": 1.6788145303726196, "learning_rate": 8.680233049058516e-06, "loss": 0.0184, "step": 20560 }, { "epoch": 0.17369276561609423, "grad_norm": 1.212470293045044, "learning_rate": 8.684454952292494e-06, "loss": 0.0248, "step": 20570 }, { "epoch": 0.17377720546325812, "grad_norm": 1.1146858930587769, "learning_rate": 8.688676855526472e-06, "loss": 0.0275, "step": 20580 }, { "epoch": 0.17386164531042198, "grad_norm": 0.8405473232269287, "learning_rate": 8.69289875876045e-06, "loss": 0.0155, "step": 20590 }, { "epoch": 0.17394608515758586, "grad_norm": 1.2580093145370483, "learning_rate": 8.697120661994429e-06, "loss": 0.024, "step": 20600 }, { "epoch": 0.17403052500474975, "grad_norm": 0.4797070324420929, "learning_rate": 8.701342565228405e-06, "loss": 0.018, "step": 20610 }, { "epoch": 0.1741149648519136, "grad_norm": 0.5831452012062073, "learning_rate": 8.705564468462383e-06, "loss": 0.0169, "step": 20620 }, { "epoch": 0.1741994046990775, "grad_norm": 0.12517735362052917, "learning_rate": 8.709786371696361e-06, "loss": 0.0199, "step": 20630 }, { "epoch": 0.17428384454624138, "grad_norm": 0.4516519606113434, "learning_rate": 8.714008274930339e-06, "loss": 0.0173, "step": 20640 }, { "epoch": 0.17436828439340524, "grad_norm": 0.9796106815338135, "learning_rate": 8.718230178164318e-06, "loss": 0.0308, "step": 20650 }, { "epoch": 0.17445272424056912, "grad_norm": 0.7764132022857666, "learning_rate": 8.722452081398294e-06, "loss": 0.0207, "step": 20660 }, { "epoch": 0.174537164087733, "grad_norm": 0.19364602863788605, "learning_rate": 8.726673984632274e-06, "loss": 0.0225, "step": 20670 }, { "epoch": 0.17462160393489687, "grad_norm": 0.688499391078949, "learning_rate": 8.73089588786625e-06, "loss": 0.0239, "step": 20680 }, { "epoch": 0.17470604378206075, "grad_norm": 0.21419072151184082, "learning_rate": 8.735117791100228e-06, "loss": 0.028, "step": 20690 }, { "epoch": 0.17479048362922464, "grad_norm": 0.43880534172058105, "learning_rate": 8.739339694334208e-06, "loss": 0.0106, "step": 20700 }, { "epoch": 0.1748749234763885, "grad_norm": 0.6083645820617676, "learning_rate": 8.743561597568184e-06, "loss": 0.0184, "step": 20710 }, { "epoch": 0.17495936332355239, "grad_norm": 0.33008891344070435, "learning_rate": 8.747783500802163e-06, "loss": 0.0159, "step": 20720 }, { "epoch": 0.17504380317071627, "grad_norm": 0.4389740526676178, "learning_rate": 8.75200540403614e-06, "loss": 0.0182, "step": 20730 }, { "epoch": 0.17512824301788013, "grad_norm": 0.8324745893478394, "learning_rate": 8.756227307270117e-06, "loss": 0.0186, "step": 20740 }, { "epoch": 0.17521268286504402, "grad_norm": 0.9922475218772888, "learning_rate": 8.760449210504097e-06, "loss": 0.0295, "step": 20750 }, { "epoch": 0.1752971227122079, "grad_norm": 1.1479512453079224, "learning_rate": 8.764671113738073e-06, "loss": 0.0315, "step": 20760 }, { "epoch": 0.17538156255937176, "grad_norm": 1.2883936166763306, "learning_rate": 8.768893016972053e-06, "loss": 0.0238, "step": 20770 }, { "epoch": 0.17546600240653565, "grad_norm": 0.5768187046051025, "learning_rate": 8.773114920206029e-06, "loss": 0.0111, "step": 20780 }, { "epoch": 0.17555044225369953, "grad_norm": 0.6644711494445801, "learning_rate": 8.777336823440007e-06, "loss": 0.0128, "step": 20790 }, { "epoch": 0.1756348821008634, "grad_norm": 0.45257917046546936, "learning_rate": 8.781558726673986e-06, "loss": 0.0265, "step": 20800 }, { "epoch": 0.17571932194802728, "grad_norm": 0.7497810125350952, "learning_rate": 8.785780629907962e-06, "loss": 0.0312, "step": 20810 }, { "epoch": 0.17580376179519114, "grad_norm": 0.5300589203834534, "learning_rate": 8.790002533141942e-06, "loss": 0.0319, "step": 20820 }, { "epoch": 0.17588820164235502, "grad_norm": 1.1167768239974976, "learning_rate": 8.794224436375918e-06, "loss": 0.0311, "step": 20830 }, { "epoch": 0.1759726414895189, "grad_norm": 0.21933062374591827, "learning_rate": 8.798446339609898e-06, "loss": 0.0186, "step": 20840 }, { "epoch": 0.17605708133668277, "grad_norm": 0.7417646050453186, "learning_rate": 8.802668242843875e-06, "loss": 0.0341, "step": 20850 }, { "epoch": 0.17614152118384666, "grad_norm": 1.410543441772461, "learning_rate": 8.806890146077852e-06, "loss": 0.0285, "step": 20860 }, { "epoch": 0.17622596103101054, "grad_norm": 0.6040399670600891, "learning_rate": 8.811112049311831e-06, "loss": 0.0319, "step": 20870 }, { "epoch": 0.1763104008781744, "grad_norm": 1.004228115081787, "learning_rate": 8.815333952545807e-06, "loss": 0.0251, "step": 20880 }, { "epoch": 0.1763948407253383, "grad_norm": 1.2318778038024902, "learning_rate": 8.819555855779787e-06, "loss": 0.0271, "step": 20890 }, { "epoch": 0.17647928057250217, "grad_norm": 0.6857467889785767, "learning_rate": 8.823777759013765e-06, "loss": 0.0294, "step": 20900 }, { "epoch": 0.17656372041966603, "grad_norm": 0.7768101692199707, "learning_rate": 8.827999662247741e-06, "loss": 0.0239, "step": 20910 }, { "epoch": 0.17664816026682992, "grad_norm": 1.404344081878662, "learning_rate": 8.83222156548172e-06, "loss": 0.0163, "step": 20920 }, { "epoch": 0.1767326001139938, "grad_norm": 1.0005812644958496, "learning_rate": 8.836443468715698e-06, "loss": 0.0195, "step": 20930 }, { "epoch": 0.17681703996115766, "grad_norm": 0.4561859965324402, "learning_rate": 8.840665371949676e-06, "loss": 0.0194, "step": 20940 }, { "epoch": 0.17690147980832155, "grad_norm": 0.43095871806144714, "learning_rate": 8.844887275183654e-06, "loss": 0.0108, "step": 20950 }, { "epoch": 0.17698591965548544, "grad_norm": 0.4756227135658264, "learning_rate": 8.84910917841763e-06, "loss": 0.0189, "step": 20960 }, { "epoch": 0.1770703595026493, "grad_norm": 0.5813237428665161, "learning_rate": 8.85333108165161e-06, "loss": 0.0288, "step": 20970 }, { "epoch": 0.17715479934981318, "grad_norm": 0.7948603630065918, "learning_rate": 8.857552984885588e-06, "loss": 0.0245, "step": 20980 }, { "epoch": 0.17723923919697707, "grad_norm": 1.2451450824737549, "learning_rate": 8.861774888119565e-06, "loss": 0.0344, "step": 20990 }, { "epoch": 0.17732367904414092, "grad_norm": 0.4377795457839966, "learning_rate": 8.865996791353543e-06, "loss": 0.0173, "step": 21000 }, { "epoch": 0.1774081188913048, "grad_norm": 0.6365768909454346, "learning_rate": 8.870218694587521e-06, "loss": 0.0297, "step": 21010 }, { "epoch": 0.17749255873846867, "grad_norm": 0.563241720199585, "learning_rate": 8.874440597821499e-06, "loss": 0.0185, "step": 21020 }, { "epoch": 0.17757699858563256, "grad_norm": 0.8110869526863098, "learning_rate": 8.878662501055477e-06, "loss": 0.0207, "step": 21030 }, { "epoch": 0.17766143843279644, "grad_norm": 1.116837978363037, "learning_rate": 8.882884404289455e-06, "loss": 0.0284, "step": 21040 }, { "epoch": 0.1777458782799603, "grad_norm": 1.151814341545105, "learning_rate": 8.887106307523433e-06, "loss": 0.0146, "step": 21050 }, { "epoch": 0.1778303181271242, "grad_norm": 0.5410155057907104, "learning_rate": 8.89132821075741e-06, "loss": 0.0183, "step": 21060 }, { "epoch": 0.17791475797428807, "grad_norm": 0.7610071897506714, "learning_rate": 8.895550113991388e-06, "loss": 0.0272, "step": 21070 }, { "epoch": 0.17799919782145193, "grad_norm": 0.7090203166007996, "learning_rate": 8.899772017225366e-06, "loss": 0.0191, "step": 21080 }, { "epoch": 0.17808363766861582, "grad_norm": 0.6936790943145752, "learning_rate": 8.903993920459344e-06, "loss": 0.025, "step": 21090 }, { "epoch": 0.1781680775157797, "grad_norm": 1.7987405061721802, "learning_rate": 8.908215823693322e-06, "loss": 0.0281, "step": 21100 }, { "epoch": 0.17825251736294356, "grad_norm": 0.9060165882110596, "learning_rate": 8.9124377269273e-06, "loss": 0.0267, "step": 21110 }, { "epoch": 0.17833695721010745, "grad_norm": 0.9449442028999329, "learning_rate": 8.916659630161278e-06, "loss": 0.0272, "step": 21120 }, { "epoch": 0.17842139705727134, "grad_norm": 0.9353969693183899, "learning_rate": 8.920881533395255e-06, "loss": 0.0204, "step": 21130 }, { "epoch": 0.1785058369044352, "grad_norm": 0.6663933992385864, "learning_rate": 8.925103436629233e-06, "loss": 0.0149, "step": 21140 }, { "epoch": 0.17859027675159908, "grad_norm": 0.6968703866004944, "learning_rate": 8.929325339863211e-06, "loss": 0.019, "step": 21150 }, { "epoch": 0.17867471659876297, "grad_norm": 1.1744678020477295, "learning_rate": 8.933547243097189e-06, "loss": 0.0192, "step": 21160 }, { "epoch": 0.17875915644592683, "grad_norm": 1.8768913745880127, "learning_rate": 8.937769146331167e-06, "loss": 0.0396, "step": 21170 }, { "epoch": 0.1788435962930907, "grad_norm": 0.22393180429935455, "learning_rate": 8.941991049565145e-06, "loss": 0.0195, "step": 21180 }, { "epoch": 0.1789280361402546, "grad_norm": 1.875630259513855, "learning_rate": 8.946212952799123e-06, "loss": 0.0261, "step": 21190 }, { "epoch": 0.17901247598741846, "grad_norm": 0.668274462223053, "learning_rate": 8.9504348560331e-06, "loss": 0.0301, "step": 21200 }, { "epoch": 0.17909691583458234, "grad_norm": 0.5564019083976746, "learning_rate": 8.954656759267078e-06, "loss": 0.0237, "step": 21210 }, { "epoch": 0.17918135568174623, "grad_norm": 0.5219719409942627, "learning_rate": 8.958878662501056e-06, "loss": 0.0199, "step": 21220 }, { "epoch": 0.1792657955289101, "grad_norm": 0.5303821563720703, "learning_rate": 8.963100565735034e-06, "loss": 0.0159, "step": 21230 }, { "epoch": 0.17935023537607397, "grad_norm": 0.27628156542778015, "learning_rate": 8.967322468969012e-06, "loss": 0.0214, "step": 21240 }, { "epoch": 0.17943467522323783, "grad_norm": 0.7736894488334656, "learning_rate": 8.97154437220299e-06, "loss": 0.0176, "step": 21250 }, { "epoch": 0.17951911507040172, "grad_norm": 1.501153826713562, "learning_rate": 8.975766275436968e-06, "loss": 0.0228, "step": 21260 }, { "epoch": 0.1796035549175656, "grad_norm": 0.47437116503715515, "learning_rate": 8.979988178670945e-06, "loss": 0.0263, "step": 21270 }, { "epoch": 0.17968799476472946, "grad_norm": 0.9479640126228333, "learning_rate": 8.984210081904923e-06, "loss": 0.014, "step": 21280 }, { "epoch": 0.17977243461189335, "grad_norm": 1.1217962503433228, "learning_rate": 8.988431985138901e-06, "loss": 0.021, "step": 21290 }, { "epoch": 0.17985687445905724, "grad_norm": 0.11993373185396194, "learning_rate": 8.992653888372879e-06, "loss": 0.0213, "step": 21300 }, { "epoch": 0.1799413143062211, "grad_norm": 0.4495078921318054, "learning_rate": 8.996875791606857e-06, "loss": 0.0261, "step": 21310 }, { "epoch": 0.18002575415338498, "grad_norm": 0.6416181921958923, "learning_rate": 9.001097694840835e-06, "loss": 0.0328, "step": 21320 }, { "epoch": 0.18011019400054887, "grad_norm": 0.5471420884132385, "learning_rate": 9.005319598074813e-06, "loss": 0.02, "step": 21330 }, { "epoch": 0.18019463384771273, "grad_norm": 0.6664610505104065, "learning_rate": 9.00954150130879e-06, "loss": 0.0187, "step": 21340 }, { "epoch": 0.1802790736948766, "grad_norm": 0.6638396382331848, "learning_rate": 9.013763404542768e-06, "loss": 0.0242, "step": 21350 }, { "epoch": 0.1803635135420405, "grad_norm": 1.7168620824813843, "learning_rate": 9.017985307776746e-06, "loss": 0.032, "step": 21360 }, { "epoch": 0.18044795338920436, "grad_norm": 0.6064811944961548, "learning_rate": 9.022207211010724e-06, "loss": 0.0336, "step": 21370 }, { "epoch": 0.18053239323636824, "grad_norm": 0.7324659824371338, "learning_rate": 9.026429114244702e-06, "loss": 0.0238, "step": 21380 }, { "epoch": 0.18061683308353213, "grad_norm": 0.6311683654785156, "learning_rate": 9.03065101747868e-06, "loss": 0.0162, "step": 21390 }, { "epoch": 0.180701272930696, "grad_norm": 0.08503486961126328, "learning_rate": 9.034872920712658e-06, "loss": 0.0145, "step": 21400 }, { "epoch": 0.18078571277785987, "grad_norm": 1.4483797550201416, "learning_rate": 9.039094823946636e-06, "loss": 0.0267, "step": 21410 }, { "epoch": 0.18087015262502376, "grad_norm": 0.3942844569683075, "learning_rate": 9.043316727180613e-06, "loss": 0.0216, "step": 21420 }, { "epoch": 0.18095459247218762, "grad_norm": 0.20434121787548065, "learning_rate": 9.047538630414591e-06, "loss": 0.0243, "step": 21430 }, { "epoch": 0.1810390323193515, "grad_norm": 0.6194642782211304, "learning_rate": 9.051760533648569e-06, "loss": 0.0246, "step": 21440 }, { "epoch": 0.1811234721665154, "grad_norm": 0.8167563080787659, "learning_rate": 9.055982436882547e-06, "loss": 0.0235, "step": 21450 }, { "epoch": 0.18120791201367925, "grad_norm": 1.3097645044326782, "learning_rate": 9.060204340116525e-06, "loss": 0.0254, "step": 21460 }, { "epoch": 0.18129235186084314, "grad_norm": 0.842731773853302, "learning_rate": 9.064426243350504e-06, "loss": 0.0283, "step": 21470 }, { "epoch": 0.181376791708007, "grad_norm": 1.1799395084381104, "learning_rate": 9.06864814658448e-06, "loss": 0.0252, "step": 21480 }, { "epoch": 0.18146123155517088, "grad_norm": 0.517005205154419, "learning_rate": 9.072870049818458e-06, "loss": 0.0174, "step": 21490 }, { "epoch": 0.18154567140233477, "grad_norm": 0.6658276319503784, "learning_rate": 9.077091953052436e-06, "loss": 0.0245, "step": 21500 }, { "epoch": 0.18163011124949863, "grad_norm": 0.1662447303533554, "learning_rate": 9.081313856286414e-06, "loss": 0.0132, "step": 21510 }, { "epoch": 0.1817145510966625, "grad_norm": 1.3099409341812134, "learning_rate": 9.085535759520394e-06, "loss": 0.0247, "step": 21520 }, { "epoch": 0.1817989909438264, "grad_norm": 0.9992308616638184, "learning_rate": 9.08975766275437e-06, "loss": 0.0203, "step": 21530 }, { "epoch": 0.18188343079099026, "grad_norm": 0.5216240286827087, "learning_rate": 9.093979565988348e-06, "loss": 0.0137, "step": 21540 }, { "epoch": 0.18196787063815414, "grad_norm": 1.0209072828292847, "learning_rate": 9.098201469222327e-06, "loss": 0.021, "step": 21550 }, { "epoch": 0.18205231048531803, "grad_norm": 0.4229433238506317, "learning_rate": 9.102423372456303e-06, "loss": 0.0243, "step": 21560 }, { "epoch": 0.1821367503324819, "grad_norm": 1.4109952449798584, "learning_rate": 9.106645275690283e-06, "loss": 0.0247, "step": 21570 }, { "epoch": 0.18222119017964578, "grad_norm": 1.0567189455032349, "learning_rate": 9.110867178924259e-06, "loss": 0.0264, "step": 21580 }, { "epoch": 0.18230563002680966, "grad_norm": 0.536791205406189, "learning_rate": 9.115089082158237e-06, "loss": 0.0225, "step": 21590 }, { "epoch": 0.18239006987397352, "grad_norm": 1.4146969318389893, "learning_rate": 9.119310985392217e-06, "loss": 0.0268, "step": 21600 }, { "epoch": 0.1824745097211374, "grad_norm": 0.3122701048851013, "learning_rate": 9.123532888626193e-06, "loss": 0.0214, "step": 21610 }, { "epoch": 0.1825589495683013, "grad_norm": 0.6153628826141357, "learning_rate": 9.127754791860172e-06, "loss": 0.0184, "step": 21620 }, { "epoch": 0.18264338941546515, "grad_norm": 0.38497281074523926, "learning_rate": 9.131976695094148e-06, "loss": 0.0217, "step": 21630 }, { "epoch": 0.18272782926262904, "grad_norm": 0.40652531385421753, "learning_rate": 9.136198598328128e-06, "loss": 0.0223, "step": 21640 }, { "epoch": 0.18281226910979292, "grad_norm": 0.9020569324493408, "learning_rate": 9.140420501562106e-06, "loss": 0.0238, "step": 21650 }, { "epoch": 0.18289670895695678, "grad_norm": 0.35313501954078674, "learning_rate": 9.144642404796082e-06, "loss": 0.0257, "step": 21660 }, { "epoch": 0.18298114880412067, "grad_norm": 1.2249512672424316, "learning_rate": 9.148864308030062e-06, "loss": 0.0294, "step": 21670 }, { "epoch": 0.18306558865128453, "grad_norm": 0.7965964674949646, "learning_rate": 9.153086211264038e-06, "loss": 0.0132, "step": 21680 }, { "epoch": 0.18315002849844841, "grad_norm": 0.5543729662895203, "learning_rate": 9.157308114498017e-06, "loss": 0.0296, "step": 21690 }, { "epoch": 0.1832344683456123, "grad_norm": 0.5512022376060486, "learning_rate": 9.161530017731995e-06, "loss": 0.0249, "step": 21700 }, { "epoch": 0.18331890819277616, "grad_norm": 1.0143157243728638, "learning_rate": 9.165751920965971e-06, "loss": 0.0231, "step": 21710 }, { "epoch": 0.18340334803994005, "grad_norm": 0.8234068751335144, "learning_rate": 9.16997382419995e-06, "loss": 0.0376, "step": 21720 }, { "epoch": 0.18348778788710393, "grad_norm": 0.31323763728141785, "learning_rate": 9.174195727433927e-06, "loss": 0.0197, "step": 21730 }, { "epoch": 0.1835722277342678, "grad_norm": 0.34038224816322327, "learning_rate": 9.178417630667907e-06, "loss": 0.0228, "step": 21740 }, { "epoch": 0.18365666758143168, "grad_norm": 0.38377144932746887, "learning_rate": 9.182639533901884e-06, "loss": 0.0153, "step": 21750 }, { "epoch": 0.18374110742859556, "grad_norm": 0.8861016035079956, "learning_rate": 9.18686143713586e-06, "loss": 0.0331, "step": 21760 }, { "epoch": 0.18382554727575942, "grad_norm": 0.3791744112968445, "learning_rate": 9.19108334036984e-06, "loss": 0.0396, "step": 21770 }, { "epoch": 0.1839099871229233, "grad_norm": 0.22583292424678802, "learning_rate": 9.195305243603816e-06, "loss": 0.0264, "step": 21780 }, { "epoch": 0.1839944269700872, "grad_norm": 0.7130795121192932, "learning_rate": 9.199527146837796e-06, "loss": 0.0205, "step": 21790 }, { "epoch": 0.18407886681725105, "grad_norm": 0.9065178036689758, "learning_rate": 9.203749050071774e-06, "loss": 0.0187, "step": 21800 }, { "epoch": 0.18416330666441494, "grad_norm": 0.6367951035499573, "learning_rate": 9.207970953305752e-06, "loss": 0.0302, "step": 21810 }, { "epoch": 0.18424774651157882, "grad_norm": 0.2718605697154999, "learning_rate": 9.21219285653973e-06, "loss": 0.0238, "step": 21820 }, { "epoch": 0.18433218635874268, "grad_norm": 0.7184665203094482, "learning_rate": 9.216414759773706e-06, "loss": 0.0143, "step": 21830 }, { "epoch": 0.18441662620590657, "grad_norm": 1.360174536705017, "learning_rate": 9.220636663007685e-06, "loss": 0.0213, "step": 21840 }, { "epoch": 0.18450106605307046, "grad_norm": 0.2520200312137604, "learning_rate": 9.224858566241663e-06, "loss": 0.0296, "step": 21850 }, { "epoch": 0.18458550590023431, "grad_norm": 1.065650463104248, "learning_rate": 9.22908046947564e-06, "loss": 0.0224, "step": 21860 }, { "epoch": 0.1846699457473982, "grad_norm": 0.3778926730155945, "learning_rate": 9.233302372709619e-06, "loss": 0.0211, "step": 21870 }, { "epoch": 0.1847543855945621, "grad_norm": 1.0814669132232666, "learning_rate": 9.237524275943595e-06, "loss": 0.025, "step": 21880 }, { "epoch": 0.18483882544172595, "grad_norm": 0.39990535378456116, "learning_rate": 9.241746179177574e-06, "loss": 0.0264, "step": 21890 }, { "epoch": 0.18492326528888983, "grad_norm": 0.7487533688545227, "learning_rate": 9.245968082411552e-06, "loss": 0.0255, "step": 21900 }, { "epoch": 0.1850077051360537, "grad_norm": 0.4932354986667633, "learning_rate": 9.25018998564553e-06, "loss": 0.0277, "step": 21910 }, { "epoch": 0.18509214498321758, "grad_norm": 1.1059212684631348, "learning_rate": 9.254411888879508e-06, "loss": 0.0295, "step": 21920 }, { "epoch": 0.18517658483038146, "grad_norm": 0.6468890309333801, "learning_rate": 9.258633792113486e-06, "loss": 0.0184, "step": 21930 }, { "epoch": 0.18526102467754532, "grad_norm": 0.42615365982055664, "learning_rate": 9.262855695347464e-06, "loss": 0.0221, "step": 21940 }, { "epoch": 0.1853454645247092, "grad_norm": 0.8322461843490601, "learning_rate": 9.267077598581442e-06, "loss": 0.028, "step": 21950 }, { "epoch": 0.1854299043718731, "grad_norm": 0.26698851585388184, "learning_rate": 9.27129950181542e-06, "loss": 0.0233, "step": 21960 }, { "epoch": 0.18551434421903695, "grad_norm": 0.7341625094413757, "learning_rate": 9.275521405049397e-06, "loss": 0.0215, "step": 21970 }, { "epoch": 0.18559878406620084, "grad_norm": 0.5480787754058838, "learning_rate": 9.279743308283375e-06, "loss": 0.0188, "step": 21980 }, { "epoch": 0.18568322391336473, "grad_norm": 0.3918367326259613, "learning_rate": 9.283965211517353e-06, "loss": 0.0213, "step": 21990 }, { "epoch": 0.18576766376052858, "grad_norm": 0.6627488136291504, "learning_rate": 9.28818711475133e-06, "loss": 0.028, "step": 22000 }, { "epoch": 0.18585210360769247, "grad_norm": 0.7205176949501038, "learning_rate": 9.292409017985309e-06, "loss": 0.0115, "step": 22010 }, { "epoch": 0.18593654345485636, "grad_norm": 0.20767153799533844, "learning_rate": 9.296630921219287e-06, "loss": 0.0255, "step": 22020 }, { "epoch": 0.18602098330202022, "grad_norm": 0.7260789275169373, "learning_rate": 9.300852824453264e-06, "loss": 0.0219, "step": 22030 }, { "epoch": 0.1861054231491841, "grad_norm": 0.5921332240104675, "learning_rate": 9.305074727687242e-06, "loss": 0.0252, "step": 22040 }, { "epoch": 0.186189862996348, "grad_norm": 0.5558503270149231, "learning_rate": 9.30929663092122e-06, "loss": 0.0333, "step": 22050 }, { "epoch": 0.18627430284351185, "grad_norm": 0.5905427932739258, "learning_rate": 9.313518534155198e-06, "loss": 0.0183, "step": 22060 }, { "epoch": 0.18635874269067573, "grad_norm": 0.6001026630401611, "learning_rate": 9.317740437389176e-06, "loss": 0.0344, "step": 22070 }, { "epoch": 0.18644318253783962, "grad_norm": 0.5138627290725708, "learning_rate": 9.321962340623154e-06, "loss": 0.0277, "step": 22080 }, { "epoch": 0.18652762238500348, "grad_norm": 0.5577110648155212, "learning_rate": 9.326184243857132e-06, "loss": 0.0153, "step": 22090 }, { "epoch": 0.18661206223216736, "grad_norm": 0.7849141359329224, "learning_rate": 9.33040614709111e-06, "loss": 0.0268, "step": 22100 }, { "epoch": 0.18669650207933125, "grad_norm": 0.4081132709980011, "learning_rate": 9.334628050325087e-06, "loss": 0.0146, "step": 22110 }, { "epoch": 0.1867809419264951, "grad_norm": 0.8027034401893616, "learning_rate": 9.338849953559065e-06, "loss": 0.0194, "step": 22120 }, { "epoch": 0.186865381773659, "grad_norm": 3.0059406757354736, "learning_rate": 9.343071856793043e-06, "loss": 0.029, "step": 22130 }, { "epoch": 0.18694982162082285, "grad_norm": 0.9337086081504822, "learning_rate": 9.347293760027021e-06, "loss": 0.0258, "step": 22140 }, { "epoch": 0.18703426146798674, "grad_norm": 1.3904565572738647, "learning_rate": 9.351515663260999e-06, "loss": 0.0195, "step": 22150 }, { "epoch": 0.18711870131515063, "grad_norm": 1.525667667388916, "learning_rate": 9.355737566494977e-06, "loss": 0.0214, "step": 22160 }, { "epoch": 0.18720314116231448, "grad_norm": 0.4177974760532379, "learning_rate": 9.359959469728954e-06, "loss": 0.0212, "step": 22170 }, { "epoch": 0.18728758100947837, "grad_norm": 1.2090568542480469, "learning_rate": 9.364181372962932e-06, "loss": 0.0222, "step": 22180 }, { "epoch": 0.18737202085664226, "grad_norm": 0.4071274697780609, "learning_rate": 9.36840327619691e-06, "loss": 0.0176, "step": 22190 }, { "epoch": 0.18745646070380612, "grad_norm": 0.933725118637085, "learning_rate": 9.372625179430888e-06, "loss": 0.0258, "step": 22200 }, { "epoch": 0.18754090055097, "grad_norm": 1.0569913387298584, "learning_rate": 9.376847082664866e-06, "loss": 0.013, "step": 22210 }, { "epoch": 0.1876253403981339, "grad_norm": 0.4248877465724945, "learning_rate": 9.381068985898844e-06, "loss": 0.0191, "step": 22220 }, { "epoch": 0.18770978024529775, "grad_norm": 0.3299630880355835, "learning_rate": 9.385290889132822e-06, "loss": 0.0195, "step": 22230 }, { "epoch": 0.18779422009246163, "grad_norm": 0.8580235242843628, "learning_rate": 9.3895127923668e-06, "loss": 0.0295, "step": 22240 }, { "epoch": 0.18787865993962552, "grad_norm": 0.33489611744880676, "learning_rate": 9.393734695600777e-06, "loss": 0.0141, "step": 22250 }, { "epoch": 0.18796309978678938, "grad_norm": 0.9522349238395691, "learning_rate": 9.397956598834755e-06, "loss": 0.0155, "step": 22260 }, { "epoch": 0.18804753963395326, "grad_norm": 0.45734187960624695, "learning_rate": 9.402178502068733e-06, "loss": 0.0186, "step": 22270 }, { "epoch": 0.18813197948111715, "grad_norm": 1.0095657110214233, "learning_rate": 9.406400405302711e-06, "loss": 0.0206, "step": 22280 }, { "epoch": 0.188216419328281, "grad_norm": 1.0023548603057861, "learning_rate": 9.410622308536689e-06, "loss": 0.0193, "step": 22290 }, { "epoch": 0.1883008591754449, "grad_norm": 0.6587561964988708, "learning_rate": 9.414844211770667e-06, "loss": 0.0414, "step": 22300 }, { "epoch": 0.18838529902260878, "grad_norm": 0.3865381181240082, "learning_rate": 9.419066115004644e-06, "loss": 0.0252, "step": 22310 }, { "epoch": 0.18846973886977264, "grad_norm": 0.587402880191803, "learning_rate": 9.423288018238624e-06, "loss": 0.019, "step": 22320 }, { "epoch": 0.18855417871693653, "grad_norm": 0.4001496434211731, "learning_rate": 9.4275099214726e-06, "loss": 0.0189, "step": 22330 }, { "epoch": 0.18863861856410039, "grad_norm": 0.46108192205429077, "learning_rate": 9.431731824706578e-06, "loss": 0.0157, "step": 22340 }, { "epoch": 0.18872305841126427, "grad_norm": 0.4393627643585205, "learning_rate": 9.435953727940556e-06, "loss": 0.0204, "step": 22350 }, { "epoch": 0.18880749825842816, "grad_norm": 0.3041136562824249, "learning_rate": 9.440175631174534e-06, "loss": 0.0188, "step": 22360 }, { "epoch": 0.18889193810559202, "grad_norm": 0.5894213318824768, "learning_rate": 9.444397534408513e-06, "loss": 0.0246, "step": 22370 }, { "epoch": 0.1889763779527559, "grad_norm": 0.32438114285469055, "learning_rate": 9.44861943764249e-06, "loss": 0.0146, "step": 22380 }, { "epoch": 0.1890608177999198, "grad_norm": 0.5058949589729309, "learning_rate": 9.452841340876467e-06, "loss": 0.0211, "step": 22390 }, { "epoch": 0.18914525764708365, "grad_norm": 0.3435748219490051, "learning_rate": 9.457063244110445e-06, "loss": 0.0125, "step": 22400 }, { "epoch": 0.18922969749424753, "grad_norm": 0.7105909585952759, "learning_rate": 9.461285147344423e-06, "loss": 0.0299, "step": 22410 }, { "epoch": 0.18931413734141142, "grad_norm": 1.0891956090927124, "learning_rate": 9.465507050578403e-06, "loss": 0.0379, "step": 22420 }, { "epoch": 0.18939857718857528, "grad_norm": 0.3709535598754883, "learning_rate": 9.469728953812379e-06, "loss": 0.0178, "step": 22430 }, { "epoch": 0.18948301703573917, "grad_norm": 0.46741846203804016, "learning_rate": 9.473950857046357e-06, "loss": 0.0153, "step": 22440 }, { "epoch": 0.18956745688290305, "grad_norm": 1.0549825429916382, "learning_rate": 9.478172760280334e-06, "loss": 0.03, "step": 22450 }, { "epoch": 0.1896518967300669, "grad_norm": 0.8814504742622375, "learning_rate": 9.482394663514312e-06, "loss": 0.041, "step": 22460 }, { "epoch": 0.1897363365772308, "grad_norm": 0.9147501587867737, "learning_rate": 9.486616566748292e-06, "loss": 0.0393, "step": 22470 }, { "epoch": 0.18982077642439468, "grad_norm": 0.5698230266571045, "learning_rate": 9.490838469982268e-06, "loss": 0.0264, "step": 22480 }, { "epoch": 0.18990521627155854, "grad_norm": 0.3201412856578827, "learning_rate": 9.495060373216248e-06, "loss": 0.017, "step": 22490 }, { "epoch": 0.18998965611872243, "grad_norm": 0.5634185075759888, "learning_rate": 9.499282276450224e-06, "loss": 0.0311, "step": 22500 }, { "epoch": 0.1900740959658863, "grad_norm": 0.8381980061531067, "learning_rate": 9.503504179684202e-06, "loss": 0.0214, "step": 22510 }, { "epoch": 0.19015853581305017, "grad_norm": 0.28473547101020813, "learning_rate": 9.507726082918181e-06, "loss": 0.0147, "step": 22520 }, { "epoch": 0.19024297566021406, "grad_norm": 1.359425663948059, "learning_rate": 9.511947986152157e-06, "loss": 0.021, "step": 22530 }, { "epoch": 0.19032741550737795, "grad_norm": 0.636663019657135, "learning_rate": 9.516169889386137e-06, "loss": 0.0256, "step": 22540 }, { "epoch": 0.1904118553545418, "grad_norm": 0.4693452715873718, "learning_rate": 9.520391792620115e-06, "loss": 0.019, "step": 22550 }, { "epoch": 0.1904962952017057, "grad_norm": 0.3986167907714844, "learning_rate": 9.524613695854091e-06, "loss": 0.0215, "step": 22560 }, { "epoch": 0.19058073504886955, "grad_norm": 0.7101147770881653, "learning_rate": 9.52883559908807e-06, "loss": 0.0273, "step": 22570 }, { "epoch": 0.19066517489603343, "grad_norm": 0.9458107948303223, "learning_rate": 9.533057502322047e-06, "loss": 0.0267, "step": 22580 }, { "epoch": 0.19074961474319732, "grad_norm": 0.9935819506645203, "learning_rate": 9.537279405556026e-06, "loss": 0.0276, "step": 22590 }, { "epoch": 0.19083405459036118, "grad_norm": 0.5251389145851135, "learning_rate": 9.541501308790004e-06, "loss": 0.0193, "step": 22600 }, { "epoch": 0.19091849443752507, "grad_norm": 0.7744702100753784, "learning_rate": 9.54572321202398e-06, "loss": 0.0291, "step": 22610 }, { "epoch": 0.19100293428468895, "grad_norm": 0.9163485169410706, "learning_rate": 9.54994511525796e-06, "loss": 0.0212, "step": 22620 }, { "epoch": 0.1910873741318528, "grad_norm": 0.8671236634254456, "learning_rate": 9.554167018491936e-06, "loss": 0.0246, "step": 22630 }, { "epoch": 0.1911718139790167, "grad_norm": 0.7087061405181885, "learning_rate": 9.558388921725915e-06, "loss": 0.0157, "step": 22640 }, { "epoch": 0.19125625382618058, "grad_norm": 0.5327713489532471, "learning_rate": 9.562610824959893e-06, "loss": 0.0194, "step": 22650 }, { "epoch": 0.19134069367334444, "grad_norm": 0.660888135433197, "learning_rate": 9.566832728193871e-06, "loss": 0.026, "step": 22660 }, { "epoch": 0.19142513352050833, "grad_norm": 0.9530481696128845, "learning_rate": 9.571054631427849e-06, "loss": 0.0245, "step": 22670 }, { "epoch": 0.19150957336767221, "grad_norm": 0.01797301135957241, "learning_rate": 9.575276534661825e-06, "loss": 0.0245, "step": 22680 }, { "epoch": 0.19159401321483607, "grad_norm": 0.8436072468757629, "learning_rate": 9.579498437895805e-06, "loss": 0.0301, "step": 22690 }, { "epoch": 0.19167845306199996, "grad_norm": 0.10149452090263367, "learning_rate": 9.583720341129783e-06, "loss": 0.0271, "step": 22700 }, { "epoch": 0.19176289290916385, "grad_norm": 1.2007874250411987, "learning_rate": 9.58794224436376e-06, "loss": 0.0248, "step": 22710 }, { "epoch": 0.1918473327563277, "grad_norm": 0.8234274983406067, "learning_rate": 9.592164147597738e-06, "loss": 0.0132, "step": 22720 }, { "epoch": 0.1919317726034916, "grad_norm": 0.4797816872596741, "learning_rate": 9.596386050831715e-06, "loss": 0.0198, "step": 22730 }, { "epoch": 0.19201621245065548, "grad_norm": 0.9319807887077332, "learning_rate": 9.600607954065694e-06, "loss": 0.0298, "step": 22740 }, { "epoch": 0.19210065229781934, "grad_norm": 0.6964142322540283, "learning_rate": 9.604829857299672e-06, "loss": 0.023, "step": 22750 }, { "epoch": 0.19218509214498322, "grad_norm": 1.0531154870986938, "learning_rate": 9.60905176053365e-06, "loss": 0.0284, "step": 22760 }, { "epoch": 0.1922695319921471, "grad_norm": 0.5918439626693726, "learning_rate": 9.613273663767628e-06, "loss": 0.0316, "step": 22770 }, { "epoch": 0.19235397183931097, "grad_norm": 0.5283039808273315, "learning_rate": 9.617495567001604e-06, "loss": 0.022, "step": 22780 }, { "epoch": 0.19243841168647485, "grad_norm": 0.6663150191307068, "learning_rate": 9.621717470235583e-06, "loss": 0.0163, "step": 22790 }, { "epoch": 0.1925228515336387, "grad_norm": 1.348347544670105, "learning_rate": 9.625939373469561e-06, "loss": 0.0235, "step": 22800 }, { "epoch": 0.1926072913808026, "grad_norm": 0.7426485419273376, "learning_rate": 9.630161276703539e-06, "loss": 0.0217, "step": 22810 }, { "epoch": 0.19269173122796648, "grad_norm": 0.5220271944999695, "learning_rate": 9.634383179937517e-06, "loss": 0.0221, "step": 22820 }, { "epoch": 0.19277617107513034, "grad_norm": 0.5653102397918701, "learning_rate": 9.638605083171495e-06, "loss": 0.0196, "step": 22830 }, { "epoch": 0.19286061092229423, "grad_norm": 1.4048312902450562, "learning_rate": 9.642826986405473e-06, "loss": 0.0251, "step": 22840 }, { "epoch": 0.19294505076945812, "grad_norm": 0.6588847041130066, "learning_rate": 9.64704888963945e-06, "loss": 0.0223, "step": 22850 }, { "epoch": 0.19302949061662197, "grad_norm": 0.8635783195495605, "learning_rate": 9.651270792873428e-06, "loss": 0.0245, "step": 22860 }, { "epoch": 0.19311393046378586, "grad_norm": 0.6668582558631897, "learning_rate": 9.655492696107406e-06, "loss": 0.0196, "step": 22870 }, { "epoch": 0.19319837031094975, "grad_norm": 0.4657265245914459, "learning_rate": 9.659714599341384e-06, "loss": 0.0189, "step": 22880 }, { "epoch": 0.1932828101581136, "grad_norm": 1.2314321994781494, "learning_rate": 9.663936502575362e-06, "loss": 0.0252, "step": 22890 }, { "epoch": 0.1933672500052775, "grad_norm": 1.1760622262954712, "learning_rate": 9.66815840580934e-06, "loss": 0.023, "step": 22900 }, { "epoch": 0.19345168985244138, "grad_norm": 0.6571716666221619, "learning_rate": 9.672380309043318e-06, "loss": 0.0238, "step": 22910 }, { "epoch": 0.19353612969960524, "grad_norm": 0.05845675989985466, "learning_rate": 9.676602212277296e-06, "loss": 0.019, "step": 22920 }, { "epoch": 0.19362056954676912, "grad_norm": 0.8779283761978149, "learning_rate": 9.680824115511273e-06, "loss": 0.0277, "step": 22930 }, { "epoch": 0.193705009393933, "grad_norm": 0.5394509434700012, "learning_rate": 9.685046018745251e-06, "loss": 0.0221, "step": 22940 }, { "epoch": 0.19378944924109687, "grad_norm": 0.2195930778980255, "learning_rate": 9.689267921979229e-06, "loss": 0.0159, "step": 22950 }, { "epoch": 0.19387388908826075, "grad_norm": 0.5737360119819641, "learning_rate": 9.693489825213207e-06, "loss": 0.0323, "step": 22960 }, { "epoch": 0.19395832893542464, "grad_norm": 0.42423325777053833, "learning_rate": 9.697711728447185e-06, "loss": 0.0226, "step": 22970 }, { "epoch": 0.1940427687825885, "grad_norm": 0.6025111675262451, "learning_rate": 9.701933631681163e-06, "loss": 0.0292, "step": 22980 }, { "epoch": 0.19412720862975238, "grad_norm": 0.4094398319721222, "learning_rate": 9.70615553491514e-06, "loss": 0.0206, "step": 22990 }, { "epoch": 0.19421164847691624, "grad_norm": 0.4955112636089325, "learning_rate": 9.710377438149118e-06, "loss": 0.0229, "step": 23000 }, { "epoch": 0.19429608832408013, "grad_norm": 0.20175540447235107, "learning_rate": 9.714599341383096e-06, "loss": 0.0212, "step": 23010 }, { "epoch": 0.19438052817124402, "grad_norm": 0.6320863366127014, "learning_rate": 9.718821244617074e-06, "loss": 0.0157, "step": 23020 }, { "epoch": 0.19446496801840787, "grad_norm": 0.8761091828346252, "learning_rate": 9.723043147851052e-06, "loss": 0.0234, "step": 23030 }, { "epoch": 0.19454940786557176, "grad_norm": 0.6412445306777954, "learning_rate": 9.72726505108503e-06, "loss": 0.0318, "step": 23040 }, { "epoch": 0.19463384771273565, "grad_norm": 0.055340275168418884, "learning_rate": 9.731486954319008e-06, "loss": 0.0237, "step": 23050 }, { "epoch": 0.1947182875598995, "grad_norm": 0.9011456370353699, "learning_rate": 9.735708857552986e-06, "loss": 0.0143, "step": 23060 }, { "epoch": 0.1948027274070634, "grad_norm": 0.502528727054596, "learning_rate": 9.739930760786963e-06, "loss": 0.0176, "step": 23070 }, { "epoch": 0.19488716725422728, "grad_norm": 0.7807870507240295, "learning_rate": 9.744152664020941e-06, "loss": 0.0181, "step": 23080 }, { "epoch": 0.19497160710139114, "grad_norm": 0.627671480178833, "learning_rate": 9.748374567254919e-06, "loss": 0.0209, "step": 23090 }, { "epoch": 0.19505604694855502, "grad_norm": 0.9701053500175476, "learning_rate": 9.752596470488897e-06, "loss": 0.0181, "step": 23100 }, { "epoch": 0.1951404867957189, "grad_norm": 0.8868230581283569, "learning_rate": 9.756818373722875e-06, "loss": 0.0219, "step": 23110 }, { "epoch": 0.19522492664288277, "grad_norm": 0.5306175351142883, "learning_rate": 9.761040276956853e-06, "loss": 0.0169, "step": 23120 }, { "epoch": 0.19530936649004665, "grad_norm": 1.2526495456695557, "learning_rate": 9.76526218019083e-06, "loss": 0.0217, "step": 23130 }, { "epoch": 0.19539380633721054, "grad_norm": 0.6512631773948669, "learning_rate": 9.769484083424808e-06, "loss": 0.0277, "step": 23140 }, { "epoch": 0.1954782461843744, "grad_norm": 0.7653629183769226, "learning_rate": 9.773705986658786e-06, "loss": 0.0147, "step": 23150 }, { "epoch": 0.19556268603153829, "grad_norm": 0.5152048468589783, "learning_rate": 9.777927889892764e-06, "loss": 0.0223, "step": 23160 }, { "epoch": 0.19564712587870217, "grad_norm": 0.7986600995063782, "learning_rate": 9.782149793126744e-06, "loss": 0.0196, "step": 23170 }, { "epoch": 0.19573156572586603, "grad_norm": 0.8255448937416077, "learning_rate": 9.78637169636072e-06, "loss": 0.0234, "step": 23180 }, { "epoch": 0.19581600557302992, "grad_norm": 0.22833503782749176, "learning_rate": 9.790593599594698e-06, "loss": 0.0135, "step": 23190 }, { "epoch": 0.1959004454201938, "grad_norm": 1.0898727178573608, "learning_rate": 9.794815502828676e-06, "loss": 0.0223, "step": 23200 }, { "epoch": 0.19598488526735766, "grad_norm": 0.6731241345405579, "learning_rate": 9.799037406062653e-06, "loss": 0.0155, "step": 23210 }, { "epoch": 0.19606932511452155, "grad_norm": 0.7127688527107239, "learning_rate": 9.803259309296633e-06, "loss": 0.0263, "step": 23220 }, { "epoch": 0.1961537649616854, "grad_norm": 1.1319520473480225, "learning_rate": 9.807481212530609e-06, "loss": 0.0276, "step": 23230 }, { "epoch": 0.1962382048088493, "grad_norm": 0.6966882944107056, "learning_rate": 9.811703115764587e-06, "loss": 0.019, "step": 23240 }, { "epoch": 0.19632264465601318, "grad_norm": 0.3186330497264862, "learning_rate": 9.815925018998565e-06, "loss": 0.0245, "step": 23250 }, { "epoch": 0.19640708450317704, "grad_norm": 0.7061418294906616, "learning_rate": 9.820146922232543e-06, "loss": 0.0302, "step": 23260 }, { "epoch": 0.19649152435034092, "grad_norm": 0.8877068161964417, "learning_rate": 9.824368825466522e-06, "loss": 0.0149, "step": 23270 }, { "epoch": 0.1965759641975048, "grad_norm": 0.302592396736145, "learning_rate": 9.828590728700498e-06, "loss": 0.0212, "step": 23280 }, { "epoch": 0.19666040404466867, "grad_norm": 1.0097402334213257, "learning_rate": 9.832812631934476e-06, "loss": 0.022, "step": 23290 }, { "epoch": 0.19674484389183255, "grad_norm": 0.8106351494789124, "learning_rate": 9.837034535168454e-06, "loss": 0.0187, "step": 23300 }, { "epoch": 0.19682928373899644, "grad_norm": 0.40141910314559937, "learning_rate": 9.841256438402432e-06, "loss": 0.0144, "step": 23310 }, { "epoch": 0.1969137235861603, "grad_norm": 0.949592113494873, "learning_rate": 9.845478341636412e-06, "loss": 0.0265, "step": 23320 }, { "epoch": 0.19699816343332419, "grad_norm": 0.6583260893821716, "learning_rate": 9.849700244870388e-06, "loss": 0.0187, "step": 23330 }, { "epoch": 0.19708260328048807, "grad_norm": 0.592963695526123, "learning_rate": 9.853922148104367e-06, "loss": 0.0218, "step": 23340 }, { "epoch": 0.19716704312765193, "grad_norm": 0.28028181195259094, "learning_rate": 9.858144051338343e-06, "loss": 0.0157, "step": 23350 }, { "epoch": 0.19725148297481582, "grad_norm": 0.6073331236839294, "learning_rate": 9.862365954572321e-06, "loss": 0.0229, "step": 23360 }, { "epoch": 0.1973359228219797, "grad_norm": 1.0735183954238892, "learning_rate": 9.8665878578063e-06, "loss": 0.0183, "step": 23370 }, { "epoch": 0.19742036266914356, "grad_norm": 1.2742737531661987, "learning_rate": 9.870809761040277e-06, "loss": 0.0201, "step": 23380 }, { "epoch": 0.19750480251630745, "grad_norm": 0.6334354281425476, "learning_rate": 9.875031664274257e-06, "loss": 0.0147, "step": 23390 }, { "epoch": 0.19758924236347133, "grad_norm": 0.9161413908004761, "learning_rate": 9.879253567508233e-06, "loss": 0.0249, "step": 23400 }, { "epoch": 0.1976736822106352, "grad_norm": 0.6330240368843079, "learning_rate": 9.88347547074221e-06, "loss": 0.0345, "step": 23410 }, { "epoch": 0.19775812205779908, "grad_norm": 0.7330021858215332, "learning_rate": 9.88769737397619e-06, "loss": 0.0192, "step": 23420 }, { "epoch": 0.19784256190496297, "grad_norm": 0.6762617230415344, "learning_rate": 9.891919277210166e-06, "loss": 0.0185, "step": 23430 }, { "epoch": 0.19792700175212682, "grad_norm": 0.24051202833652496, "learning_rate": 9.896141180444146e-06, "loss": 0.0228, "step": 23440 }, { "epoch": 0.1980114415992907, "grad_norm": 0.5574269890785217, "learning_rate": 9.900363083678122e-06, "loss": 0.0179, "step": 23450 }, { "epoch": 0.19809588144645457, "grad_norm": 0.42837730050086975, "learning_rate": 9.9045849869121e-06, "loss": 0.0281, "step": 23460 }, { "epoch": 0.19818032129361846, "grad_norm": 0.16687116026878357, "learning_rate": 9.90880689014608e-06, "loss": 0.0101, "step": 23470 }, { "epoch": 0.19826476114078234, "grad_norm": 0.7879504561424255, "learning_rate": 9.913028793380056e-06, "loss": 0.0272, "step": 23480 }, { "epoch": 0.1983492009879462, "grad_norm": 0.42011767625808716, "learning_rate": 9.917250696614035e-06, "loss": 0.0179, "step": 23490 }, { "epoch": 0.1984336408351101, "grad_norm": 0.6823874711990356, "learning_rate": 9.921472599848011e-06, "loss": 0.0169, "step": 23500 }, { "epoch": 0.19851808068227397, "grad_norm": 0.40220847725868225, "learning_rate": 9.925694503081991e-06, "loss": 0.0175, "step": 23510 }, { "epoch": 0.19860252052943783, "grad_norm": 0.7345701456069946, "learning_rate": 9.929916406315969e-06, "loss": 0.0189, "step": 23520 }, { "epoch": 0.19868696037660172, "grad_norm": 0.26998263597488403, "learning_rate": 9.934138309549945e-06, "loss": 0.0186, "step": 23530 }, { "epoch": 0.1987714002237656, "grad_norm": 0.9361869096755981, "learning_rate": 9.938360212783924e-06, "loss": 0.033, "step": 23540 }, { "epoch": 0.19885584007092946, "grad_norm": 0.7143958806991577, "learning_rate": 9.942582116017902e-06, "loss": 0.0384, "step": 23550 }, { "epoch": 0.19894027991809335, "grad_norm": 0.334596186876297, "learning_rate": 9.94680401925188e-06, "loss": 0.014, "step": 23560 }, { "epoch": 0.19902471976525724, "grad_norm": 0.014942856505513191, "learning_rate": 9.951025922485858e-06, "loss": 0.021, "step": 23570 }, { "epoch": 0.1991091596124211, "grad_norm": 1.0111356973648071, "learning_rate": 9.955247825719834e-06, "loss": 0.0312, "step": 23580 }, { "epoch": 0.19919359945958498, "grad_norm": 0.6565834283828735, "learning_rate": 9.959469728953814e-06, "loss": 0.0233, "step": 23590 }, { "epoch": 0.19927803930674887, "grad_norm": 0.5284163355827332, "learning_rate": 9.963691632187792e-06, "loss": 0.0183, "step": 23600 }, { "epoch": 0.19936247915391273, "grad_norm": 1.0039445161819458, "learning_rate": 9.96791353542177e-06, "loss": 0.0258, "step": 23610 }, { "epoch": 0.1994469190010766, "grad_norm": 1.2274736166000366, "learning_rate": 9.972135438655747e-06, "loss": 0.0358, "step": 23620 }, { "epoch": 0.1995313588482405, "grad_norm": 0.6464967131614685, "learning_rate": 9.976357341889723e-06, "loss": 0.0311, "step": 23630 }, { "epoch": 0.19961579869540436, "grad_norm": 0.6917843222618103, "learning_rate": 9.980579245123703e-06, "loss": 0.0173, "step": 23640 }, { "epoch": 0.19970023854256824, "grad_norm": 2.169175863265991, "learning_rate": 9.984801148357681e-06, "loss": 0.0208, "step": 23650 }, { "epoch": 0.1997846783897321, "grad_norm": 0.7171470522880554, "learning_rate": 9.989023051591659e-06, "loss": 0.0234, "step": 23660 }, { "epoch": 0.199869118236896, "grad_norm": 1.184633731842041, "learning_rate": 9.993244954825637e-06, "loss": 0.02, "step": 23670 }, { "epoch": 0.19995355808405987, "grad_norm": 1.2439827919006348, "learning_rate": 9.997466858059614e-06, "loss": 0.0228, "step": 23680 }, { "epoch": 0.20003799793122373, "grad_norm": 1.6860154867172241, "learning_rate": 9.999999991312082e-06, "loss": 0.0219, "step": 23690 }, { "epoch": 0.20012243777838762, "grad_norm": 0.26774707436561584, "learning_rate": 9.999999893573005e-06, "loss": 0.022, "step": 23700 }, { "epoch": 0.2002068776255515, "grad_norm": 0.5438324809074402, "learning_rate": 9.999999687234954e-06, "loss": 0.0189, "step": 23710 }, { "epoch": 0.20029131747271536, "grad_norm": 0.9235499501228333, "learning_rate": 9.999999372297934e-06, "loss": 0.0227, "step": 23720 }, { "epoch": 0.20037575731987925, "grad_norm": 0.9357843995094299, "learning_rate": 9.999998948761954e-06, "loss": 0.0248, "step": 23730 }, { "epoch": 0.20046019716704314, "grad_norm": 0.1804770529270172, "learning_rate": 9.99999841662702e-06, "loss": 0.0225, "step": 23740 }, { "epoch": 0.200544637014207, "grad_norm": 0.8744932413101196, "learning_rate": 9.999997775893145e-06, "loss": 0.0172, "step": 23750 }, { "epoch": 0.20062907686137088, "grad_norm": 0.3906909227371216, "learning_rate": 9.999997026560344e-06, "loss": 0.0234, "step": 23760 }, { "epoch": 0.20071351670853477, "grad_norm": 1.7380638122558594, "learning_rate": 9.999996168628632e-06, "loss": 0.0203, "step": 23770 }, { "epoch": 0.20079795655569863, "grad_norm": 0.3308785557746887, "learning_rate": 9.999995202098026e-06, "loss": 0.0211, "step": 23780 }, { "epoch": 0.2008823964028625, "grad_norm": 1.5441919565200806, "learning_rate": 9.999994126968551e-06, "loss": 0.0322, "step": 23790 }, { "epoch": 0.2009668362500264, "grad_norm": 0.47174009680747986, "learning_rate": 9.999992943240227e-06, "loss": 0.0194, "step": 23800 }, { "epoch": 0.20105127609719026, "grad_norm": 1.4504847526550293, "learning_rate": 9.99999165091308e-06, "loss": 0.0392, "step": 23810 }, { "epoch": 0.20113571594435414, "grad_norm": 0.6154069304466248, "learning_rate": 9.999990249987141e-06, "loss": 0.0156, "step": 23820 }, { "epoch": 0.20122015579151803, "grad_norm": 0.4528435170650482, "learning_rate": 9.999988740462436e-06, "loss": 0.0236, "step": 23830 }, { "epoch": 0.2013045956386819, "grad_norm": 0.7603870630264282, "learning_rate": 9.999987122339002e-06, "loss": 0.0217, "step": 23840 }, { "epoch": 0.20138903548584577, "grad_norm": 1.2501741647720337, "learning_rate": 9.999985395616872e-06, "loss": 0.022, "step": 23850 }, { "epoch": 0.20147347533300966, "grad_norm": 1.3133621215820312, "learning_rate": 9.999983560296083e-06, "loss": 0.0261, "step": 23860 }, { "epoch": 0.20155791518017352, "grad_norm": 1.1488386392593384, "learning_rate": 9.999981616376677e-06, "loss": 0.0184, "step": 23870 }, { "epoch": 0.2016423550273374, "grad_norm": 0.2226329892873764, "learning_rate": 9.999979563858695e-06, "loss": 0.0256, "step": 23880 }, { "epoch": 0.20172679487450126, "grad_norm": 1.083152174949646, "learning_rate": 9.999977402742179e-06, "loss": 0.0274, "step": 23890 }, { "epoch": 0.20181123472166515, "grad_norm": 0.1445496529340744, "learning_rate": 9.99997513302718e-06, "loss": 0.0213, "step": 23900 }, { "epoch": 0.20189567456882904, "grad_norm": 0.7973111271858215, "learning_rate": 9.999972754713746e-06, "loss": 0.0172, "step": 23910 }, { "epoch": 0.2019801144159929, "grad_norm": 0.7316088676452637, "learning_rate": 9.999970267801928e-06, "loss": 0.0256, "step": 23920 }, { "epoch": 0.20206455426315678, "grad_norm": 0.49026650190353394, "learning_rate": 9.99996767229178e-06, "loss": 0.0275, "step": 23930 }, { "epoch": 0.20214899411032067, "grad_norm": 0.9959790110588074, "learning_rate": 9.99996496818336e-06, "loss": 0.0268, "step": 23940 }, { "epoch": 0.20223343395748453, "grad_norm": 0.3520582318305969, "learning_rate": 9.999962155476725e-06, "loss": 0.0173, "step": 23950 }, { "epoch": 0.2023178738046484, "grad_norm": 1.0243306159973145, "learning_rate": 9.999959234171935e-06, "loss": 0.0299, "step": 23960 }, { "epoch": 0.2024023136518123, "grad_norm": 0.27443403005599976, "learning_rate": 9.999956204269057e-06, "loss": 0.018, "step": 23970 }, { "epoch": 0.20248675349897616, "grad_norm": 0.8837739825248718, "learning_rate": 9.999953065768153e-06, "loss": 0.0244, "step": 23980 }, { "epoch": 0.20257119334614004, "grad_norm": 0.36656829714775085, "learning_rate": 9.999949818669295e-06, "loss": 0.0207, "step": 23990 }, { "epoch": 0.20265563319330393, "grad_norm": 1.009102702140808, "learning_rate": 9.99994646297255e-06, "loss": 0.0322, "step": 24000 }, { "epoch": 0.2027400730404678, "grad_norm": 0.6408103108406067, "learning_rate": 9.999942998677993e-06, "loss": 0.0201, "step": 24010 }, { "epoch": 0.20282451288763167, "grad_norm": 0.4783472716808319, "learning_rate": 9.9999394257857e-06, "loss": 0.0345, "step": 24020 }, { "epoch": 0.20290895273479556, "grad_norm": 0.607334315776825, "learning_rate": 9.999935744295746e-06, "loss": 0.0228, "step": 24030 }, { "epoch": 0.20299339258195942, "grad_norm": 0.415075421333313, "learning_rate": 9.999931954208211e-06, "loss": 0.0192, "step": 24040 }, { "epoch": 0.2030778324291233, "grad_norm": 1.5391091108322144, "learning_rate": 9.99992805552318e-06, "loss": 0.0312, "step": 24050 }, { "epoch": 0.2031622722762872, "grad_norm": 0.7180690765380859, "learning_rate": 9.999924048240737e-06, "loss": 0.0322, "step": 24060 }, { "epoch": 0.20324671212345105, "grad_norm": 0.3074902296066284, "learning_rate": 9.999919932360967e-06, "loss": 0.028, "step": 24070 }, { "epoch": 0.20333115197061494, "grad_norm": 1.140161156654358, "learning_rate": 9.99991570788396e-06, "loss": 0.0304, "step": 24080 }, { "epoch": 0.20341559181777882, "grad_norm": 0.4300670027732849, "learning_rate": 9.99991137480981e-06, "loss": 0.0161, "step": 24090 }, { "epoch": 0.20350003166494268, "grad_norm": 0.6494176387786865, "learning_rate": 9.999906933138607e-06, "loss": 0.0173, "step": 24100 }, { "epoch": 0.20358447151210657, "grad_norm": 0.37760135531425476, "learning_rate": 9.999902382870452e-06, "loss": 0.0212, "step": 24110 }, { "epoch": 0.20366891135927043, "grad_norm": 0.38310012221336365, "learning_rate": 9.999897724005443e-06, "loss": 0.0148, "step": 24120 }, { "epoch": 0.2037533512064343, "grad_norm": 0.942863404750824, "learning_rate": 9.999892956543677e-06, "loss": 0.0154, "step": 24130 }, { "epoch": 0.2038377910535982, "grad_norm": 0.6768354773521423, "learning_rate": 9.999888080485263e-06, "loss": 0.0214, "step": 24140 }, { "epoch": 0.20392223090076206, "grad_norm": 0.8627556562423706, "learning_rate": 9.999883095830303e-06, "loss": 0.0181, "step": 24150 }, { "epoch": 0.20400667074792594, "grad_norm": 0.22143392264842987, "learning_rate": 9.999878002578907e-06, "loss": 0.0228, "step": 24160 }, { "epoch": 0.20409111059508983, "grad_norm": 0.6625533699989319, "learning_rate": 9.999872800731186e-06, "loss": 0.0257, "step": 24170 }, { "epoch": 0.2041755504422537, "grad_norm": 1.2319700717926025, "learning_rate": 9.999867490287251e-06, "loss": 0.017, "step": 24180 }, { "epoch": 0.20425999028941758, "grad_norm": 0.42215341329574585, "learning_rate": 9.999862071247221e-06, "loss": 0.016, "step": 24190 }, { "epoch": 0.20434443013658146, "grad_norm": 2.0788235664367676, "learning_rate": 9.99985654361121e-06, "loss": 0.0229, "step": 24200 }, { "epoch": 0.20442886998374532, "grad_norm": 1.0140620470046997, "learning_rate": 9.99985090737934e-06, "loss": 0.0273, "step": 24210 }, { "epoch": 0.2045133098309092, "grad_norm": 0.8672569990158081, "learning_rate": 9.999845162551732e-06, "loss": 0.0204, "step": 24220 }, { "epoch": 0.2045977496780731, "grad_norm": 0.38700470328330994, "learning_rate": 9.999839309128512e-06, "loss": 0.025, "step": 24230 }, { "epoch": 0.20468218952523695, "grad_norm": 0.484343022108078, "learning_rate": 9.999833347109806e-06, "loss": 0.0198, "step": 24240 }, { "epoch": 0.20476662937240084, "grad_norm": 0.5231393575668335, "learning_rate": 9.999827276495746e-06, "loss": 0.0323, "step": 24250 }, { "epoch": 0.20485106921956472, "grad_norm": 1.2726088762283325, "learning_rate": 9.999821097286462e-06, "loss": 0.0311, "step": 24260 }, { "epoch": 0.20493550906672858, "grad_norm": 0.8476937413215637, "learning_rate": 9.999814809482086e-06, "loss": 0.0154, "step": 24270 }, { "epoch": 0.20501994891389247, "grad_norm": 0.5462242960929871, "learning_rate": 9.99980841308276e-06, "loss": 0.0176, "step": 24280 }, { "epoch": 0.20510438876105636, "grad_norm": 0.2827368974685669, "learning_rate": 9.999801908088618e-06, "loss": 0.0252, "step": 24290 }, { "epoch": 0.20518882860822021, "grad_norm": 1.006183385848999, "learning_rate": 9.999795294499802e-06, "loss": 0.0327, "step": 24300 }, { "epoch": 0.2052732684553841, "grad_norm": 0.9341366291046143, "learning_rate": 9.999788572316458e-06, "loss": 0.0359, "step": 24310 }, { "epoch": 0.20535770830254796, "grad_norm": 1.842712163925171, "learning_rate": 9.999781741538733e-06, "loss": 0.0218, "step": 24320 }, { "epoch": 0.20544214814971185, "grad_norm": 1.0756981372833252, "learning_rate": 9.99977480216677e-06, "loss": 0.0183, "step": 24330 }, { "epoch": 0.20552658799687573, "grad_norm": 0.8136707544326782, "learning_rate": 9.999767754200723e-06, "loss": 0.0287, "step": 24340 }, { "epoch": 0.2056110278440396, "grad_norm": 1.033066987991333, "learning_rate": 9.999760597640746e-06, "loss": 0.0242, "step": 24350 }, { "epoch": 0.20569546769120348, "grad_norm": 0.40608954429626465, "learning_rate": 9.999753332486992e-06, "loss": 0.0219, "step": 24360 }, { "epoch": 0.20577990753836736, "grad_norm": 0.5183582901954651, "learning_rate": 9.999745958739622e-06, "loss": 0.0224, "step": 24370 }, { "epoch": 0.20586434738553122, "grad_norm": 0.8530271649360657, "learning_rate": 9.999738476398795e-06, "loss": 0.0247, "step": 24380 }, { "epoch": 0.2059487872326951, "grad_norm": 0.39629894495010376, "learning_rate": 9.99973088546467e-06, "loss": 0.0181, "step": 24390 }, { "epoch": 0.206033227079859, "grad_norm": 0.5583615899085999, "learning_rate": 9.999723185937416e-06, "loss": 0.015, "step": 24400 }, { "epoch": 0.20611766692702285, "grad_norm": 0.7332913875579834, "learning_rate": 9.999715377817199e-06, "loss": 0.026, "step": 24410 }, { "epoch": 0.20620210677418674, "grad_norm": 0.4313439130783081, "learning_rate": 9.999707461104188e-06, "loss": 0.0226, "step": 24420 }, { "epoch": 0.20628654662135062, "grad_norm": 0.49044302105903625, "learning_rate": 9.999699435798557e-06, "loss": 0.0173, "step": 24430 }, { "epoch": 0.20637098646851448, "grad_norm": 0.8466586470603943, "learning_rate": 9.999691301900478e-06, "loss": 0.0245, "step": 24440 }, { "epoch": 0.20645542631567837, "grad_norm": 0.44425851106643677, "learning_rate": 9.999683059410127e-06, "loss": 0.0272, "step": 24450 }, { "epoch": 0.20653986616284226, "grad_norm": 0.5856901407241821, "learning_rate": 9.999674708327686e-06, "loss": 0.0204, "step": 24460 }, { "epoch": 0.20662430601000611, "grad_norm": 0.5189399719238281, "learning_rate": 9.999666248653336e-06, "loss": 0.0171, "step": 24470 }, { "epoch": 0.20670874585717, "grad_norm": 0.37345439195632935, "learning_rate": 9.999657680387257e-06, "loss": 0.0194, "step": 24480 }, { "epoch": 0.2067931857043339, "grad_norm": 0.5634168386459351, "learning_rate": 9.99964900352964e-06, "loss": 0.0161, "step": 24490 }, { "epoch": 0.20687762555149775, "grad_norm": 0.888290286064148, "learning_rate": 9.999640218080671e-06, "loss": 0.0189, "step": 24500 }, { "epoch": 0.20696206539866163, "grad_norm": 0.582158625125885, "learning_rate": 9.99963132404054e-06, "loss": 0.0192, "step": 24510 }, { "epoch": 0.20704650524582552, "grad_norm": 1.3231120109558105, "learning_rate": 9.99962232140944e-06, "loss": 0.0212, "step": 24520 }, { "epoch": 0.20713094509298938, "grad_norm": 0.6610705256462097, "learning_rate": 9.99961321018757e-06, "loss": 0.019, "step": 24530 }, { "epoch": 0.20721538494015326, "grad_norm": 0.5416086316108704, "learning_rate": 9.999603990375125e-06, "loss": 0.024, "step": 24540 }, { "epoch": 0.20729982478731712, "grad_norm": 0.7570158839225769, "learning_rate": 9.999594661972305e-06, "loss": 0.0171, "step": 24550 }, { "epoch": 0.207384264634481, "grad_norm": 0.49400797486305237, "learning_rate": 9.999585224979314e-06, "loss": 0.0224, "step": 24560 }, { "epoch": 0.2074687044816449, "grad_norm": 0.8512254357337952, "learning_rate": 9.999575679396357e-06, "loss": 0.0435, "step": 24570 }, { "epoch": 0.20755314432880875, "grad_norm": 0.8787938356399536, "learning_rate": 9.999566025223639e-06, "loss": 0.0346, "step": 24580 }, { "epoch": 0.20763758417597264, "grad_norm": 0.49991533160209656, "learning_rate": 9.999556262461373e-06, "loss": 0.0191, "step": 24590 }, { "epoch": 0.20772202402313653, "grad_norm": 0.47370073199272156, "learning_rate": 9.99954639110977e-06, "loss": 0.0112, "step": 24600 }, { "epoch": 0.20780646387030038, "grad_norm": 0.5232733488082886, "learning_rate": 9.999536411169042e-06, "loss": 0.0204, "step": 24610 }, { "epoch": 0.20789090371746427, "grad_norm": 0.7604248523712158, "learning_rate": 9.999526322639408e-06, "loss": 0.0219, "step": 24620 }, { "epoch": 0.20797534356462816, "grad_norm": 0.685202419757843, "learning_rate": 9.999516125521088e-06, "loss": 0.0214, "step": 24630 }, { "epoch": 0.20805978341179202, "grad_norm": 0.3828114867210388, "learning_rate": 9.9995058198143e-06, "loss": 0.0263, "step": 24640 }, { "epoch": 0.2081442232589559, "grad_norm": 0.5686129331588745, "learning_rate": 9.999495405519273e-06, "loss": 0.0178, "step": 24650 }, { "epoch": 0.2082286631061198, "grad_norm": 0.3212684392929077, "learning_rate": 9.999484882636228e-06, "loss": 0.0249, "step": 24660 }, { "epoch": 0.20831310295328365, "grad_norm": 0.10122224688529968, "learning_rate": 9.999474251165398e-06, "loss": 0.0223, "step": 24670 }, { "epoch": 0.20839754280044753, "grad_norm": 0.4753769636154175, "learning_rate": 9.99946351110701e-06, "loss": 0.0366, "step": 24680 }, { "epoch": 0.20848198264761142, "grad_norm": 1.5747421979904175, "learning_rate": 9.9994526624613e-06, "loss": 0.0313, "step": 24690 }, { "epoch": 0.20856642249477528, "grad_norm": 0.7882055640220642, "learning_rate": 9.999441705228504e-06, "loss": 0.0249, "step": 24700 }, { "epoch": 0.20865086234193916, "grad_norm": 0.46692579984664917, "learning_rate": 9.999430639408856e-06, "loss": 0.0178, "step": 24710 }, { "epoch": 0.20873530218910305, "grad_norm": 0.7969984412193298, "learning_rate": 9.9994194650026e-06, "loss": 0.024, "step": 24720 }, { "epoch": 0.2088197420362669, "grad_norm": 0.49311351776123047, "learning_rate": 9.999408182009977e-06, "loss": 0.0308, "step": 24730 }, { "epoch": 0.2089041818834308, "grad_norm": 0.6398715972900391, "learning_rate": 9.999396790431233e-06, "loss": 0.0262, "step": 24740 }, { "epoch": 0.20898862173059468, "grad_norm": 0.766343891620636, "learning_rate": 9.999385290266616e-06, "loss": 0.0141, "step": 24750 }, { "epoch": 0.20907306157775854, "grad_norm": 2.351543664932251, "learning_rate": 9.999373681516374e-06, "loss": 0.0243, "step": 24760 }, { "epoch": 0.20915750142492243, "grad_norm": 0.3701601028442383, "learning_rate": 9.999361964180761e-06, "loss": 0.0233, "step": 24770 }, { "epoch": 0.20924194127208628, "grad_norm": 0.543444812297821, "learning_rate": 9.99935013826003e-06, "loss": 0.0286, "step": 24780 }, { "epoch": 0.20932638111925017, "grad_norm": 0.6621018648147583, "learning_rate": 9.999338203754438e-06, "loss": 0.0223, "step": 24790 }, { "epoch": 0.20941082096641406, "grad_norm": 1.4268600940704346, "learning_rate": 9.999326160664246e-06, "loss": 0.0287, "step": 24800 }, { "epoch": 0.20949526081357792, "grad_norm": 0.926680326461792, "learning_rate": 9.999314008989714e-06, "loss": 0.0296, "step": 24810 }, { "epoch": 0.2095797006607418, "grad_norm": 0.5374296307563782, "learning_rate": 9.999301748731105e-06, "loss": 0.0194, "step": 24820 }, { "epoch": 0.2096641405079057, "grad_norm": 0.40276938676834106, "learning_rate": 9.999289379888687e-06, "loss": 0.0174, "step": 24830 }, { "epoch": 0.20974858035506955, "grad_norm": 0.6191152334213257, "learning_rate": 9.99927690246273e-06, "loss": 0.0217, "step": 24840 }, { "epoch": 0.20983302020223343, "grad_norm": 0.8292160630226135, "learning_rate": 9.9992643164535e-06, "loss": 0.0142, "step": 24850 }, { "epoch": 0.20991746004939732, "grad_norm": 0.3971332907676697, "learning_rate": 9.999251621861275e-06, "loss": 0.0184, "step": 24860 }, { "epoch": 0.21000189989656118, "grad_norm": 0.6809071898460388, "learning_rate": 9.99923881868633e-06, "loss": 0.0176, "step": 24870 }, { "epoch": 0.21008633974372506, "grad_norm": 0.2908972203731537, "learning_rate": 9.999225906928942e-06, "loss": 0.017, "step": 24880 }, { "epoch": 0.21017077959088895, "grad_norm": 0.7996467351913452, "learning_rate": 9.999212886589391e-06, "loss": 0.0241, "step": 24890 }, { "epoch": 0.2102552194380528, "grad_norm": 0.39945897459983826, "learning_rate": 9.99919975766796e-06, "loss": 0.0187, "step": 24900 }, { "epoch": 0.2103396592852167, "grad_norm": 0.8938065767288208, "learning_rate": 9.999186520164936e-06, "loss": 0.0194, "step": 24910 }, { "epoch": 0.21042409913238058, "grad_norm": 0.5270742774009705, "learning_rate": 9.999173174080606e-06, "loss": 0.0194, "step": 24920 }, { "epoch": 0.21050853897954444, "grad_norm": 0.2976175844669342, "learning_rate": 9.999159719415257e-06, "loss": 0.0142, "step": 24930 }, { "epoch": 0.21059297882670833, "grad_norm": 0.6227111220359802, "learning_rate": 9.999146156169184e-06, "loss": 0.0152, "step": 24940 }, { "epoch": 0.2106774186738722, "grad_norm": 0.5551165342330933, "learning_rate": 9.99913248434268e-06, "loss": 0.0246, "step": 24950 }, { "epoch": 0.21076185852103607, "grad_norm": 0.5491289496421814, "learning_rate": 9.999118703936045e-06, "loss": 0.0188, "step": 24960 }, { "epoch": 0.21084629836819996, "grad_norm": 0.6508186459541321, "learning_rate": 9.999104814949573e-06, "loss": 0.0202, "step": 24970 }, { "epoch": 0.21093073821536382, "grad_norm": 1.0458903312683105, "learning_rate": 9.999090817383572e-06, "loss": 0.0205, "step": 24980 }, { "epoch": 0.2110151780625277, "grad_norm": 0.7246270775794983, "learning_rate": 9.999076711238343e-06, "loss": 0.024, "step": 24990 }, { "epoch": 0.2110996179096916, "grad_norm": 1.0554957389831543, "learning_rate": 9.99906249651419e-06, "loss": 0.0316, "step": 25000 }, { "epoch": 0.21118405775685545, "grad_norm": 0.5922115445137024, "learning_rate": 9.999048173211426e-06, "loss": 0.0172, "step": 25010 }, { "epoch": 0.21126849760401933, "grad_norm": 0.3617112338542938, "learning_rate": 9.999033741330358e-06, "loss": 0.0223, "step": 25020 }, { "epoch": 0.21135293745118322, "grad_norm": 0.6616569757461548, "learning_rate": 9.999019200871303e-06, "loss": 0.0212, "step": 25030 }, { "epoch": 0.21143737729834708, "grad_norm": 0.29388684034347534, "learning_rate": 9.999004551834575e-06, "loss": 0.0256, "step": 25040 }, { "epoch": 0.21152181714551097, "grad_norm": 0.6055065989494324, "learning_rate": 9.998989794220492e-06, "loss": 0.0279, "step": 25050 }, { "epoch": 0.21160625699267485, "grad_norm": 0.6768965125083923, "learning_rate": 9.998974928029374e-06, "loss": 0.0131, "step": 25060 }, { "epoch": 0.2116906968398387, "grad_norm": 0.8754789233207703, "learning_rate": 9.998959953261548e-06, "loss": 0.014, "step": 25070 }, { "epoch": 0.2117751366870026, "grad_norm": 0.7113556265830994, "learning_rate": 9.998944869917335e-06, "loss": 0.0192, "step": 25080 }, { "epoch": 0.21185957653416648, "grad_norm": 0.6127923130989075, "learning_rate": 9.998929677997063e-06, "loss": 0.0321, "step": 25090 }, { "epoch": 0.21194401638133034, "grad_norm": 0.7083094716072083, "learning_rate": 9.998914377501063e-06, "loss": 0.0191, "step": 25100 }, { "epoch": 0.21202845622849423, "grad_norm": 0.3919692039489746, "learning_rate": 9.998898968429668e-06, "loss": 0.0122, "step": 25110 }, { "epoch": 0.21211289607565811, "grad_norm": 0.4873667061328888, "learning_rate": 9.99888345078321e-06, "loss": 0.0212, "step": 25120 }, { "epoch": 0.21219733592282197, "grad_norm": 1.0319325923919678, "learning_rate": 9.99886782456203e-06, "loss": 0.0218, "step": 25130 }, { "epoch": 0.21228177576998586, "grad_norm": 0.284603476524353, "learning_rate": 9.998852089766463e-06, "loss": 0.016, "step": 25140 }, { "epoch": 0.21236621561714975, "grad_norm": 0.6495558023452759, "learning_rate": 9.998836246396855e-06, "loss": 0.0263, "step": 25150 }, { "epoch": 0.2124506554643136, "grad_norm": 0.10725927352905273, "learning_rate": 9.998820294453548e-06, "loss": 0.0262, "step": 25160 }, { "epoch": 0.2125350953114775, "grad_norm": 1.1660343408584595, "learning_rate": 9.998804233936887e-06, "loss": 0.0115, "step": 25170 }, { "epoch": 0.21261953515864138, "grad_norm": 1.0001658201217651, "learning_rate": 9.998788064847222e-06, "loss": 0.0213, "step": 25180 }, { "epoch": 0.21270397500580523, "grad_norm": 1.156286597251892, "learning_rate": 9.998771787184907e-06, "loss": 0.0216, "step": 25190 }, { "epoch": 0.21278841485296912, "grad_norm": 0.012386438436806202, "learning_rate": 9.998755400950291e-06, "loss": 0.0242, "step": 25200 }, { "epoch": 0.21287285470013298, "grad_norm": 0.6350577473640442, "learning_rate": 9.998738906143733e-06, "loss": 0.0166, "step": 25210 }, { "epoch": 0.21295729454729687, "grad_norm": 0.6332740783691406, "learning_rate": 9.998722302765591e-06, "loss": 0.0178, "step": 25220 }, { "epoch": 0.21304173439446075, "grad_norm": 0.4616744816303253, "learning_rate": 9.998705590816224e-06, "loss": 0.0198, "step": 25230 }, { "epoch": 0.2131261742416246, "grad_norm": 0.575376033782959, "learning_rate": 9.998688770295996e-06, "loss": 0.0256, "step": 25240 }, { "epoch": 0.2132106140887885, "grad_norm": 1.2639626264572144, "learning_rate": 9.998671841205271e-06, "loss": 0.018, "step": 25250 }, { "epoch": 0.21329505393595238, "grad_norm": 0.9767090082168579, "learning_rate": 9.998654803544421e-06, "loss": 0.0323, "step": 25260 }, { "epoch": 0.21337949378311624, "grad_norm": 0.7851904630661011, "learning_rate": 9.99863765731381e-06, "loss": 0.0311, "step": 25270 }, { "epoch": 0.21346393363028013, "grad_norm": 0.5118215084075928, "learning_rate": 9.998620402513815e-06, "loss": 0.0185, "step": 25280 }, { "epoch": 0.21354837347744401, "grad_norm": 0.6743268966674805, "learning_rate": 9.99860303914481e-06, "loss": 0.0244, "step": 25290 }, { "epoch": 0.21363281332460787, "grad_norm": 0.4776153266429901, "learning_rate": 9.998585567207168e-06, "loss": 0.0129, "step": 25300 }, { "epoch": 0.21371725317177176, "grad_norm": 0.7782028913497925, "learning_rate": 9.998567986701275e-06, "loss": 0.0213, "step": 25310 }, { "epoch": 0.21380169301893565, "grad_norm": 0.4659802317619324, "learning_rate": 9.998550297627509e-06, "loss": 0.0198, "step": 25320 }, { "epoch": 0.2138861328660995, "grad_norm": 0.842404842376709, "learning_rate": 9.998532499986252e-06, "loss": 0.0266, "step": 25330 }, { "epoch": 0.2139705727132634, "grad_norm": 0.46465060114860535, "learning_rate": 9.998514593777896e-06, "loss": 0.0246, "step": 25340 }, { "epoch": 0.21405501256042728, "grad_norm": 0.5182591080665588, "learning_rate": 9.998496579002827e-06, "loss": 0.0179, "step": 25350 }, { "epoch": 0.21413945240759114, "grad_norm": 0.3778470456600189, "learning_rate": 9.998478455661437e-06, "loss": 0.0313, "step": 25360 }, { "epoch": 0.21422389225475502, "grad_norm": 0.5335585474967957, "learning_rate": 9.99846022375412e-06, "loss": 0.0124, "step": 25370 }, { "epoch": 0.2143083321019189, "grad_norm": 0.9246655106544495, "learning_rate": 9.99844188328127e-06, "loss": 0.0197, "step": 25380 }, { "epoch": 0.21439277194908277, "grad_norm": 0.288949191570282, "learning_rate": 9.998423434243287e-06, "loss": 0.0194, "step": 25390 }, { "epoch": 0.21447721179624665, "grad_norm": 0.40579119324684143, "learning_rate": 9.99840487664057e-06, "loss": 0.0156, "step": 25400 }, { "epoch": 0.21456165164341054, "grad_norm": 0.5065811276435852, "learning_rate": 9.998386210473526e-06, "loss": 0.0317, "step": 25410 }, { "epoch": 0.2146460914905744, "grad_norm": 0.33290109038352966, "learning_rate": 9.998367435742556e-06, "loss": 0.0107, "step": 25420 }, { "epoch": 0.21473053133773828, "grad_norm": 0.9453659653663635, "learning_rate": 9.998348552448068e-06, "loss": 0.0115, "step": 25430 }, { "epoch": 0.21481497118490214, "grad_norm": 0.4028318226337433, "learning_rate": 9.998329560590477e-06, "loss": 0.0201, "step": 25440 }, { "epoch": 0.21489941103206603, "grad_norm": 0.3988511860370636, "learning_rate": 9.998310460170189e-06, "loss": 0.017, "step": 25450 }, { "epoch": 0.21498385087922992, "grad_norm": 0.5415330529212952, "learning_rate": 9.998291251187625e-06, "loss": 0.0255, "step": 25460 }, { "epoch": 0.21506829072639377, "grad_norm": 0.468393474817276, "learning_rate": 9.998271933643197e-06, "loss": 0.02, "step": 25470 }, { "epoch": 0.21515273057355766, "grad_norm": 0.2821195721626282, "learning_rate": 9.998252507537327e-06, "loss": 0.0219, "step": 25480 }, { "epoch": 0.21523717042072155, "grad_norm": 0.6758138537406921, "learning_rate": 9.998232972870438e-06, "loss": 0.0136, "step": 25490 }, { "epoch": 0.2153216102678854, "grad_norm": 0.657581090927124, "learning_rate": 9.998213329642952e-06, "loss": 0.0218, "step": 25500 }, { "epoch": 0.2154060501150493, "grad_norm": 0.5281630158424377, "learning_rate": 9.998193577855297e-06, "loss": 0.0189, "step": 25510 }, { "epoch": 0.21549048996221318, "grad_norm": 0.7199000716209412, "learning_rate": 9.998173717507903e-06, "loss": 0.0268, "step": 25520 }, { "epoch": 0.21557492980937704, "grad_norm": 0.6132403612136841, "learning_rate": 9.998153748601198e-06, "loss": 0.0137, "step": 25530 }, { "epoch": 0.21565936965654092, "grad_norm": 0.20025554299354553, "learning_rate": 9.99813367113562e-06, "loss": 0.0204, "step": 25540 }, { "epoch": 0.2157438095037048, "grad_norm": 0.6955196857452393, "learning_rate": 9.9981134851116e-06, "loss": 0.0199, "step": 25550 }, { "epoch": 0.21582824935086867, "grad_norm": 0.33561429381370544, "learning_rate": 9.998093190529581e-06, "loss": 0.0265, "step": 25560 }, { "epoch": 0.21591268919803255, "grad_norm": 0.5058835744857788, "learning_rate": 9.998072787390003e-06, "loss": 0.0209, "step": 25570 }, { "epoch": 0.21599712904519644, "grad_norm": 0.4940107464790344, "learning_rate": 9.998052275693307e-06, "loss": 0.023, "step": 25580 }, { "epoch": 0.2160815688923603, "grad_norm": 0.3639446198940277, "learning_rate": 9.99803165543994e-06, "loss": 0.0284, "step": 25590 }, { "epoch": 0.21616600873952418, "grad_norm": 0.5373221039772034, "learning_rate": 9.99801092663035e-06, "loss": 0.0224, "step": 25600 }, { "epoch": 0.21625044858668807, "grad_norm": 0.9673978090286255, "learning_rate": 9.997990089264988e-06, "loss": 0.0191, "step": 25610 }, { "epoch": 0.21633488843385193, "grad_norm": 0.5856829285621643, "learning_rate": 9.997969143344304e-06, "loss": 0.0256, "step": 25620 }, { "epoch": 0.21641932828101582, "grad_norm": 1.147714614868164, "learning_rate": 9.997948088868755e-06, "loss": 0.0249, "step": 25630 }, { "epoch": 0.21650376812817967, "grad_norm": 0.40291470289230347, "learning_rate": 9.997926925838796e-06, "loss": 0.0141, "step": 25640 }, { "epoch": 0.21658820797534356, "grad_norm": 0.29275310039520264, "learning_rate": 9.997905654254889e-06, "loss": 0.0247, "step": 25650 }, { "epoch": 0.21667264782250745, "grad_norm": 0.2729884684085846, "learning_rate": 9.997884274117496e-06, "loss": 0.0165, "step": 25660 }, { "epoch": 0.2167570876696713, "grad_norm": 1.5933003425598145, "learning_rate": 9.997862785427081e-06, "loss": 0.012, "step": 25670 }, { "epoch": 0.2168415275168352, "grad_norm": 0.32527732849121094, "learning_rate": 9.99784118818411e-06, "loss": 0.0325, "step": 25680 }, { "epoch": 0.21692596736399908, "grad_norm": 0.9714996814727783, "learning_rate": 9.997819482389053e-06, "loss": 0.0255, "step": 25690 }, { "epoch": 0.21701040721116294, "grad_norm": 0.48668038845062256, "learning_rate": 9.99779766804238e-06, "loss": 0.0333, "step": 25700 }, { "epoch": 0.21709484705832682, "grad_norm": 0.4242713749408722, "learning_rate": 9.997775745144566e-06, "loss": 0.0209, "step": 25710 }, { "epoch": 0.2171792869054907, "grad_norm": 0.7870621681213379, "learning_rate": 9.997753713696088e-06, "loss": 0.0162, "step": 25720 }, { "epoch": 0.21726372675265457, "grad_norm": 0.5923681259155273, "learning_rate": 9.997731573697422e-06, "loss": 0.0213, "step": 25730 }, { "epoch": 0.21734816659981845, "grad_norm": 1.497139573097229, "learning_rate": 9.997709325149052e-06, "loss": 0.0273, "step": 25740 }, { "epoch": 0.21743260644698234, "grad_norm": 0.41679641604423523, "learning_rate": 9.997686968051459e-06, "loss": 0.0213, "step": 25750 }, { "epoch": 0.2175170462941462, "grad_norm": 0.9581217169761658, "learning_rate": 9.997664502405128e-06, "loss": 0.0276, "step": 25760 }, { "epoch": 0.21760148614131009, "grad_norm": 0.5941908359527588, "learning_rate": 9.99764192821055e-06, "loss": 0.0213, "step": 25770 }, { "epoch": 0.21768592598847397, "grad_norm": 0.46214210987091064, "learning_rate": 9.997619245468213e-06, "loss": 0.0257, "step": 25780 }, { "epoch": 0.21777036583563783, "grad_norm": 0.5864717960357666, "learning_rate": 9.997596454178609e-06, "loss": 0.0249, "step": 25790 }, { "epoch": 0.21785480568280172, "grad_norm": 0.23679296672344208, "learning_rate": 9.997573554342235e-06, "loss": 0.019, "step": 25800 }, { "epoch": 0.2179392455299656, "grad_norm": 0.8176667094230652, "learning_rate": 9.997550545959586e-06, "loss": 0.0323, "step": 25810 }, { "epoch": 0.21802368537712946, "grad_norm": 0.6943109035491943, "learning_rate": 9.997527429031166e-06, "loss": 0.0214, "step": 25820 }, { "epoch": 0.21810812522429335, "grad_norm": 0.5169945955276489, "learning_rate": 9.997504203557472e-06, "loss": 0.0187, "step": 25830 }, { "epoch": 0.21819256507145723, "grad_norm": 0.4942447543144226, "learning_rate": 9.997480869539011e-06, "loss": 0.0272, "step": 25840 }, { "epoch": 0.2182770049186211, "grad_norm": 0.9686113595962524, "learning_rate": 9.997457426976291e-06, "loss": 0.0217, "step": 25850 }, { "epoch": 0.21836144476578498, "grad_norm": 0.7323258519172668, "learning_rate": 9.997433875869819e-06, "loss": 0.0197, "step": 25860 }, { "epoch": 0.21844588461294884, "grad_norm": 0.2513096034526825, "learning_rate": 9.997410216220107e-06, "loss": 0.0246, "step": 25870 }, { "epoch": 0.21853032446011272, "grad_norm": 0.36516088247299194, "learning_rate": 9.99738644802767e-06, "loss": 0.0178, "step": 25880 }, { "epoch": 0.2186147643072766, "grad_norm": 0.2536872327327728, "learning_rate": 9.997362571293023e-06, "loss": 0.0119, "step": 25890 }, { "epoch": 0.21869920415444047, "grad_norm": 0.5898242592811584, "learning_rate": 9.997338586016684e-06, "loss": 0.0177, "step": 25900 }, { "epoch": 0.21878364400160435, "grad_norm": 0.7350232005119324, "learning_rate": 9.997314492199175e-06, "loss": 0.0135, "step": 25910 }, { "epoch": 0.21886808384876824, "grad_norm": 0.3810914158821106, "learning_rate": 9.997290289841021e-06, "loss": 0.0283, "step": 25920 }, { "epoch": 0.2189525236959321, "grad_norm": 0.3073737621307373, "learning_rate": 9.997265978942744e-06, "loss": 0.0227, "step": 25930 }, { "epoch": 0.21903696354309599, "grad_norm": 0.7661053538322449, "learning_rate": 9.997241559504876e-06, "loss": 0.032, "step": 25940 }, { "epoch": 0.21912140339025987, "grad_norm": 0.36939266324043274, "learning_rate": 9.997217031527943e-06, "loss": 0.0169, "step": 25950 }, { "epoch": 0.21920584323742373, "grad_norm": 0.9217997193336487, "learning_rate": 9.997192395012483e-06, "loss": 0.0169, "step": 25960 }, { "epoch": 0.21929028308458762, "grad_norm": 0.3317801356315613, "learning_rate": 9.997167649959027e-06, "loss": 0.0196, "step": 25970 }, { "epoch": 0.2193747229317515, "grad_norm": 0.9270144701004028, "learning_rate": 9.997142796368111e-06, "loss": 0.0262, "step": 25980 }, { "epoch": 0.21945916277891536, "grad_norm": 1.043249487876892, "learning_rate": 9.997117834240283e-06, "loss": 0.0223, "step": 25990 }, { "epoch": 0.21954360262607925, "grad_norm": 0.8988041877746582, "learning_rate": 9.997092763576074e-06, "loss": 0.0266, "step": 26000 }, { "epoch": 0.21962804247324313, "grad_norm": 0.1659495234489441, "learning_rate": 9.997067584376037e-06, "loss": 0.0143, "step": 26010 }, { "epoch": 0.219712482320407, "grad_norm": 0.8065207004547119, "learning_rate": 9.997042296640715e-06, "loss": 0.0196, "step": 26020 }, { "epoch": 0.21979692216757088, "grad_norm": 0.7554645538330078, "learning_rate": 9.997016900370659e-06, "loss": 0.0254, "step": 26030 }, { "epoch": 0.21988136201473477, "grad_norm": 0.4534749388694763, "learning_rate": 9.99699139556642e-06, "loss": 0.0182, "step": 26040 }, { "epoch": 0.21996580186189862, "grad_norm": 1.0162148475646973, "learning_rate": 9.996965782228552e-06, "loss": 0.0196, "step": 26050 }, { "epoch": 0.2200502417090625, "grad_norm": 0.8457193374633789, "learning_rate": 9.996940060357611e-06, "loss": 0.0218, "step": 26060 }, { "epoch": 0.2201346815562264, "grad_norm": 0.7323012351989746, "learning_rate": 9.996914229954155e-06, "loss": 0.021, "step": 26070 }, { "epoch": 0.22021912140339026, "grad_norm": 0.6501046419143677, "learning_rate": 9.996888291018748e-06, "loss": 0.02, "step": 26080 }, { "epoch": 0.22030356125055414, "grad_norm": 0.42148271203041077, "learning_rate": 9.99686224355195e-06, "loss": 0.0231, "step": 26090 }, { "epoch": 0.220388001097718, "grad_norm": 0.35854795575141907, "learning_rate": 9.996836087554329e-06, "loss": 0.0106, "step": 26100 }, { "epoch": 0.2204724409448819, "grad_norm": 0.5628857612609863, "learning_rate": 9.99680982302645e-06, "loss": 0.0131, "step": 26110 }, { "epoch": 0.22055688079204577, "grad_norm": 0.48959001898765564, "learning_rate": 9.996783449968888e-06, "loss": 0.0218, "step": 26120 }, { "epoch": 0.22064132063920963, "grad_norm": 0.6012258529663086, "learning_rate": 9.996756968382211e-06, "loss": 0.0246, "step": 26130 }, { "epoch": 0.22072576048637352, "grad_norm": 0.34981027245521545, "learning_rate": 9.996730378266999e-06, "loss": 0.0226, "step": 26140 }, { "epoch": 0.2208102003335374, "grad_norm": 0.0242458526045084, "learning_rate": 9.996703679623824e-06, "loss": 0.0154, "step": 26150 }, { "epoch": 0.22089464018070126, "grad_norm": 0.3672408163547516, "learning_rate": 9.996676872453271e-06, "loss": 0.0177, "step": 26160 }, { "epoch": 0.22097908002786515, "grad_norm": 0.7642616033554077, "learning_rate": 9.996649956755919e-06, "loss": 0.0181, "step": 26170 }, { "epoch": 0.22106351987502904, "grad_norm": 1.1504219770431519, "learning_rate": 9.996622932532357e-06, "loss": 0.0288, "step": 26180 }, { "epoch": 0.2211479597221929, "grad_norm": 0.5258864760398865, "learning_rate": 9.996595799783166e-06, "loss": 0.0239, "step": 26190 }, { "epoch": 0.22123239956935678, "grad_norm": 0.9298508167266846, "learning_rate": 9.996568558508939e-06, "loss": 0.0373, "step": 26200 }, { "epoch": 0.22131683941652067, "grad_norm": 0.8856810927391052, "learning_rate": 9.996541208710267e-06, "loss": 0.0246, "step": 26210 }, { "epoch": 0.22140127926368453, "grad_norm": 0.47698038816452026, "learning_rate": 9.996513750387744e-06, "loss": 0.0171, "step": 26220 }, { "epoch": 0.2214857191108484, "grad_norm": 0.8455895781517029, "learning_rate": 9.996486183541965e-06, "loss": 0.0201, "step": 26230 }, { "epoch": 0.2215701589580123, "grad_norm": 1.2756915092468262, "learning_rate": 9.99645850817353e-06, "loss": 0.0278, "step": 26240 }, { "epoch": 0.22165459880517616, "grad_norm": 2.30576753616333, "learning_rate": 9.996430724283042e-06, "loss": 0.0172, "step": 26250 }, { "epoch": 0.22173903865234004, "grad_norm": 0.17418089509010315, "learning_rate": 9.9964028318711e-06, "loss": 0.0234, "step": 26260 }, { "epoch": 0.22182347849950393, "grad_norm": 0.2630585730075836, "learning_rate": 9.996374830938314e-06, "loss": 0.0177, "step": 26270 }, { "epoch": 0.2219079183466678, "grad_norm": 0.4102226495742798, "learning_rate": 9.99634672148529e-06, "loss": 0.0221, "step": 26280 }, { "epoch": 0.22199235819383167, "grad_norm": 0.7758890986442566, "learning_rate": 9.996318503512639e-06, "loss": 0.0132, "step": 26290 }, { "epoch": 0.22207679804099553, "grad_norm": 0.4755285978317261, "learning_rate": 9.996290177020974e-06, "loss": 0.0183, "step": 26300 }, { "epoch": 0.22216123788815942, "grad_norm": 0.2072785645723343, "learning_rate": 9.99626174201091e-06, "loss": 0.0181, "step": 26310 }, { "epoch": 0.2222456777353233, "grad_norm": 0.3920679986476898, "learning_rate": 9.996233198483065e-06, "loss": 0.0213, "step": 26320 }, { "epoch": 0.22233011758248716, "grad_norm": 0.5002937316894531, "learning_rate": 9.99620454643806e-06, "loss": 0.0175, "step": 26330 }, { "epoch": 0.22241455742965105, "grad_norm": 0.5496050715446472, "learning_rate": 9.996175785876514e-06, "loss": 0.0134, "step": 26340 }, { "epoch": 0.22249899727681494, "grad_norm": 0.20304206013679504, "learning_rate": 9.996146916799055e-06, "loss": 0.0283, "step": 26350 }, { "epoch": 0.2225834371239788, "grad_norm": 0.8094618916511536, "learning_rate": 9.996117939206309e-06, "loss": 0.0193, "step": 26360 }, { "epoch": 0.22266787697114268, "grad_norm": 0.990069568157196, "learning_rate": 9.996088853098904e-06, "loss": 0.0184, "step": 26370 }, { "epoch": 0.22275231681830657, "grad_norm": 0.748321533203125, "learning_rate": 9.996059658477476e-06, "loss": 0.0248, "step": 26380 }, { "epoch": 0.22283675666547043, "grad_norm": 0.101618193089962, "learning_rate": 9.996030355342652e-06, "loss": 0.0275, "step": 26390 }, { "epoch": 0.2229211965126343, "grad_norm": 1.4041067361831665, "learning_rate": 9.996000943695074e-06, "loss": 0.0222, "step": 26400 }, { "epoch": 0.2230056363597982, "grad_norm": 0.6623303890228271, "learning_rate": 9.995971423535379e-06, "loss": 0.0183, "step": 26410 }, { "epoch": 0.22309007620696206, "grad_norm": 0.3093818724155426, "learning_rate": 9.99594179486421e-06, "loss": 0.0169, "step": 26420 }, { "epoch": 0.22317451605412594, "grad_norm": 0.5611135363578796, "learning_rate": 9.995912057682207e-06, "loss": 0.0234, "step": 26430 }, { "epoch": 0.22325895590128983, "grad_norm": 0.22231921553611755, "learning_rate": 9.995882211990018e-06, "loss": 0.0236, "step": 26440 }, { "epoch": 0.2233433957484537, "grad_norm": 0.5202013850212097, "learning_rate": 9.995852257788291e-06, "loss": 0.0179, "step": 26450 }, { "epoch": 0.22342783559561757, "grad_norm": 1.4457650184631348, "learning_rate": 9.995822195077678e-06, "loss": 0.0292, "step": 26460 }, { "epoch": 0.22351227544278146, "grad_norm": 0.21039637923240662, "learning_rate": 9.995792023858831e-06, "loss": 0.0163, "step": 26470 }, { "epoch": 0.22359671528994532, "grad_norm": 0.36491286754608154, "learning_rate": 9.995761744132403e-06, "loss": 0.0266, "step": 26480 }, { "epoch": 0.2236811551371092, "grad_norm": 1.117435097694397, "learning_rate": 9.995731355899056e-06, "loss": 0.0217, "step": 26490 }, { "epoch": 0.2237655949842731, "grad_norm": 0.7871583104133606, "learning_rate": 9.995700859159445e-06, "loss": 0.0179, "step": 26500 }, { "epoch": 0.22385003483143695, "grad_norm": 0.9296548366546631, "learning_rate": 9.995670253914238e-06, "loss": 0.0223, "step": 26510 }, { "epoch": 0.22393447467860084, "grad_norm": 0.9035691618919373, "learning_rate": 9.995639540164094e-06, "loss": 0.0295, "step": 26520 }, { "epoch": 0.2240189145257647, "grad_norm": 1.3572911024093628, "learning_rate": 9.995608717909684e-06, "loss": 0.0339, "step": 26530 }, { "epoch": 0.22410335437292858, "grad_norm": 0.2690751552581787, "learning_rate": 9.995577787151677e-06, "loss": 0.0122, "step": 26540 }, { "epoch": 0.22418779422009247, "grad_norm": 0.20543324947357178, "learning_rate": 9.995546747890743e-06, "loss": 0.0254, "step": 26550 }, { "epoch": 0.22427223406725633, "grad_norm": 0.7215108871459961, "learning_rate": 9.995515600127558e-06, "loss": 0.0226, "step": 26560 }, { "epoch": 0.2243566739144202, "grad_norm": 0.982671856880188, "learning_rate": 9.995484343862799e-06, "loss": 0.0197, "step": 26570 }, { "epoch": 0.2244411137615841, "grad_norm": 0.33657893538475037, "learning_rate": 9.995452979097142e-06, "loss": 0.0122, "step": 26580 }, { "epoch": 0.22452555360874796, "grad_norm": 0.6894280314445496, "learning_rate": 9.995421505831271e-06, "loss": 0.0181, "step": 26590 }, { "epoch": 0.22460999345591184, "grad_norm": 1.331727147102356, "learning_rate": 9.995389924065868e-06, "loss": 0.0218, "step": 26600 }, { "epoch": 0.22469443330307573, "grad_norm": 0.5049757361412048, "learning_rate": 9.99535823380162e-06, "loss": 0.0168, "step": 26610 }, { "epoch": 0.2247788731502396, "grad_norm": 0.663317084312439, "learning_rate": 9.995326435039214e-06, "loss": 0.025, "step": 26620 }, { "epoch": 0.22486331299740348, "grad_norm": 0.7517447471618652, "learning_rate": 9.99529452777934e-06, "loss": 0.0212, "step": 26630 }, { "epoch": 0.22494775284456736, "grad_norm": 0.7296304702758789, "learning_rate": 9.995262512022696e-06, "loss": 0.0291, "step": 26640 }, { "epoch": 0.22503219269173122, "grad_norm": 0.7156190872192383, "learning_rate": 9.995230387769971e-06, "loss": 0.0196, "step": 26650 }, { "epoch": 0.2251166325388951, "grad_norm": 0.5790315270423889, "learning_rate": 9.995198155021868e-06, "loss": 0.0157, "step": 26660 }, { "epoch": 0.225201072386059, "grad_norm": 0.41718602180480957, "learning_rate": 9.995165813779082e-06, "loss": 0.0156, "step": 26670 }, { "epoch": 0.22528551223322285, "grad_norm": 0.6578715443611145, "learning_rate": 9.99513336404232e-06, "loss": 0.0216, "step": 26680 }, { "epoch": 0.22536995208038674, "grad_norm": 0.7191850543022156, "learning_rate": 9.995100805812286e-06, "loss": 0.0181, "step": 26690 }, { "epoch": 0.22545439192755062, "grad_norm": 0.33948010206222534, "learning_rate": 9.995068139089684e-06, "loss": 0.0192, "step": 26700 }, { "epoch": 0.22553883177471448, "grad_norm": 0.5685214996337891, "learning_rate": 9.995035363875224e-06, "loss": 0.0292, "step": 26710 }, { "epoch": 0.22562327162187837, "grad_norm": 0.7468769550323486, "learning_rate": 9.995002480169624e-06, "loss": 0.0184, "step": 26720 }, { "epoch": 0.22570771146904225, "grad_norm": 0.6937370896339417, "learning_rate": 9.99496948797359e-06, "loss": 0.0209, "step": 26730 }, { "epoch": 0.2257921513162061, "grad_norm": 0.5411621928215027, "learning_rate": 9.994936387287844e-06, "loss": 0.0191, "step": 26740 }, { "epoch": 0.22587659116337, "grad_norm": 0.6977248787879944, "learning_rate": 9.994903178113104e-06, "loss": 0.0175, "step": 26750 }, { "epoch": 0.22596103101053386, "grad_norm": 0.28987419605255127, "learning_rate": 9.99486986045009e-06, "loss": 0.0168, "step": 26760 }, { "epoch": 0.22604547085769774, "grad_norm": 0.9230344295501709, "learning_rate": 9.994836434299525e-06, "loss": 0.0214, "step": 26770 }, { "epoch": 0.22612991070486163, "grad_norm": 1.2326431274414062, "learning_rate": 9.994802899662137e-06, "loss": 0.0231, "step": 26780 }, { "epoch": 0.2262143505520255, "grad_norm": 0.38119468092918396, "learning_rate": 9.994769256538653e-06, "loss": 0.0205, "step": 26790 }, { "epoch": 0.22629879039918938, "grad_norm": 0.26117193698883057, "learning_rate": 9.994735504929804e-06, "loss": 0.0185, "step": 26800 }, { "epoch": 0.22638323024635326, "grad_norm": 0.3642174303531647, "learning_rate": 9.994701644836324e-06, "loss": 0.0171, "step": 26810 }, { "epoch": 0.22646767009351712, "grad_norm": 0.44568803906440735, "learning_rate": 9.994667676258948e-06, "loss": 0.0221, "step": 26820 }, { "epoch": 0.226552109940681, "grad_norm": 0.37159228324890137, "learning_rate": 9.994633599198413e-06, "loss": 0.0214, "step": 26830 }, { "epoch": 0.2266365497878449, "grad_norm": 0.9071556329727173, "learning_rate": 9.99459941365546e-06, "loss": 0.0242, "step": 26840 }, { "epoch": 0.22672098963500875, "grad_norm": 0.6229547262191772, "learning_rate": 9.994565119630832e-06, "loss": 0.021, "step": 26850 }, { "epoch": 0.22680542948217264, "grad_norm": 0.5916224122047424, "learning_rate": 9.994530717125272e-06, "loss": 0.0188, "step": 26860 }, { "epoch": 0.22688986932933652, "grad_norm": 0.7839229702949524, "learning_rate": 9.99449620613953e-06, "loss": 0.0158, "step": 26870 }, { "epoch": 0.22697430917650038, "grad_norm": 0.5083691477775574, "learning_rate": 9.994461586674351e-06, "loss": 0.0198, "step": 26880 }, { "epoch": 0.22705874902366427, "grad_norm": 0.9338452816009521, "learning_rate": 9.99442685873049e-06, "loss": 0.0335, "step": 26890 }, { "epoch": 0.22714318887082816, "grad_norm": 0.25168415904045105, "learning_rate": 9.994392022308703e-06, "loss": 0.0167, "step": 26900 }, { "epoch": 0.22722762871799201, "grad_norm": 0.46277931332588196, "learning_rate": 9.994357077409746e-06, "loss": 0.0187, "step": 26910 }, { "epoch": 0.2273120685651559, "grad_norm": 0.7370343804359436, "learning_rate": 9.994322024034374e-06, "loss": 0.0316, "step": 26920 }, { "epoch": 0.2273965084123198, "grad_norm": 0.8419890999794006, "learning_rate": 9.994286862183351e-06, "loss": 0.0193, "step": 26930 }, { "epoch": 0.22748094825948365, "grad_norm": 0.32417795062065125, "learning_rate": 9.994251591857443e-06, "loss": 0.0167, "step": 26940 }, { "epoch": 0.22756538810664753, "grad_norm": 0.9056411981582642, "learning_rate": 9.994216213057413e-06, "loss": 0.0197, "step": 26950 }, { "epoch": 0.2276498279538114, "grad_norm": 0.7615956664085388, "learning_rate": 9.994180725784028e-06, "loss": 0.0217, "step": 26960 }, { "epoch": 0.22773426780097528, "grad_norm": 0.22266970574855804, "learning_rate": 9.994145130038065e-06, "loss": 0.0181, "step": 26970 }, { "epoch": 0.22781870764813916, "grad_norm": 0.7149803042411804, "learning_rate": 9.994109425820289e-06, "loss": 0.0237, "step": 26980 }, { "epoch": 0.22790314749530302, "grad_norm": 0.680219829082489, "learning_rate": 9.994073613131483e-06, "loss": 0.0213, "step": 26990 }, { "epoch": 0.2279875873424669, "grad_norm": 0.43211641907691956, "learning_rate": 9.99403769197242e-06, "loss": 0.0261, "step": 27000 }, { "epoch": 0.2280720271896308, "grad_norm": 1.1136538982391357, "learning_rate": 9.994001662343882e-06, "loss": 0.0268, "step": 27010 }, { "epoch": 0.22815646703679465, "grad_norm": 0.6474602818489075, "learning_rate": 9.993965524246652e-06, "loss": 0.0217, "step": 27020 }, { "epoch": 0.22824090688395854, "grad_norm": 0.529000461101532, "learning_rate": 9.993929277681512e-06, "loss": 0.0295, "step": 27030 }, { "epoch": 0.22832534673112242, "grad_norm": 0.5573339462280273, "learning_rate": 9.993892922649253e-06, "loss": 0.0185, "step": 27040 }, { "epoch": 0.22840978657828628, "grad_norm": 0.6101168394088745, "learning_rate": 9.993856459150664e-06, "loss": 0.0149, "step": 27050 }, { "epoch": 0.22849422642545017, "grad_norm": 0.09141503274440765, "learning_rate": 9.993819887186533e-06, "loss": 0.0122, "step": 27060 }, { "epoch": 0.22857866627261406, "grad_norm": 0.682431161403656, "learning_rate": 9.99378320675766e-06, "loss": 0.0157, "step": 27070 }, { "epoch": 0.22866310611977791, "grad_norm": 0.17229408025741577, "learning_rate": 9.993746417864837e-06, "loss": 0.0092, "step": 27080 }, { "epoch": 0.2287475459669418, "grad_norm": 0.7031053304672241, "learning_rate": 9.993709520508868e-06, "loss": 0.02, "step": 27090 }, { "epoch": 0.2288319858141057, "grad_norm": 0.47848960757255554, "learning_rate": 9.99367251469055e-06, "loss": 0.0209, "step": 27100 }, { "epoch": 0.22891642566126955, "grad_norm": 0.31518542766571045, "learning_rate": 9.993635400410688e-06, "loss": 0.0156, "step": 27110 }, { "epoch": 0.22900086550843343, "grad_norm": 0.5510332584381104, "learning_rate": 9.99359817767009e-06, "loss": 0.0328, "step": 27120 }, { "epoch": 0.22908530535559732, "grad_norm": 1.1776283979415894, "learning_rate": 9.993560846469561e-06, "loss": 0.0289, "step": 27130 }, { "epoch": 0.22916974520276118, "grad_norm": 0.38962098956108093, "learning_rate": 9.993523406809914e-06, "loss": 0.0189, "step": 27140 }, { "epoch": 0.22925418504992506, "grad_norm": 0.6376552581787109, "learning_rate": 9.993485858691962e-06, "loss": 0.0147, "step": 27150 }, { "epoch": 0.22933862489708895, "grad_norm": 1.1078087091445923, "learning_rate": 9.99344820211652e-06, "loss": 0.0231, "step": 27160 }, { "epoch": 0.2294230647442528, "grad_norm": 0.9274165630340576, "learning_rate": 9.993410437084407e-06, "loss": 0.0284, "step": 27170 }, { "epoch": 0.2295075045914167, "grad_norm": 0.47144758701324463, "learning_rate": 9.993372563596443e-06, "loss": 0.0207, "step": 27180 }, { "epoch": 0.22959194443858055, "grad_norm": 0.5418394804000854, "learning_rate": 9.993334581653448e-06, "loss": 0.0178, "step": 27190 }, { "epoch": 0.22967638428574444, "grad_norm": 0.5045050382614136, "learning_rate": 9.993296491256252e-06, "loss": 0.0125, "step": 27200 }, { "epoch": 0.22976082413290833, "grad_norm": 0.5375857353210449, "learning_rate": 9.993258292405678e-06, "loss": 0.0131, "step": 27210 }, { "epoch": 0.22984526398007218, "grad_norm": 0.39772942662239075, "learning_rate": 9.993219985102558e-06, "loss": 0.0259, "step": 27220 }, { "epoch": 0.22992970382723607, "grad_norm": 0.5070903301239014, "learning_rate": 9.993181569347721e-06, "loss": 0.0235, "step": 27230 }, { "epoch": 0.23001414367439996, "grad_norm": 0.9910537004470825, "learning_rate": 9.993143045142004e-06, "loss": 0.0192, "step": 27240 }, { "epoch": 0.23009858352156382, "grad_norm": 0.8872367143630981, "learning_rate": 9.993104412486245e-06, "loss": 0.0184, "step": 27250 }, { "epoch": 0.2301830233687277, "grad_norm": 0.2703758776187897, "learning_rate": 9.99306567138128e-06, "loss": 0.0151, "step": 27260 }, { "epoch": 0.2302674632158916, "grad_norm": 0.34444156289100647, "learning_rate": 9.993026821827954e-06, "loss": 0.0211, "step": 27270 }, { "epoch": 0.23035190306305545, "grad_norm": 0.5229703187942505, "learning_rate": 9.992987863827105e-06, "loss": 0.0155, "step": 27280 }, { "epoch": 0.23043634291021933, "grad_norm": 0.5942719578742981, "learning_rate": 9.992948797379586e-06, "loss": 0.029, "step": 27290 }, { "epoch": 0.23052078275738322, "grad_norm": 0.36972057819366455, "learning_rate": 9.99290962248624e-06, "loss": 0.0119, "step": 27300 }, { "epoch": 0.23060522260454708, "grad_norm": 0.2538045048713684, "learning_rate": 9.992870339147921e-06, "loss": 0.0105, "step": 27310 }, { "epoch": 0.23068966245171096, "grad_norm": 0.7615702152252197, "learning_rate": 9.992830947365481e-06, "loss": 0.0177, "step": 27320 }, { "epoch": 0.23077410229887485, "grad_norm": 0.061507903039455414, "learning_rate": 9.992791447139778e-06, "loss": 0.0187, "step": 27330 }, { "epoch": 0.2308585421460387, "grad_norm": 0.266486257314682, "learning_rate": 9.992751838471666e-06, "loss": 0.0177, "step": 27340 }, { "epoch": 0.2309429819932026, "grad_norm": 1.0720938444137573, "learning_rate": 9.992712121362008e-06, "loss": 0.0211, "step": 27350 }, { "epoch": 0.23102742184036648, "grad_norm": 0.723011314868927, "learning_rate": 9.992672295811664e-06, "loss": 0.0223, "step": 27360 }, { "epoch": 0.23111186168753034, "grad_norm": 1.161595106124878, "learning_rate": 9.992632361821501e-06, "loss": 0.0199, "step": 27370 }, { "epoch": 0.23119630153469423, "grad_norm": 0.4722878336906433, "learning_rate": 9.992592319392388e-06, "loss": 0.018, "step": 27380 }, { "epoch": 0.2312807413818581, "grad_norm": 0.06015455350279808, "learning_rate": 9.992552168525193e-06, "loss": 0.0186, "step": 27390 }, { "epoch": 0.23136518122902197, "grad_norm": 0.3877563178539276, "learning_rate": 9.992511909220786e-06, "loss": 0.013, "step": 27400 }, { "epoch": 0.23144962107618586, "grad_norm": 0.24039201438426971, "learning_rate": 9.992471541480044e-06, "loss": 0.0174, "step": 27410 }, { "epoch": 0.23153406092334972, "grad_norm": 1.1249136924743652, "learning_rate": 9.992431065303844e-06, "loss": 0.0275, "step": 27420 }, { "epoch": 0.2316185007705136, "grad_norm": 0.5762504935264587, "learning_rate": 9.992390480693065e-06, "loss": 0.0223, "step": 27430 }, { "epoch": 0.2317029406176775, "grad_norm": 0.38406461477279663, "learning_rate": 9.992349787648587e-06, "loss": 0.0162, "step": 27440 }, { "epoch": 0.23178738046484135, "grad_norm": 0.8563830852508545, "learning_rate": 9.992308986171295e-06, "loss": 0.0217, "step": 27450 }, { "epoch": 0.23187182031200523, "grad_norm": 0.9686345458030701, "learning_rate": 9.992268076262073e-06, "loss": 0.0263, "step": 27460 }, { "epoch": 0.23195626015916912, "grad_norm": 0.8781335949897766, "learning_rate": 9.992227057921813e-06, "loss": 0.0261, "step": 27470 }, { "epoch": 0.23204070000633298, "grad_norm": 0.9198773503303528, "learning_rate": 9.992185931151404e-06, "loss": 0.0144, "step": 27480 }, { "epoch": 0.23212513985349686, "grad_norm": 0.2521646022796631, "learning_rate": 9.99214469595174e-06, "loss": 0.0196, "step": 27490 }, { "epoch": 0.23220957970066075, "grad_norm": 0.6189895272254944, "learning_rate": 9.992103352323716e-06, "loss": 0.0185, "step": 27500 }, { "epoch": 0.2322940195478246, "grad_norm": 0.5302832722663879, "learning_rate": 9.992061900268228e-06, "loss": 0.0137, "step": 27510 }, { "epoch": 0.2323784593949885, "grad_norm": 0.7135818004608154, "learning_rate": 9.99202033978618e-06, "loss": 0.0197, "step": 27520 }, { "epoch": 0.23246289924215238, "grad_norm": 0.5184612274169922, "learning_rate": 9.991978670878475e-06, "loss": 0.021, "step": 27530 }, { "epoch": 0.23254733908931624, "grad_norm": 0.9389841556549072, "learning_rate": 9.991936893546015e-06, "loss": 0.0179, "step": 27540 }, { "epoch": 0.23263177893648013, "grad_norm": 0.6279402375221252, "learning_rate": 9.99189500778971e-06, "loss": 0.021, "step": 27550 }, { "epoch": 0.232716218783644, "grad_norm": 0.28801795840263367, "learning_rate": 9.991853013610466e-06, "loss": 0.0223, "step": 27560 }, { "epoch": 0.23280065863080787, "grad_norm": 0.2966119050979614, "learning_rate": 9.991810911009197e-06, "loss": 0.0133, "step": 27570 }, { "epoch": 0.23288509847797176, "grad_norm": 1.17726731300354, "learning_rate": 9.991768699986819e-06, "loss": 0.0371, "step": 27580 }, { "epoch": 0.23296953832513564, "grad_norm": 0.473028302192688, "learning_rate": 9.991726380544248e-06, "loss": 0.0147, "step": 27590 }, { "epoch": 0.2330539781722995, "grad_norm": 0.4156632125377655, "learning_rate": 9.991683952682403e-06, "loss": 0.0173, "step": 27600 }, { "epoch": 0.2331384180194634, "grad_norm": 0.6980392932891846, "learning_rate": 9.991641416402206e-06, "loss": 0.0274, "step": 27610 }, { "epoch": 0.23322285786662725, "grad_norm": 0.46748781204223633, "learning_rate": 9.99159877170458e-06, "loss": 0.0226, "step": 27620 }, { "epoch": 0.23330729771379113, "grad_norm": 0.2849699556827545, "learning_rate": 9.991556018590451e-06, "loss": 0.0152, "step": 27630 }, { "epoch": 0.23339173756095502, "grad_norm": 0.7996003031730652, "learning_rate": 9.991513157060749e-06, "loss": 0.0213, "step": 27640 }, { "epoch": 0.23347617740811888, "grad_norm": 0.8095079064369202, "learning_rate": 9.991470187116402e-06, "loss": 0.0278, "step": 27650 }, { "epoch": 0.23356061725528277, "grad_norm": 1.1581815481185913, "learning_rate": 9.991427108758347e-06, "loss": 0.0254, "step": 27660 }, { "epoch": 0.23364505710244665, "grad_norm": 0.46989116072654724, "learning_rate": 9.991383921987519e-06, "loss": 0.0239, "step": 27670 }, { "epoch": 0.2337294969496105, "grad_norm": 1.2644532918930054, "learning_rate": 9.991340626804853e-06, "loss": 0.0359, "step": 27680 }, { "epoch": 0.2338139367967744, "grad_norm": 0.8619560599327087, "learning_rate": 9.991297223211292e-06, "loss": 0.0154, "step": 27690 }, { "epoch": 0.23389837664393828, "grad_norm": 0.5236608982086182, "learning_rate": 9.991253711207777e-06, "loss": 0.0207, "step": 27700 }, { "epoch": 0.23398281649110214, "grad_norm": 0.5299783945083618, "learning_rate": 9.991210090795256e-06, "loss": 0.0314, "step": 27710 }, { "epoch": 0.23406725633826603, "grad_norm": 0.702595591545105, "learning_rate": 9.991166361974675e-06, "loss": 0.0168, "step": 27720 }, { "epoch": 0.23415169618542991, "grad_norm": 0.4380881190299988, "learning_rate": 9.991122524746984e-06, "loss": 0.0323, "step": 27730 }, { "epoch": 0.23423613603259377, "grad_norm": 0.42142269015312195, "learning_rate": 9.99107857911313e-06, "loss": 0.0131, "step": 27740 }, { "epoch": 0.23432057587975766, "grad_norm": 0.43106281757354736, "learning_rate": 9.991034525074077e-06, "loss": 0.0182, "step": 27750 }, { "epoch": 0.23440501572692155, "grad_norm": 0.6722537875175476, "learning_rate": 9.990990362630775e-06, "loss": 0.0214, "step": 27760 }, { "epoch": 0.2344894555740854, "grad_norm": 0.5632750391960144, "learning_rate": 9.990946091784186e-06, "loss": 0.0143, "step": 27770 }, { "epoch": 0.2345738954212493, "grad_norm": 0.8654385805130005, "learning_rate": 9.99090171253527e-06, "loss": 0.0188, "step": 27780 }, { "epoch": 0.23465833526841318, "grad_norm": 0.5442235469818115, "learning_rate": 9.990857224884991e-06, "loss": 0.0226, "step": 27790 }, { "epoch": 0.23474277511557703, "grad_norm": 0.4992046356201172, "learning_rate": 9.990812628834316e-06, "loss": 0.0246, "step": 27800 }, { "epoch": 0.23482721496274092, "grad_norm": 0.7370325922966003, "learning_rate": 9.990767924384214e-06, "loss": 0.0103, "step": 27810 }, { "epoch": 0.2349116548099048, "grad_norm": 0.556958794593811, "learning_rate": 9.990723111535654e-06, "loss": 0.0165, "step": 27820 }, { "epoch": 0.23499609465706867, "grad_norm": 0.7070615887641907, "learning_rate": 9.990678190289611e-06, "loss": 0.0124, "step": 27830 }, { "epoch": 0.23508053450423255, "grad_norm": 0.5641347169876099, "learning_rate": 9.990633160647062e-06, "loss": 0.0379, "step": 27840 }, { "epoch": 0.2351649743513964, "grad_norm": 0.4146539866924286, "learning_rate": 9.990588022608982e-06, "loss": 0.0154, "step": 27850 }, { "epoch": 0.2352494141985603, "grad_norm": 0.7874208092689514, "learning_rate": 9.990542776176354e-06, "loss": 0.012, "step": 27860 }, { "epoch": 0.23533385404572418, "grad_norm": 0.9576641321182251, "learning_rate": 9.990497421350159e-06, "loss": 0.0261, "step": 27870 }, { "epoch": 0.23541829389288804, "grad_norm": 0.5383537411689758, "learning_rate": 9.990451958131382e-06, "loss": 0.0164, "step": 27880 }, { "epoch": 0.23550273374005193, "grad_norm": 0.47079119086265564, "learning_rate": 9.990406386521012e-06, "loss": 0.0225, "step": 27890 }, { "epoch": 0.23558717358721581, "grad_norm": 0.40660324692726135, "learning_rate": 9.990360706520036e-06, "loss": 0.0218, "step": 27900 }, { "epoch": 0.23567161343437967, "grad_norm": 0.7700371742248535, "learning_rate": 9.990314918129452e-06, "loss": 0.0172, "step": 27910 }, { "epoch": 0.23575605328154356, "grad_norm": 0.3443540930747986, "learning_rate": 9.990269021350248e-06, "loss": 0.0201, "step": 27920 }, { "epoch": 0.23584049312870745, "grad_norm": 0.40735599398612976, "learning_rate": 9.990223016183423e-06, "loss": 0.0167, "step": 27930 }, { "epoch": 0.2359249329758713, "grad_norm": 1.1383869647979736, "learning_rate": 9.990176902629978e-06, "loss": 0.0207, "step": 27940 }, { "epoch": 0.2360093728230352, "grad_norm": 1.536368727684021, "learning_rate": 9.990130680690911e-06, "loss": 0.0269, "step": 27950 }, { "epoch": 0.23609381267019908, "grad_norm": 0.3662489056587219, "learning_rate": 9.99008435036723e-06, "loss": 0.0146, "step": 27960 }, { "epoch": 0.23617825251736294, "grad_norm": 0.9106585383415222, "learning_rate": 9.99003791165994e-06, "loss": 0.0178, "step": 27970 }, { "epoch": 0.23626269236452682, "grad_norm": 0.3988121449947357, "learning_rate": 9.989991364570048e-06, "loss": 0.0137, "step": 27980 }, { "epoch": 0.2363471322116907, "grad_norm": 0.2876858413219452, "learning_rate": 9.989944709098568e-06, "loss": 0.0175, "step": 27990 }, { "epoch": 0.23643157205885457, "grad_norm": 0.4107872545719147, "learning_rate": 9.989897945246508e-06, "loss": 0.0231, "step": 28000 }, { "epoch": 0.23651601190601845, "grad_norm": 0.29103949666023254, "learning_rate": 9.989851073014889e-06, "loss": 0.0131, "step": 28010 }, { "epoch": 0.23660045175318234, "grad_norm": 0.5893530249595642, "learning_rate": 9.989804092404728e-06, "loss": 0.0276, "step": 28020 }, { "epoch": 0.2366848916003462, "grad_norm": 0.5361539721488953, "learning_rate": 9.989757003417044e-06, "loss": 0.0208, "step": 28030 }, { "epoch": 0.23676933144751008, "grad_norm": 0.4647360146045685, "learning_rate": 9.989709806052861e-06, "loss": 0.016, "step": 28040 }, { "epoch": 0.23685377129467397, "grad_norm": 0.49575650691986084, "learning_rate": 9.989662500313202e-06, "loss": 0.0244, "step": 28050 }, { "epoch": 0.23693821114183783, "grad_norm": 0.5283940434455872, "learning_rate": 9.989615086199097e-06, "loss": 0.0175, "step": 28060 }, { "epoch": 0.23702265098900172, "grad_norm": 0.2981789708137512, "learning_rate": 9.989567563711575e-06, "loss": 0.0234, "step": 28070 }, { "epoch": 0.23710709083616557, "grad_norm": 1.1213611364364624, "learning_rate": 9.989519932851668e-06, "loss": 0.0201, "step": 28080 }, { "epoch": 0.23719153068332946, "grad_norm": 0.28391048312187195, "learning_rate": 9.989472193620408e-06, "loss": 0.023, "step": 28090 }, { "epoch": 0.23727597053049335, "grad_norm": 1.047142505645752, "learning_rate": 9.989424346018838e-06, "loss": 0.0217, "step": 28100 }, { "epoch": 0.2373604103776572, "grad_norm": 1.0090724229812622, "learning_rate": 9.989376390047992e-06, "loss": 0.0195, "step": 28110 }, { "epoch": 0.2374448502248211, "grad_norm": 0.7084403038024902, "learning_rate": 9.989328325708913e-06, "loss": 0.0364, "step": 28120 }, { "epoch": 0.23752929007198498, "grad_norm": 0.5663070678710938, "learning_rate": 9.989280153002646e-06, "loss": 0.0229, "step": 28130 }, { "epoch": 0.23761372991914884, "grad_norm": 0.5628429651260376, "learning_rate": 9.989231871930236e-06, "loss": 0.0256, "step": 28140 }, { "epoch": 0.23769816976631272, "grad_norm": 0.6185563206672668, "learning_rate": 9.989183482492731e-06, "loss": 0.031, "step": 28150 }, { "epoch": 0.2377826096134766, "grad_norm": 0.3084894120693207, "learning_rate": 9.989134984691185e-06, "loss": 0.0196, "step": 28160 }, { "epoch": 0.23786704946064047, "grad_norm": 0.9319602847099304, "learning_rate": 9.98908637852665e-06, "loss": 0.0253, "step": 28170 }, { "epoch": 0.23795148930780435, "grad_norm": 0.6613240838050842, "learning_rate": 9.989037664000178e-06, "loss": 0.0219, "step": 28180 }, { "epoch": 0.23803592915496824, "grad_norm": 0.6750952005386353, "learning_rate": 9.988988841112832e-06, "loss": 0.0174, "step": 28190 }, { "epoch": 0.2381203690021321, "grad_norm": 0.40647444128990173, "learning_rate": 9.988939909865672e-06, "loss": 0.0151, "step": 28200 }, { "epoch": 0.23820480884929598, "grad_norm": 1.0055055618286133, "learning_rate": 9.988890870259757e-06, "loss": 0.0292, "step": 28210 }, { "epoch": 0.23828924869645987, "grad_norm": 0.6529734134674072, "learning_rate": 9.988841722296157e-06, "loss": 0.0263, "step": 28220 }, { "epoch": 0.23837368854362373, "grad_norm": 0.8802583813667297, "learning_rate": 9.988792465975936e-06, "loss": 0.0237, "step": 28230 }, { "epoch": 0.23845812839078762, "grad_norm": 1.2474157810211182, "learning_rate": 9.988743101300166e-06, "loss": 0.0417, "step": 28240 }, { "epoch": 0.2385425682379515, "grad_norm": 0.33134225010871887, "learning_rate": 9.988693628269916e-06, "loss": 0.0172, "step": 28250 }, { "epoch": 0.23862700808511536, "grad_norm": 0.39738577604293823, "learning_rate": 9.988644046886265e-06, "loss": 0.0179, "step": 28260 }, { "epoch": 0.23871144793227925, "grad_norm": 0.8338103890419006, "learning_rate": 9.988594357150287e-06, "loss": 0.0259, "step": 28270 }, { "epoch": 0.2387958877794431, "grad_norm": 0.40315887331962585, "learning_rate": 9.98854455906306e-06, "loss": 0.0319, "step": 28280 }, { "epoch": 0.238880327626607, "grad_norm": 0.37541279196739197, "learning_rate": 9.988494652625668e-06, "loss": 0.0142, "step": 28290 }, { "epoch": 0.23896476747377088, "grad_norm": 0.3688196539878845, "learning_rate": 9.988444637839197e-06, "loss": 0.0209, "step": 28300 }, { "epoch": 0.23904920732093474, "grad_norm": 0.4554121196269989, "learning_rate": 9.988394514704728e-06, "loss": 0.023, "step": 28310 }, { "epoch": 0.23913364716809862, "grad_norm": 1.001141905784607, "learning_rate": 9.988344283223353e-06, "loss": 0.0176, "step": 28320 }, { "epoch": 0.2392180870152625, "grad_norm": 0.2164127081632614, "learning_rate": 9.988293943396164e-06, "loss": 0.0162, "step": 28330 }, { "epoch": 0.23930252686242637, "grad_norm": 0.3798365294933319, "learning_rate": 9.98824349522425e-06, "loss": 0.0189, "step": 28340 }, { "epoch": 0.23938696670959025, "grad_norm": 0.5902982354164124, "learning_rate": 9.988192938708712e-06, "loss": 0.0153, "step": 28350 }, { "epoch": 0.23947140655675414, "grad_norm": 0.9268251657485962, "learning_rate": 9.988142273850643e-06, "loss": 0.0203, "step": 28360 }, { "epoch": 0.239555846403918, "grad_norm": 0.8452203869819641, "learning_rate": 9.988091500651148e-06, "loss": 0.0252, "step": 28370 }, { "epoch": 0.23964028625108189, "grad_norm": 0.08295076340436935, "learning_rate": 9.988040619111327e-06, "loss": 0.0125, "step": 28380 }, { "epoch": 0.23972472609824577, "grad_norm": 0.5969346761703491, "learning_rate": 9.987989629232287e-06, "loss": 0.0181, "step": 28390 }, { "epoch": 0.23980916594540963, "grad_norm": 0.5815712809562683, "learning_rate": 9.987938531015133e-06, "loss": 0.0135, "step": 28400 }, { "epoch": 0.23989360579257352, "grad_norm": 1.0255465507507324, "learning_rate": 9.987887324460977e-06, "loss": 0.0201, "step": 28410 }, { "epoch": 0.2399780456397374, "grad_norm": 0.8715900182723999, "learning_rate": 9.98783600957093e-06, "loss": 0.0354, "step": 28420 }, { "epoch": 0.24006248548690126, "grad_norm": 0.28756797313690186, "learning_rate": 9.987784586346105e-06, "loss": 0.0162, "step": 28430 }, { "epoch": 0.24014692533406515, "grad_norm": 0.7890941500663757, "learning_rate": 9.987733054787624e-06, "loss": 0.0181, "step": 28440 }, { "epoch": 0.24023136518122903, "grad_norm": 0.48387411236763, "learning_rate": 9.987681414896601e-06, "loss": 0.0188, "step": 28450 }, { "epoch": 0.2403158050283929, "grad_norm": 0.3041033148765564, "learning_rate": 9.987629666674161e-06, "loss": 0.0214, "step": 28460 }, { "epoch": 0.24040024487555678, "grad_norm": 0.31495189666748047, "learning_rate": 9.987577810121428e-06, "loss": 0.0155, "step": 28470 }, { "epoch": 0.24048468472272067, "grad_norm": 0.9791492223739624, "learning_rate": 9.987525845239525e-06, "loss": 0.0191, "step": 28480 }, { "epoch": 0.24056912456988452, "grad_norm": 0.531602144241333, "learning_rate": 9.987473772029584e-06, "loss": 0.0209, "step": 28490 }, { "epoch": 0.2406535644170484, "grad_norm": 0.6208949089050293, "learning_rate": 9.987421590492732e-06, "loss": 0.0226, "step": 28500 }, { "epoch": 0.24073800426421227, "grad_norm": 0.7257529497146606, "learning_rate": 9.987369300630108e-06, "loss": 0.0228, "step": 28510 }, { "epoch": 0.24082244411137615, "grad_norm": 0.8816412091255188, "learning_rate": 9.987316902442843e-06, "loss": 0.0264, "step": 28520 }, { "epoch": 0.24090688395854004, "grad_norm": 0.6911431550979614, "learning_rate": 9.987264395932078e-06, "loss": 0.0221, "step": 28530 }, { "epoch": 0.2409913238057039, "grad_norm": 0.7957324385643005, "learning_rate": 9.98721178109895e-06, "loss": 0.0268, "step": 28540 }, { "epoch": 0.24107576365286779, "grad_norm": 0.5790352821350098, "learning_rate": 9.987159057944608e-06, "loss": 0.0183, "step": 28550 }, { "epoch": 0.24116020350003167, "grad_norm": 0.09503410756587982, "learning_rate": 9.98710622647019e-06, "loss": 0.0165, "step": 28560 }, { "epoch": 0.24124464334719553, "grad_norm": 0.6031726002693176, "learning_rate": 9.987053286676849e-06, "loss": 0.0213, "step": 28570 }, { "epoch": 0.24132908319435942, "grad_norm": 0.05226296931505203, "learning_rate": 9.987000238565731e-06, "loss": 0.0183, "step": 28580 }, { "epoch": 0.2414135230415233, "grad_norm": 0.5992928147315979, "learning_rate": 9.98694708213799e-06, "loss": 0.0292, "step": 28590 }, { "epoch": 0.24149796288868716, "grad_norm": 0.6039018034934998, "learning_rate": 9.986893817394782e-06, "loss": 0.0261, "step": 28600 }, { "epoch": 0.24158240273585105, "grad_norm": 0.597484827041626, "learning_rate": 9.98684044433726e-06, "loss": 0.0146, "step": 28610 }, { "epoch": 0.24166684258301493, "grad_norm": 0.19494076073169708, "learning_rate": 9.986786962966586e-06, "loss": 0.0163, "step": 28620 }, { "epoch": 0.2417512824301788, "grad_norm": 0.5847229957580566, "learning_rate": 9.98673337328392e-06, "loss": 0.0193, "step": 28630 }, { "epoch": 0.24183572227734268, "grad_norm": 0.13578484952449799, "learning_rate": 9.986679675290429e-06, "loss": 0.0188, "step": 28640 }, { "epoch": 0.24192016212450657, "grad_norm": 0.6360233426094055, "learning_rate": 9.986625868987278e-06, "loss": 0.0195, "step": 28650 }, { "epoch": 0.24200460197167042, "grad_norm": 0.2544478178024292, "learning_rate": 9.986571954375633e-06, "loss": 0.0143, "step": 28660 }, { "epoch": 0.2420890418188343, "grad_norm": 0.6551164388656616, "learning_rate": 9.986517931456667e-06, "loss": 0.0177, "step": 28670 }, { "epoch": 0.2421734816659982, "grad_norm": 0.6024217009544373, "learning_rate": 9.986463800231553e-06, "loss": 0.0152, "step": 28680 }, { "epoch": 0.24225792151316206, "grad_norm": 0.75615394115448, "learning_rate": 9.98640956070147e-06, "loss": 0.0198, "step": 28690 }, { "epoch": 0.24234236136032594, "grad_norm": 0.11999067664146423, "learning_rate": 9.98635521286759e-06, "loss": 0.028, "step": 28700 }, { "epoch": 0.24242680120748983, "grad_norm": 0.6479076147079468, "learning_rate": 9.986300756731098e-06, "loss": 0.024, "step": 28710 }, { "epoch": 0.2425112410546537, "grad_norm": 0.6058458685874939, "learning_rate": 9.986246192293176e-06, "loss": 0.0229, "step": 28720 }, { "epoch": 0.24259568090181757, "grad_norm": 0.28364259004592896, "learning_rate": 9.986191519555007e-06, "loss": 0.0212, "step": 28730 }, { "epoch": 0.24268012074898143, "grad_norm": 0.41315460205078125, "learning_rate": 9.98613673851778e-06, "loss": 0.0197, "step": 28740 }, { "epoch": 0.24276456059614532, "grad_norm": 1.1959506273269653, "learning_rate": 9.986081849182687e-06, "loss": 0.0125, "step": 28750 }, { "epoch": 0.2428490004433092, "grad_norm": 1.1281275749206543, "learning_rate": 9.986026851550916e-06, "loss": 0.0278, "step": 28760 }, { "epoch": 0.24293344029047306, "grad_norm": 0.2708589434623718, "learning_rate": 9.985971745623664e-06, "loss": 0.0233, "step": 28770 }, { "epoch": 0.24301788013763695, "grad_norm": 0.440250426530838, "learning_rate": 9.985916531402128e-06, "loss": 0.0227, "step": 28780 }, { "epoch": 0.24310231998480084, "grad_norm": 0.11755696684122086, "learning_rate": 9.985861208887504e-06, "loss": 0.0211, "step": 28790 }, { "epoch": 0.2431867598319647, "grad_norm": 0.7591932415962219, "learning_rate": 9.985805778081e-06, "loss": 0.0207, "step": 28800 }, { "epoch": 0.24327119967912858, "grad_norm": 0.3995685279369354, "learning_rate": 9.985750238983814e-06, "loss": 0.0211, "step": 28810 }, { "epoch": 0.24335563952629247, "grad_norm": 0.3917423188686371, "learning_rate": 9.985694591597153e-06, "loss": 0.0182, "step": 28820 }, { "epoch": 0.24344007937345633, "grad_norm": 0.6523813009262085, "learning_rate": 9.98563883592223e-06, "loss": 0.0146, "step": 28830 }, { "epoch": 0.2435245192206202, "grad_norm": 0.4701347351074219, "learning_rate": 9.985582971960253e-06, "loss": 0.0198, "step": 28840 }, { "epoch": 0.2436089590677841, "grad_norm": 0.5403415560722351, "learning_rate": 9.985526999712434e-06, "loss": 0.0165, "step": 28850 }, { "epoch": 0.24369339891494796, "grad_norm": 1.1160930395126343, "learning_rate": 9.98547091917999e-06, "loss": 0.0199, "step": 28860 }, { "epoch": 0.24377783876211184, "grad_norm": 0.17758704721927643, "learning_rate": 9.98541473036414e-06, "loss": 0.0284, "step": 28870 }, { "epoch": 0.24386227860927573, "grad_norm": 0.34004607796669006, "learning_rate": 9.985358433266103e-06, "loss": 0.0291, "step": 28880 }, { "epoch": 0.2439467184564396, "grad_norm": 1.0946862697601318, "learning_rate": 9.985302027887104e-06, "loss": 0.0211, "step": 28890 }, { "epoch": 0.24403115830360347, "grad_norm": 0.20475253462791443, "learning_rate": 9.985245514228366e-06, "loss": 0.0172, "step": 28900 }, { "epoch": 0.24411559815076736, "grad_norm": 0.619295597076416, "learning_rate": 9.985188892291116e-06, "loss": 0.0148, "step": 28910 }, { "epoch": 0.24420003799793122, "grad_norm": 0.6925793886184692, "learning_rate": 9.985132162076586e-06, "loss": 0.0132, "step": 28920 }, { "epoch": 0.2442844778450951, "grad_norm": 0.6430999040603638, "learning_rate": 9.985075323586007e-06, "loss": 0.0264, "step": 28930 }, { "epoch": 0.24436891769225896, "grad_norm": 0.4133574366569519, "learning_rate": 9.985018376820612e-06, "loss": 0.0263, "step": 28940 }, { "epoch": 0.24445335753942285, "grad_norm": 0.40369197726249695, "learning_rate": 9.98496132178164e-06, "loss": 0.0231, "step": 28950 }, { "epoch": 0.24453779738658674, "grad_norm": 0.24167312681674957, "learning_rate": 9.984904158470331e-06, "loss": 0.0171, "step": 28960 }, { "epoch": 0.2446222372337506, "grad_norm": 0.623212456703186, "learning_rate": 9.984846886887925e-06, "loss": 0.0251, "step": 28970 }, { "epoch": 0.24470667708091448, "grad_norm": 0.237675741314888, "learning_rate": 9.984789507035663e-06, "loss": 0.0172, "step": 28980 }, { "epoch": 0.24479111692807837, "grad_norm": 1.2338725328445435, "learning_rate": 9.984732018914798e-06, "loss": 0.0194, "step": 28990 }, { "epoch": 0.24487555677524223, "grad_norm": 0.614937961101532, "learning_rate": 9.984674422526575e-06, "loss": 0.0155, "step": 29000 }, { "epoch": 0.2449599966224061, "grad_norm": 0.33097249269485474, "learning_rate": 9.984616717872242e-06, "loss": 0.0162, "step": 29010 }, { "epoch": 0.24504443646957, "grad_norm": 0.5410364866256714, "learning_rate": 9.984558904953056e-06, "loss": 0.0251, "step": 29020 }, { "epoch": 0.24512887631673386, "grad_norm": 0.5007890462875366, "learning_rate": 9.984500983770275e-06, "loss": 0.0222, "step": 29030 }, { "epoch": 0.24521331616389774, "grad_norm": 0.4367752969264984, "learning_rate": 9.98444295432515e-06, "loss": 0.0143, "step": 29040 }, { "epoch": 0.24529775601106163, "grad_norm": 0.7068924903869629, "learning_rate": 9.984384816618949e-06, "loss": 0.0196, "step": 29050 }, { "epoch": 0.2453821958582255, "grad_norm": 0.8514867424964905, "learning_rate": 9.984326570652928e-06, "loss": 0.0222, "step": 29060 }, { "epoch": 0.24546663570538937, "grad_norm": 0.4500024616718292, "learning_rate": 9.984268216428359e-06, "loss": 0.0249, "step": 29070 }, { "epoch": 0.24555107555255326, "grad_norm": 0.3868659436702728, "learning_rate": 9.984209753946503e-06, "loss": 0.0121, "step": 29080 }, { "epoch": 0.24563551539971712, "grad_norm": 0.28440913558006287, "learning_rate": 9.984151183208633e-06, "loss": 0.0167, "step": 29090 }, { "epoch": 0.245719955246881, "grad_norm": 0.28998732566833496, "learning_rate": 9.984092504216021e-06, "loss": 0.0149, "step": 29100 }, { "epoch": 0.2458043950940449, "grad_norm": 0.48661312460899353, "learning_rate": 9.98403371696994e-06, "loss": 0.0301, "step": 29110 }, { "epoch": 0.24588883494120875, "grad_norm": 0.5851811170578003, "learning_rate": 9.983974821471672e-06, "loss": 0.0135, "step": 29120 }, { "epoch": 0.24597327478837264, "grad_norm": 0.7604092359542847, "learning_rate": 9.983915817722487e-06, "loss": 0.0141, "step": 29130 }, { "epoch": 0.24605771463553652, "grad_norm": 0.7290951609611511, "learning_rate": 9.983856705723675e-06, "loss": 0.0181, "step": 29140 }, { "epoch": 0.24614215448270038, "grad_norm": 0.2671697735786438, "learning_rate": 9.983797485476515e-06, "loss": 0.0092, "step": 29150 }, { "epoch": 0.24622659432986427, "grad_norm": 0.45442649722099304, "learning_rate": 9.983738156982296e-06, "loss": 0.0169, "step": 29160 }, { "epoch": 0.24631103417702813, "grad_norm": 0.5230826139450073, "learning_rate": 9.983678720242305e-06, "loss": 0.022, "step": 29170 }, { "epoch": 0.246395474024192, "grad_norm": 0.4570551812648773, "learning_rate": 9.983619175257833e-06, "loss": 0.0173, "step": 29180 }, { "epoch": 0.2464799138713559, "grad_norm": 0.5990657806396484, "learning_rate": 9.983559522030175e-06, "loss": 0.017, "step": 29190 }, { "epoch": 0.24656435371851976, "grad_norm": 0.4376397430896759, "learning_rate": 9.983499760560625e-06, "loss": 0.0146, "step": 29200 }, { "epoch": 0.24664879356568364, "grad_norm": 0.7973513007164001, "learning_rate": 9.98343989085048e-06, "loss": 0.0236, "step": 29210 }, { "epoch": 0.24673323341284753, "grad_norm": 1.173440933227539, "learning_rate": 9.983379912901044e-06, "loss": 0.0296, "step": 29220 }, { "epoch": 0.2468176732600114, "grad_norm": 0.7579876184463501, "learning_rate": 9.983319826713616e-06, "loss": 0.0163, "step": 29230 }, { "epoch": 0.24690211310717528, "grad_norm": 1.2568832635879517, "learning_rate": 9.983259632289501e-06, "loss": 0.0247, "step": 29240 }, { "epoch": 0.24698655295433916, "grad_norm": 0.7404568791389465, "learning_rate": 9.98319932963001e-06, "loss": 0.0279, "step": 29250 }, { "epoch": 0.24707099280150302, "grad_norm": 0.348677396774292, "learning_rate": 9.98313891873645e-06, "loss": 0.0139, "step": 29260 }, { "epoch": 0.2471554326486669, "grad_norm": 0.4100131690502167, "learning_rate": 9.983078399610135e-06, "loss": 0.0117, "step": 29270 }, { "epoch": 0.2472398724958308, "grad_norm": 1.092540979385376, "learning_rate": 9.983017772252376e-06, "loss": 0.0179, "step": 29280 }, { "epoch": 0.24732431234299465, "grad_norm": 0.2632239758968353, "learning_rate": 9.982957036664495e-06, "loss": 0.0117, "step": 29290 }, { "epoch": 0.24740875219015854, "grad_norm": 0.37263232469558716, "learning_rate": 9.982896192847807e-06, "loss": 0.0251, "step": 29300 }, { "epoch": 0.24749319203732242, "grad_norm": 0.34638339281082153, "learning_rate": 9.982835240803635e-06, "loss": 0.0197, "step": 29310 }, { "epoch": 0.24757763188448628, "grad_norm": 0.4488163888454437, "learning_rate": 9.982774180533302e-06, "loss": 0.0268, "step": 29320 }, { "epoch": 0.24766207173165017, "grad_norm": 0.6178228259086609, "learning_rate": 9.982713012038136e-06, "loss": 0.0238, "step": 29330 }, { "epoch": 0.24774651157881405, "grad_norm": 0.38688454031944275, "learning_rate": 9.982651735319464e-06, "loss": 0.0229, "step": 29340 }, { "epoch": 0.2478309514259779, "grad_norm": 0.2547280192375183, "learning_rate": 9.982590350378615e-06, "loss": 0.016, "step": 29350 }, { "epoch": 0.2479153912731418, "grad_norm": 0.536419153213501, "learning_rate": 9.982528857216929e-06, "loss": 0.0198, "step": 29360 }, { "epoch": 0.24799983112030569, "grad_norm": 0.9991722702980042, "learning_rate": 9.982467255835734e-06, "loss": 0.016, "step": 29370 }, { "epoch": 0.24808427096746954, "grad_norm": 0.4468851089477539, "learning_rate": 9.98240554623637e-06, "loss": 0.0371, "step": 29380 }, { "epoch": 0.24816871081463343, "grad_norm": 0.8915745615959167, "learning_rate": 9.982343728420183e-06, "loss": 0.0206, "step": 29390 }, { "epoch": 0.2482531506617973, "grad_norm": 0.5386895537376404, "learning_rate": 9.982281802388508e-06, "loss": 0.0204, "step": 29400 }, { "epoch": 0.24833759050896118, "grad_norm": 0.6031400561332703, "learning_rate": 9.982219768142695e-06, "loss": 0.0154, "step": 29410 }, { "epoch": 0.24842203035612506, "grad_norm": 0.24478717148303986, "learning_rate": 9.982157625684088e-06, "loss": 0.0103, "step": 29420 }, { "epoch": 0.24850647020328892, "grad_norm": 1.0174806118011475, "learning_rate": 9.982095375014039e-06, "loss": 0.0254, "step": 29430 }, { "epoch": 0.2485909100504528, "grad_norm": 0.6276974081993103, "learning_rate": 9.982033016133898e-06, "loss": 0.0158, "step": 29440 }, { "epoch": 0.2486753498976167, "grad_norm": 0.4624166488647461, "learning_rate": 9.981970549045023e-06, "loss": 0.02, "step": 29450 }, { "epoch": 0.24875978974478055, "grad_norm": 0.34913742542266846, "learning_rate": 9.981907973748767e-06, "loss": 0.0133, "step": 29460 }, { "epoch": 0.24884422959194444, "grad_norm": 1.219577431678772, "learning_rate": 9.981845290246493e-06, "loss": 0.0258, "step": 29470 }, { "epoch": 0.24892866943910832, "grad_norm": 0.49726665019989014, "learning_rate": 9.981782498539558e-06, "loss": 0.0187, "step": 29480 }, { "epoch": 0.24901310928627218, "grad_norm": 0.514760434627533, "learning_rate": 9.98171959862933e-06, "loss": 0.0179, "step": 29490 }, { "epoch": 0.24909754913343607, "grad_norm": 0.34128379821777344, "learning_rate": 9.98165659051717e-06, "loss": 0.0119, "step": 29500 }, { "epoch": 0.24918198898059996, "grad_norm": 0.40064942836761475, "learning_rate": 9.981593474204452e-06, "loss": 0.0121, "step": 29510 }, { "epoch": 0.24926642882776381, "grad_norm": 1.4801392555236816, "learning_rate": 9.981530249692544e-06, "loss": 0.0295, "step": 29520 }, { "epoch": 0.2493508686749277, "grad_norm": 0.4211699664592743, "learning_rate": 9.98146691698282e-06, "loss": 0.0231, "step": 29530 }, { "epoch": 0.2494353085220916, "grad_norm": 0.18023066222667694, "learning_rate": 9.981403476076654e-06, "loss": 0.0096, "step": 29540 }, { "epoch": 0.24951974836925545, "grad_norm": 0.361325204372406, "learning_rate": 9.981339926975428e-06, "loss": 0.0165, "step": 29550 }, { "epoch": 0.24960418821641933, "grad_norm": 0.6639896631240845, "learning_rate": 9.981276269680518e-06, "loss": 0.0338, "step": 29560 }, { "epoch": 0.24968862806358322, "grad_norm": 0.3827464282512665, "learning_rate": 9.981212504193307e-06, "loss": 0.021, "step": 29570 }, { "epoch": 0.24977306791074708, "grad_norm": 0.8889147043228149, "learning_rate": 9.981148630515183e-06, "loss": 0.0211, "step": 29580 }, { "epoch": 0.24985750775791096, "grad_norm": 1.0647786855697632, "learning_rate": 9.98108464864753e-06, "loss": 0.0302, "step": 29590 }, { "epoch": 0.24994194760507482, "grad_norm": 0.6159370541572571, "learning_rate": 9.981020558591742e-06, "loss": 0.0182, "step": 29600 }, { "epoch": 0.2500263874522387, "grad_norm": 0.8068235516548157, "learning_rate": 9.980956360349206e-06, "loss": 0.0233, "step": 29610 }, { "epoch": 0.2501108272994026, "grad_norm": 0.475574254989624, "learning_rate": 9.980892053921317e-06, "loss": 0.0171, "step": 29620 }, { "epoch": 0.2501952671465665, "grad_norm": 0.7134497761726379, "learning_rate": 9.980827639309476e-06, "loss": 0.0227, "step": 29630 }, { "epoch": 0.25027970699373037, "grad_norm": 0.8341511487960815, "learning_rate": 9.980763116515077e-06, "loss": 0.0164, "step": 29640 }, { "epoch": 0.2503641468408942, "grad_norm": 0.9115257859230042, "learning_rate": 9.980698485539526e-06, "loss": 0.0159, "step": 29650 }, { "epoch": 0.2504485866880581, "grad_norm": 0.8469298481941223, "learning_rate": 9.980633746384223e-06, "loss": 0.0189, "step": 29660 }, { "epoch": 0.25053302653522197, "grad_norm": 0.8569917678833008, "learning_rate": 9.980568899050577e-06, "loss": 0.0199, "step": 29670 }, { "epoch": 0.25061746638238586, "grad_norm": 0.39111629128456116, "learning_rate": 9.980503943539994e-06, "loss": 0.0183, "step": 29680 }, { "epoch": 0.25070190622954974, "grad_norm": 0.3144131600856781, "learning_rate": 9.980438879853886e-06, "loss": 0.0228, "step": 29690 }, { "epoch": 0.25078634607671363, "grad_norm": 0.031535644084215164, "learning_rate": 9.980373707993668e-06, "loss": 0.0205, "step": 29700 }, { "epoch": 0.25087078592387746, "grad_norm": 0.4735458195209503, "learning_rate": 9.98030842796075e-06, "loss": 0.0227, "step": 29710 }, { "epoch": 0.25095522577104135, "grad_norm": 0.46295079588890076, "learning_rate": 9.980243039756556e-06, "loss": 0.0152, "step": 29720 }, { "epoch": 0.25103966561820523, "grad_norm": 0.6349384784698486, "learning_rate": 9.980177543382503e-06, "loss": 0.0185, "step": 29730 }, { "epoch": 0.2511241054653691, "grad_norm": 0.6940966248512268, "learning_rate": 9.980111938840016e-06, "loss": 0.0237, "step": 29740 }, { "epoch": 0.251208545312533, "grad_norm": 0.886567234992981, "learning_rate": 9.980046226130516e-06, "loss": 0.0246, "step": 29750 }, { "epoch": 0.25129298515969684, "grad_norm": 0.6488938331604004, "learning_rate": 9.979980405255434e-06, "loss": 0.0179, "step": 29760 }, { "epoch": 0.2513774250068607, "grad_norm": 0.4642854332923889, "learning_rate": 9.979914476216196e-06, "loss": 0.0138, "step": 29770 }, { "epoch": 0.2514618648540246, "grad_norm": 0.5779820680618286, "learning_rate": 9.97984843901424e-06, "loss": 0.0155, "step": 29780 }, { "epoch": 0.2515463047011885, "grad_norm": 0.3822435736656189, "learning_rate": 9.979782293650993e-06, "loss": 0.0167, "step": 29790 }, { "epoch": 0.2516307445483524, "grad_norm": 0.761596143245697, "learning_rate": 9.979716040127895e-06, "loss": 0.0174, "step": 29800 }, { "epoch": 0.25171518439551627, "grad_norm": 0.7039207816123962, "learning_rate": 9.979649678446386e-06, "loss": 0.0227, "step": 29810 }, { "epoch": 0.2517996242426801, "grad_norm": 0.46505793929100037, "learning_rate": 9.979583208607906e-06, "loss": 0.0168, "step": 29820 }, { "epoch": 0.251884064089844, "grad_norm": 0.4768184423446655, "learning_rate": 9.979516630613898e-06, "loss": 0.019, "step": 29830 }, { "epoch": 0.25196850393700787, "grad_norm": 0.5971975922584534, "learning_rate": 9.979449944465813e-06, "loss": 0.0278, "step": 29840 }, { "epoch": 0.25205294378417176, "grad_norm": 0.8063632845878601, "learning_rate": 9.979383150165092e-06, "loss": 0.0224, "step": 29850 }, { "epoch": 0.25213738363133564, "grad_norm": 0.3089672327041626, "learning_rate": 9.979316247713191e-06, "loss": 0.0171, "step": 29860 }, { "epoch": 0.25222182347849953, "grad_norm": 0.7943946719169617, "learning_rate": 9.979249237111561e-06, "loss": 0.0271, "step": 29870 }, { "epoch": 0.25230626332566336, "grad_norm": 0.5527085065841675, "learning_rate": 9.979182118361658e-06, "loss": 0.0208, "step": 29880 }, { "epoch": 0.25239070317282725, "grad_norm": 0.5885403752326965, "learning_rate": 9.97911489146494e-06, "loss": 0.0149, "step": 29890 }, { "epoch": 0.25247514301999113, "grad_norm": 0.5967556238174438, "learning_rate": 9.979047556422867e-06, "loss": 0.0155, "step": 29900 }, { "epoch": 0.252559582867155, "grad_norm": 0.6926714777946472, "learning_rate": 9.9789801132369e-06, "loss": 0.0163, "step": 29910 }, { "epoch": 0.2526440227143189, "grad_norm": 0.6515659093856812, "learning_rate": 9.978912561908508e-06, "loss": 0.0328, "step": 29920 }, { "epoch": 0.25272846256148274, "grad_norm": 0.7469853162765503, "learning_rate": 9.978844902439152e-06, "loss": 0.0173, "step": 29930 }, { "epoch": 0.2528129024086466, "grad_norm": 0.785397469997406, "learning_rate": 9.978777134830307e-06, "loss": 0.0194, "step": 29940 }, { "epoch": 0.2528973422558105, "grad_norm": 0.6930789351463318, "learning_rate": 9.978709259083443e-06, "loss": 0.0306, "step": 29950 }, { "epoch": 0.2529817821029744, "grad_norm": 0.40294399857521057, "learning_rate": 9.978641275200032e-06, "loss": 0.0201, "step": 29960 }, { "epoch": 0.2530662219501383, "grad_norm": 1.364675760269165, "learning_rate": 9.978573183181555e-06, "loss": 0.0325, "step": 29970 }, { "epoch": 0.25315066179730217, "grad_norm": 0.7984464168548584, "learning_rate": 9.97850498302949e-06, "loss": 0.025, "step": 29980 }, { "epoch": 0.253235101644466, "grad_norm": 0.9968062043190002, "learning_rate": 9.978436674745313e-06, "loss": 0.0255, "step": 29990 }, { "epoch": 0.2533195414916299, "grad_norm": 0.25299981236457825, "learning_rate": 9.978368258330514e-06, "loss": 0.0134, "step": 30000 }, { "epoch": 0.25340398133879377, "grad_norm": 0.5301281213760376, "learning_rate": 9.978299733786576e-06, "loss": 0.0221, "step": 30010 }, { "epoch": 0.25348842118595766, "grad_norm": 0.4672926664352417, "learning_rate": 9.978231101114989e-06, "loss": 0.0219, "step": 30020 }, { "epoch": 0.25357286103312154, "grad_norm": 0.3254407048225403, "learning_rate": 9.978162360317241e-06, "loss": 0.0202, "step": 30030 }, { "epoch": 0.25365730088028543, "grad_norm": 0.5956305861473083, "learning_rate": 9.978093511394827e-06, "loss": 0.0169, "step": 30040 }, { "epoch": 0.25374174072744926, "grad_norm": 0.3253295123577118, "learning_rate": 9.978024554349241e-06, "loss": 0.0231, "step": 30050 }, { "epoch": 0.25382618057461315, "grad_norm": 0.5662496089935303, "learning_rate": 9.977955489181985e-06, "loss": 0.0153, "step": 30060 }, { "epoch": 0.25391062042177703, "grad_norm": 0.2668730616569519, "learning_rate": 9.977886315894554e-06, "loss": 0.0159, "step": 30070 }, { "epoch": 0.2539950602689409, "grad_norm": 0.15581251680850983, "learning_rate": 9.977817034488452e-06, "loss": 0.015, "step": 30080 }, { "epoch": 0.2540795001161048, "grad_norm": 0.14924421906471252, "learning_rate": 9.977747644965184e-06, "loss": 0.0148, "step": 30090 }, { "epoch": 0.2541639399632687, "grad_norm": 0.695310652256012, "learning_rate": 9.977678147326257e-06, "loss": 0.0173, "step": 30100 }, { "epoch": 0.2542483798104325, "grad_norm": 0.25675535202026367, "learning_rate": 9.97760854157318e-06, "loss": 0.02, "step": 30110 }, { "epoch": 0.2543328196575964, "grad_norm": 0.64629727602005, "learning_rate": 9.977538827707468e-06, "loss": 0.0182, "step": 30120 }, { "epoch": 0.2544172595047603, "grad_norm": 0.5590345859527588, "learning_rate": 9.977469005730631e-06, "loss": 0.0266, "step": 30130 }, { "epoch": 0.2545016993519242, "grad_norm": 0.6001648902893066, "learning_rate": 9.977399075644187e-06, "loss": 0.0146, "step": 30140 }, { "epoch": 0.25458613919908807, "grad_norm": 0.8319727182388306, "learning_rate": 9.977329037449657e-06, "loss": 0.0182, "step": 30150 }, { "epoch": 0.2546705790462519, "grad_norm": 0.475966215133667, "learning_rate": 9.977258891148558e-06, "loss": 0.0096, "step": 30160 }, { "epoch": 0.2547550188934158, "grad_norm": 0.727107048034668, "learning_rate": 9.977188636742416e-06, "loss": 0.0136, "step": 30170 }, { "epoch": 0.25483945874057967, "grad_norm": 0.592562735080719, "learning_rate": 9.977118274232758e-06, "loss": 0.0213, "step": 30180 }, { "epoch": 0.25492389858774356, "grad_norm": 0.39501485228538513, "learning_rate": 9.977047803621111e-06, "loss": 0.0196, "step": 30190 }, { "epoch": 0.25500833843490744, "grad_norm": 0.13036194443702698, "learning_rate": 9.976977224909004e-06, "loss": 0.0222, "step": 30200 }, { "epoch": 0.25509277828207133, "grad_norm": 0.4159255623817444, "learning_rate": 9.976906538097974e-06, "loss": 0.0167, "step": 30210 }, { "epoch": 0.25517721812923516, "grad_norm": 0.5453150272369385, "learning_rate": 9.976835743189552e-06, "loss": 0.0157, "step": 30220 }, { "epoch": 0.25526165797639905, "grad_norm": 0.3267778158187866, "learning_rate": 9.976764840185278e-06, "loss": 0.0233, "step": 30230 }, { "epoch": 0.25534609782356293, "grad_norm": 0.3823440968990326, "learning_rate": 9.97669382908669e-06, "loss": 0.0154, "step": 30240 }, { "epoch": 0.2554305376707268, "grad_norm": 0.16570916771888733, "learning_rate": 9.976622709895334e-06, "loss": 0.0235, "step": 30250 }, { "epoch": 0.2555149775178907, "grad_norm": 0.6135692000389099, "learning_rate": 9.976551482612752e-06, "loss": 0.0166, "step": 30260 }, { "epoch": 0.2555994173650546, "grad_norm": 0.4300699830055237, "learning_rate": 9.976480147240491e-06, "loss": 0.019, "step": 30270 }, { "epoch": 0.2556838572122184, "grad_norm": 0.4325074553489685, "learning_rate": 9.976408703780103e-06, "loss": 0.016, "step": 30280 }, { "epoch": 0.2557682970593823, "grad_norm": 0.09626437723636627, "learning_rate": 9.976337152233135e-06, "loss": 0.0186, "step": 30290 }, { "epoch": 0.2558527369065462, "grad_norm": 0.24870027601718903, "learning_rate": 9.976265492601145e-06, "loss": 0.0177, "step": 30300 }, { "epoch": 0.2559371767537101, "grad_norm": 0.7707851529121399, "learning_rate": 9.976193724885687e-06, "loss": 0.0225, "step": 30310 }, { "epoch": 0.25602161660087397, "grad_norm": 0.5098717212677002, "learning_rate": 9.976121849088324e-06, "loss": 0.0221, "step": 30320 }, { "epoch": 0.25610605644803786, "grad_norm": 0.4100222885608673, "learning_rate": 9.976049865210611e-06, "loss": 0.0322, "step": 30330 }, { "epoch": 0.2561904962952017, "grad_norm": 0.4755041301250458, "learning_rate": 9.975977773254117e-06, "loss": 0.0127, "step": 30340 }, { "epoch": 0.2562749361423656, "grad_norm": 0.4978727102279663, "learning_rate": 9.975905573220403e-06, "loss": 0.0193, "step": 30350 }, { "epoch": 0.25635937598952946, "grad_norm": 0.3983165919780731, "learning_rate": 9.975833265111042e-06, "loss": 0.0175, "step": 30360 }, { "epoch": 0.25644381583669335, "grad_norm": 0.46520015597343445, "learning_rate": 9.9757608489276e-06, "loss": 0.0121, "step": 30370 }, { "epoch": 0.25652825568385723, "grad_norm": 0.463880717754364, "learning_rate": 9.975688324671653e-06, "loss": 0.0242, "step": 30380 }, { "epoch": 0.25661269553102106, "grad_norm": 0.5025882124900818, "learning_rate": 9.975615692344776e-06, "loss": 0.0236, "step": 30390 }, { "epoch": 0.25669713537818495, "grad_norm": 0.7237763404846191, "learning_rate": 9.975542951948545e-06, "loss": 0.0251, "step": 30400 }, { "epoch": 0.25678157522534883, "grad_norm": 0.2925538122653961, "learning_rate": 9.975470103484542e-06, "loss": 0.0203, "step": 30410 }, { "epoch": 0.2568660150725127, "grad_norm": 0.47405746579170227, "learning_rate": 9.975397146954346e-06, "loss": 0.0231, "step": 30420 }, { "epoch": 0.2569504549196766, "grad_norm": 0.6293846368789673, "learning_rate": 9.975324082359543e-06, "loss": 0.0179, "step": 30430 }, { "epoch": 0.2570348947668405, "grad_norm": 0.3752853572368622, "learning_rate": 9.97525090970172e-06, "loss": 0.0228, "step": 30440 }, { "epoch": 0.2571193346140043, "grad_norm": 0.5348670482635498, "learning_rate": 9.975177628982471e-06, "loss": 0.0181, "step": 30450 }, { "epoch": 0.2572037744611682, "grad_norm": 0.4895407557487488, "learning_rate": 9.97510424020338e-06, "loss": 0.0216, "step": 30460 }, { "epoch": 0.2572882143083321, "grad_norm": 0.8707072734832764, "learning_rate": 9.975030743366046e-06, "loss": 0.0236, "step": 30470 }, { "epoch": 0.257372654155496, "grad_norm": 0.2965875267982483, "learning_rate": 9.974957138472063e-06, "loss": 0.0116, "step": 30480 }, { "epoch": 0.25745709400265987, "grad_norm": 1.2431056499481201, "learning_rate": 9.97488342552303e-06, "loss": 0.0345, "step": 30490 }, { "epoch": 0.25754153384982376, "grad_norm": 0.46462008357048035, "learning_rate": 9.974809604520548e-06, "loss": 0.0171, "step": 30500 }, { "epoch": 0.2576259736969876, "grad_norm": 0.8606308102607727, "learning_rate": 9.974735675466223e-06, "loss": 0.031, "step": 30510 }, { "epoch": 0.2577104135441515, "grad_norm": 0.5317873358726501, "learning_rate": 9.974661638361655e-06, "loss": 0.02, "step": 30520 }, { "epoch": 0.25779485339131536, "grad_norm": 0.8534569144248962, "learning_rate": 9.974587493208458e-06, "loss": 0.0165, "step": 30530 }, { "epoch": 0.25787929323847925, "grad_norm": 0.3702278137207031, "learning_rate": 9.97451324000824e-06, "loss": 0.0115, "step": 30540 }, { "epoch": 0.25796373308564313, "grad_norm": 0.4451132118701935, "learning_rate": 9.974438878762615e-06, "loss": 0.0153, "step": 30550 }, { "epoch": 0.258048172932807, "grad_norm": 0.5786339044570923, "learning_rate": 9.974364409473195e-06, "loss": 0.0237, "step": 30560 }, { "epoch": 0.25813261277997085, "grad_norm": 0.3176895081996918, "learning_rate": 9.9742898321416e-06, "loss": 0.0359, "step": 30570 }, { "epoch": 0.25821705262713474, "grad_norm": 0.21043314039707184, "learning_rate": 9.974215146769449e-06, "loss": 0.0261, "step": 30580 }, { "epoch": 0.2583014924742986, "grad_norm": 0.8572973012924194, "learning_rate": 9.974140353358365e-06, "loss": 0.0215, "step": 30590 }, { "epoch": 0.2583859323214625, "grad_norm": 0.6523663997650146, "learning_rate": 9.974065451909971e-06, "loss": 0.0231, "step": 30600 }, { "epoch": 0.2584703721686264, "grad_norm": 0.4298763573169708, "learning_rate": 9.973990442425895e-06, "loss": 0.0269, "step": 30610 }, { "epoch": 0.2585548120157902, "grad_norm": 0.4993308484554291, "learning_rate": 9.973915324907767e-06, "loss": 0.0245, "step": 30620 }, { "epoch": 0.2586392518629541, "grad_norm": 0.4612370729446411, "learning_rate": 9.973840099357214e-06, "loss": 0.0123, "step": 30630 }, { "epoch": 0.258723691710118, "grad_norm": 0.5159693360328674, "learning_rate": 9.973764765775875e-06, "loss": 0.0168, "step": 30640 }, { "epoch": 0.2588081315572819, "grad_norm": 0.742645263671875, "learning_rate": 9.973689324165385e-06, "loss": 0.015, "step": 30650 }, { "epoch": 0.25889257140444577, "grad_norm": 0.5526829957962036, "learning_rate": 9.973613774527384e-06, "loss": 0.019, "step": 30660 }, { "epoch": 0.25897701125160966, "grad_norm": 0.3806806802749634, "learning_rate": 9.973538116863509e-06, "loss": 0.0281, "step": 30670 }, { "epoch": 0.2590614510987735, "grad_norm": 0.5968505144119263, "learning_rate": 9.973462351175404e-06, "loss": 0.0186, "step": 30680 }, { "epoch": 0.2591458909459374, "grad_norm": 0.6246885657310486, "learning_rate": 9.973386477464717e-06, "loss": 0.0187, "step": 30690 }, { "epoch": 0.25923033079310126, "grad_norm": 0.7007378339767456, "learning_rate": 9.973310495733096e-06, "loss": 0.0158, "step": 30700 }, { "epoch": 0.25931477064026515, "grad_norm": 0.585655689239502, "learning_rate": 9.973234405982188e-06, "loss": 0.0177, "step": 30710 }, { "epoch": 0.25939921048742903, "grad_norm": 0.9328681826591492, "learning_rate": 9.97315820821365e-06, "loss": 0.0142, "step": 30720 }, { "epoch": 0.2594836503345929, "grad_norm": 0.04486022889614105, "learning_rate": 9.973081902429133e-06, "loss": 0.0147, "step": 30730 }, { "epoch": 0.25956809018175675, "grad_norm": 1.3308161497116089, "learning_rate": 9.973005488630298e-06, "loss": 0.0326, "step": 30740 }, { "epoch": 0.25965253002892064, "grad_norm": 0.37607672810554504, "learning_rate": 9.9729289668188e-06, "loss": 0.0166, "step": 30750 }, { "epoch": 0.2597369698760845, "grad_norm": 2.00325608253479, "learning_rate": 9.972852336996308e-06, "loss": 0.0215, "step": 30760 }, { "epoch": 0.2598214097232484, "grad_norm": 0.5261793732643127, "learning_rate": 9.97277559916448e-06, "loss": 0.0273, "step": 30770 }, { "epoch": 0.2599058495704123, "grad_norm": 0.8390325903892517, "learning_rate": 9.972698753324984e-06, "loss": 0.0271, "step": 30780 }, { "epoch": 0.2599902894175762, "grad_norm": 0.5242083072662354, "learning_rate": 9.972621799479492e-06, "loss": 0.0156, "step": 30790 }, { "epoch": 0.26007472926474, "grad_norm": 0.172367125749588, "learning_rate": 9.972544737629672e-06, "loss": 0.0201, "step": 30800 }, { "epoch": 0.2601591691119039, "grad_norm": 0.2697506844997406, "learning_rate": 9.9724675677772e-06, "loss": 0.0097, "step": 30810 }, { "epoch": 0.2602436089590678, "grad_norm": 0.38386663794517517, "learning_rate": 9.97239028992375e-06, "loss": 0.0231, "step": 30820 }, { "epoch": 0.26032804880623167, "grad_norm": 1.1297876834869385, "learning_rate": 9.972312904071005e-06, "loss": 0.0171, "step": 30830 }, { "epoch": 0.26041248865339556, "grad_norm": 1.4755454063415527, "learning_rate": 9.97223541022064e-06, "loss": 0.0251, "step": 30840 }, { "epoch": 0.2604969285005594, "grad_norm": 0.15817701816558838, "learning_rate": 9.972157808374341e-06, "loss": 0.012, "step": 30850 }, { "epoch": 0.2605813683477233, "grad_norm": 0.730654776096344, "learning_rate": 9.972080098533794e-06, "loss": 0.0205, "step": 30860 }, { "epoch": 0.26066580819488716, "grad_norm": 0.7963147759437561, "learning_rate": 9.972002280700685e-06, "loss": 0.0192, "step": 30870 }, { "epoch": 0.26075024804205105, "grad_norm": 0.5627468228340149, "learning_rate": 9.971924354876707e-06, "loss": 0.0329, "step": 30880 }, { "epoch": 0.26083468788921493, "grad_norm": 0.202985480427742, "learning_rate": 9.97184632106355e-06, "loss": 0.0232, "step": 30890 }, { "epoch": 0.2609191277363788, "grad_norm": 0.5431788563728333, "learning_rate": 9.971768179262911e-06, "loss": 0.0241, "step": 30900 }, { "epoch": 0.26100356758354265, "grad_norm": 0.3612212836742401, "learning_rate": 9.971689929476485e-06, "loss": 0.0115, "step": 30910 }, { "epoch": 0.26108800743070654, "grad_norm": 0.9148202538490295, "learning_rate": 9.971611571705973e-06, "loss": 0.0157, "step": 30920 }, { "epoch": 0.2611724472778704, "grad_norm": 0.4495215117931366, "learning_rate": 9.971533105953076e-06, "loss": 0.0185, "step": 30930 }, { "epoch": 0.2612568871250343, "grad_norm": 0.2218562215566635, "learning_rate": 9.971454532219498e-06, "loss": 0.0154, "step": 30940 }, { "epoch": 0.2613413269721982, "grad_norm": 0.6406641006469727, "learning_rate": 9.971375850506945e-06, "loss": 0.0356, "step": 30950 }, { "epoch": 0.2614257668193621, "grad_norm": 0.5908781886100769, "learning_rate": 9.97129706081713e-06, "loss": 0.0142, "step": 30960 }, { "epoch": 0.2615102066665259, "grad_norm": 0.7900339365005493, "learning_rate": 9.97121816315176e-06, "loss": 0.0165, "step": 30970 }, { "epoch": 0.2615946465136898, "grad_norm": 0.29471421241760254, "learning_rate": 9.971139157512551e-06, "loss": 0.0182, "step": 30980 }, { "epoch": 0.2616790863608537, "grad_norm": 1.2588107585906982, "learning_rate": 9.971060043901219e-06, "loss": 0.0183, "step": 30990 }, { "epoch": 0.26176352620801757, "grad_norm": 0.21671201288700104, "learning_rate": 9.97098082231948e-06, "loss": 0.0102, "step": 31000 }, { "epoch": 0.26184796605518146, "grad_norm": 0.5232071876525879, "learning_rate": 9.970901492769056e-06, "loss": 0.0119, "step": 31010 }, { "epoch": 0.26193240590234534, "grad_norm": 0.40193554759025574, "learning_rate": 9.970822055251673e-06, "loss": 0.0194, "step": 31020 }, { "epoch": 0.2620168457495092, "grad_norm": 0.1844455599784851, "learning_rate": 9.97074250976905e-06, "loss": 0.0237, "step": 31030 }, { "epoch": 0.26210128559667306, "grad_norm": 1.0756996870040894, "learning_rate": 9.97066285632292e-06, "loss": 0.0195, "step": 31040 }, { "epoch": 0.26218572544383695, "grad_norm": 0.42732661962509155, "learning_rate": 9.970583094915011e-06, "loss": 0.023, "step": 31050 }, { "epoch": 0.26227016529100083, "grad_norm": 0.5824329257011414, "learning_rate": 9.970503225547056e-06, "loss": 0.0175, "step": 31060 }, { "epoch": 0.2623546051381647, "grad_norm": 0.24478554725646973, "learning_rate": 9.97042324822079e-06, "loss": 0.0232, "step": 31070 }, { "epoch": 0.26243904498532855, "grad_norm": 0.6304228901863098, "learning_rate": 9.970343162937949e-06, "loss": 0.0159, "step": 31080 }, { "epoch": 0.26252348483249244, "grad_norm": 0.5335085391998291, "learning_rate": 9.970262969700273e-06, "loss": 0.0193, "step": 31090 }, { "epoch": 0.2626079246796563, "grad_norm": 0.6910014748573303, "learning_rate": 9.970182668509502e-06, "loss": 0.0211, "step": 31100 }, { "epoch": 0.2626923645268202, "grad_norm": 0.45535388588905334, "learning_rate": 9.970102259367384e-06, "loss": 0.0191, "step": 31110 }, { "epoch": 0.2627768043739841, "grad_norm": 0.4018721282482147, "learning_rate": 9.970021742275663e-06, "loss": 0.0208, "step": 31120 }, { "epoch": 0.262861244221148, "grad_norm": 0.6525165438652039, "learning_rate": 9.96994111723609e-06, "loss": 0.0169, "step": 31130 }, { "epoch": 0.2629456840683118, "grad_norm": 0.18723241984844208, "learning_rate": 9.969860384250411e-06, "loss": 0.0122, "step": 31140 }, { "epoch": 0.2630301239154757, "grad_norm": 0.5771686434745789, "learning_rate": 9.969779543320387e-06, "loss": 0.0129, "step": 31150 }, { "epoch": 0.2631145637626396, "grad_norm": 0.8115028142929077, "learning_rate": 9.969698594447766e-06, "loss": 0.0116, "step": 31160 }, { "epoch": 0.2631990036098035, "grad_norm": 0.36012962460517883, "learning_rate": 9.96961753763431e-06, "loss": 0.0201, "step": 31170 }, { "epoch": 0.26328344345696736, "grad_norm": 0.7942624092102051, "learning_rate": 9.969536372881783e-06, "loss": 0.0189, "step": 31180 }, { "epoch": 0.26336788330413125, "grad_norm": 0.5167185664176941, "learning_rate": 9.969455100191941e-06, "loss": 0.0106, "step": 31190 }, { "epoch": 0.2634523231512951, "grad_norm": 0.5703794360160828, "learning_rate": 9.969373719566553e-06, "loss": 0.0219, "step": 31200 }, { "epoch": 0.26353676299845896, "grad_norm": 0.684531569480896, "learning_rate": 9.969292231007389e-06, "loss": 0.0293, "step": 31210 }, { "epoch": 0.26362120284562285, "grad_norm": 0.11221698671579361, "learning_rate": 9.969210634516213e-06, "loss": 0.023, "step": 31220 }, { "epoch": 0.26370564269278673, "grad_norm": 0.5179287791252136, "learning_rate": 9.9691289300948e-06, "loss": 0.015, "step": 31230 }, { "epoch": 0.2637900825399506, "grad_norm": 0.30938535928726196, "learning_rate": 9.969047117744926e-06, "loss": 0.0227, "step": 31240 }, { "epoch": 0.26387452238711445, "grad_norm": 0.5269781351089478, "learning_rate": 9.968965197468367e-06, "loss": 0.0192, "step": 31250 }, { "epoch": 0.26395896223427834, "grad_norm": 0.7212033271789551, "learning_rate": 9.968883169266902e-06, "loss": 0.0186, "step": 31260 }, { "epoch": 0.2640434020814422, "grad_norm": 0.4305391013622284, "learning_rate": 9.968801033142312e-06, "loss": 0.0231, "step": 31270 }, { "epoch": 0.2641278419286061, "grad_norm": 0.8708764910697937, "learning_rate": 9.968718789096382e-06, "loss": 0.0204, "step": 31280 }, { "epoch": 0.26421228177577, "grad_norm": 0.438688725233078, "learning_rate": 9.9686364371309e-06, "loss": 0.0207, "step": 31290 }, { "epoch": 0.2642967216229339, "grad_norm": 0.6528160572052002, "learning_rate": 9.968553977247652e-06, "loss": 0.0186, "step": 31300 }, { "epoch": 0.2643811614700977, "grad_norm": 0.44447779655456543, "learning_rate": 9.968471409448429e-06, "loss": 0.0144, "step": 31310 }, { "epoch": 0.2644656013172616, "grad_norm": 0.4124283194541931, "learning_rate": 9.968388733735025e-06, "loss": 0.0167, "step": 31320 }, { "epoch": 0.2645500411644255, "grad_norm": 0.4034607410430908, "learning_rate": 9.968305950109236e-06, "loss": 0.026, "step": 31330 }, { "epoch": 0.2646344810115894, "grad_norm": 0.8807868957519531, "learning_rate": 9.968223058572859e-06, "loss": 0.0195, "step": 31340 }, { "epoch": 0.26471892085875326, "grad_norm": 0.6523179411888123, "learning_rate": 9.968140059127697e-06, "loss": 0.0227, "step": 31350 }, { "epoch": 0.26480336070591715, "grad_norm": 0.07206271588802338, "learning_rate": 9.96805695177555e-06, "loss": 0.0168, "step": 31360 }, { "epoch": 0.264887800553081, "grad_norm": 0.27363720536231995, "learning_rate": 9.967973736518224e-06, "loss": 0.0129, "step": 31370 }, { "epoch": 0.26497224040024486, "grad_norm": 0.16993007063865662, "learning_rate": 9.967890413357527e-06, "loss": 0.0231, "step": 31380 }, { "epoch": 0.26505668024740875, "grad_norm": 0.8960774540901184, "learning_rate": 9.967806982295267e-06, "loss": 0.0168, "step": 31390 }, { "epoch": 0.26514112009457264, "grad_norm": 0.5051686763763428, "learning_rate": 9.967723443333259e-06, "loss": 0.0291, "step": 31400 }, { "epoch": 0.2652255599417365, "grad_norm": 0.2725476026535034, "learning_rate": 9.967639796473316e-06, "loss": 0.0117, "step": 31410 }, { "epoch": 0.2653099997889004, "grad_norm": 0.4252438545227051, "learning_rate": 9.967556041717252e-06, "loss": 0.0207, "step": 31420 }, { "epoch": 0.26539443963606424, "grad_norm": 0.7883577942848206, "learning_rate": 9.967472179066891e-06, "loss": 0.0271, "step": 31430 }, { "epoch": 0.2654788794832281, "grad_norm": 0.5528185367584229, "learning_rate": 9.967388208524051e-06, "loss": 0.0219, "step": 31440 }, { "epoch": 0.265563319330392, "grad_norm": 0.4367256760597229, "learning_rate": 9.967304130090559e-06, "loss": 0.0232, "step": 31450 }, { "epoch": 0.2656477591775559, "grad_norm": 0.022975781932473183, "learning_rate": 9.967219943768239e-06, "loss": 0.0128, "step": 31460 }, { "epoch": 0.2657321990247198, "grad_norm": 0.11631780862808228, "learning_rate": 9.967135649558917e-06, "loss": 0.014, "step": 31470 }, { "epoch": 0.2658166388718836, "grad_norm": 0.2766086161136627, "learning_rate": 9.967051247464427e-06, "loss": 0.0195, "step": 31480 }, { "epoch": 0.2659010787190475, "grad_norm": 0.3897351622581482, "learning_rate": 9.966966737486605e-06, "loss": 0.0159, "step": 31490 }, { "epoch": 0.2659855185662114, "grad_norm": 0.4146277606487274, "learning_rate": 9.96688211962728e-06, "loss": 0.0209, "step": 31500 }, { "epoch": 0.2660699584133753, "grad_norm": 0.6780491471290588, "learning_rate": 9.966797393888294e-06, "loss": 0.0243, "step": 31510 }, { "epoch": 0.26615439826053916, "grad_norm": 0.28837642073631287, "learning_rate": 9.966712560271488e-06, "loss": 0.0196, "step": 31520 }, { "epoch": 0.26623883810770305, "grad_norm": 0.5117365121841431, "learning_rate": 9.9666276187787e-06, "loss": 0.018, "step": 31530 }, { "epoch": 0.2663232779548669, "grad_norm": 0.44361841678619385, "learning_rate": 9.966542569411782e-06, "loss": 0.014, "step": 31540 }, { "epoch": 0.26640771780203076, "grad_norm": 0.6194760799407959, "learning_rate": 9.966457412172574e-06, "loss": 0.021, "step": 31550 }, { "epoch": 0.26649215764919465, "grad_norm": 0.39461544156074524, "learning_rate": 9.966372147062928e-06, "loss": 0.0212, "step": 31560 }, { "epoch": 0.26657659749635854, "grad_norm": 0.8371002674102783, "learning_rate": 9.9662867740847e-06, "loss": 0.0212, "step": 31570 }, { "epoch": 0.2666610373435224, "grad_norm": 0.7268098592758179, "learning_rate": 9.966201293239737e-06, "loss": 0.018, "step": 31580 }, { "epoch": 0.2667454771906863, "grad_norm": 0.8164940476417542, "learning_rate": 9.966115704529902e-06, "loss": 0.0215, "step": 31590 }, { "epoch": 0.26682991703785014, "grad_norm": 0.45878300070762634, "learning_rate": 9.96603000795705e-06, "loss": 0.0189, "step": 31600 }, { "epoch": 0.266914356885014, "grad_norm": 0.4588182866573334, "learning_rate": 9.965944203523046e-06, "loss": 0.0118, "step": 31610 }, { "epoch": 0.2669987967321779, "grad_norm": 0.22785501182079315, "learning_rate": 9.96585829122975e-06, "loss": 0.024, "step": 31620 }, { "epoch": 0.2670832365793418, "grad_norm": 0.5233561992645264, "learning_rate": 9.965772271079031e-06, "loss": 0.014, "step": 31630 }, { "epoch": 0.2671676764265057, "grad_norm": 0.8941472768783569, "learning_rate": 9.965686143072755e-06, "loss": 0.0152, "step": 31640 }, { "epoch": 0.26725211627366957, "grad_norm": 0.45231562852859497, "learning_rate": 9.965599907212792e-06, "loss": 0.0322, "step": 31650 }, { "epoch": 0.2673365561208334, "grad_norm": 0.6326651573181152, "learning_rate": 9.96551356350102e-06, "loss": 0.0196, "step": 31660 }, { "epoch": 0.2674209959679973, "grad_norm": 0.738315999507904, "learning_rate": 9.965427111939309e-06, "loss": 0.0228, "step": 31670 }, { "epoch": 0.2675054358151612, "grad_norm": 0.36434584856033325, "learning_rate": 9.96534055252954e-06, "loss": 0.0208, "step": 31680 }, { "epoch": 0.26758987566232506, "grad_norm": 0.6563447713851929, "learning_rate": 9.965253885273589e-06, "loss": 0.0234, "step": 31690 }, { "epoch": 0.26767431550948895, "grad_norm": 0.020274614915251732, "learning_rate": 9.965167110173342e-06, "loss": 0.0102, "step": 31700 }, { "epoch": 0.2677587553566528, "grad_norm": 0.38485798239707947, "learning_rate": 9.965080227230684e-06, "loss": 0.0199, "step": 31710 }, { "epoch": 0.26784319520381666, "grad_norm": 0.7031338810920715, "learning_rate": 9.964993236447498e-06, "loss": 0.0203, "step": 31720 }, { "epoch": 0.26792763505098055, "grad_norm": 0.3365587294101715, "learning_rate": 9.964906137825679e-06, "loss": 0.0091, "step": 31730 }, { "epoch": 0.26801207489814444, "grad_norm": 0.5753729939460754, "learning_rate": 9.964818931367116e-06, "loss": 0.0154, "step": 31740 }, { "epoch": 0.2680965147453083, "grad_norm": 0.5713234543800354, "learning_rate": 9.964731617073703e-06, "loss": 0.0139, "step": 31750 }, { "epoch": 0.2681809545924722, "grad_norm": 1.1906596422195435, "learning_rate": 9.964644194947336e-06, "loss": 0.0287, "step": 31760 }, { "epoch": 0.26826539443963604, "grad_norm": 0.14363141357898712, "learning_rate": 9.964556664989914e-06, "loss": 0.0147, "step": 31770 }, { "epoch": 0.2683498342867999, "grad_norm": 0.27166417241096497, "learning_rate": 9.96446902720334e-06, "loss": 0.0123, "step": 31780 }, { "epoch": 0.2684342741339638, "grad_norm": 0.9138587713241577, "learning_rate": 9.964381281589515e-06, "loss": 0.0165, "step": 31790 }, { "epoch": 0.2685187139811277, "grad_norm": 0.2876591086387634, "learning_rate": 9.964293428150347e-06, "loss": 0.0196, "step": 31800 }, { "epoch": 0.2686031538282916, "grad_norm": 1.0678210258483887, "learning_rate": 9.964205466887743e-06, "loss": 0.0105, "step": 31810 }, { "epoch": 0.26868759367545547, "grad_norm": 0.3973991572856903, "learning_rate": 9.964117397803613e-06, "loss": 0.0228, "step": 31820 }, { "epoch": 0.2687720335226193, "grad_norm": 1.20455002784729, "learning_rate": 9.96402922089987e-06, "loss": 0.0179, "step": 31830 }, { "epoch": 0.2688564733697832, "grad_norm": 0.464809387922287, "learning_rate": 9.963940936178429e-06, "loss": 0.0191, "step": 31840 }, { "epoch": 0.2689409132169471, "grad_norm": 0.25439518690109253, "learning_rate": 9.963852543641209e-06, "loss": 0.0137, "step": 31850 }, { "epoch": 0.26902535306411096, "grad_norm": 0.628318190574646, "learning_rate": 9.96376404329013e-06, "loss": 0.0238, "step": 31860 }, { "epoch": 0.26910979291127485, "grad_norm": 0.4020388126373291, "learning_rate": 9.96367543512711e-06, "loss": 0.0257, "step": 31870 }, { "epoch": 0.26919423275843873, "grad_norm": 1.9825400114059448, "learning_rate": 9.96358671915408e-06, "loss": 0.0137, "step": 31880 }, { "epoch": 0.26927867260560256, "grad_norm": 0.6685002446174622, "learning_rate": 9.963497895372962e-06, "loss": 0.0227, "step": 31890 }, { "epoch": 0.26936311245276645, "grad_norm": 0.37158042192459106, "learning_rate": 9.963408963785687e-06, "loss": 0.0176, "step": 31900 }, { "epoch": 0.26944755229993034, "grad_norm": 0.37811174988746643, "learning_rate": 9.963319924394186e-06, "loss": 0.0243, "step": 31910 }, { "epoch": 0.2695319921470942, "grad_norm": 0.5302055478096008, "learning_rate": 9.963230777200395e-06, "loss": 0.022, "step": 31920 }, { "epoch": 0.2696164319942581, "grad_norm": 0.1775522381067276, "learning_rate": 9.963141522206246e-06, "loss": 0.0182, "step": 31930 }, { "epoch": 0.26970087184142194, "grad_norm": 0.7672941088676453, "learning_rate": 9.963052159413683e-06, "loss": 0.0194, "step": 31940 }, { "epoch": 0.2697853116885858, "grad_norm": 0.5177119970321655, "learning_rate": 9.962962688824642e-06, "loss": 0.0229, "step": 31950 }, { "epoch": 0.2698697515357497, "grad_norm": 0.6738927364349365, "learning_rate": 9.962873110441069e-06, "loss": 0.0141, "step": 31960 }, { "epoch": 0.2699541913829136, "grad_norm": 1.089207410812378, "learning_rate": 9.962783424264908e-06, "loss": 0.0185, "step": 31970 }, { "epoch": 0.2700386312300775, "grad_norm": 0.33936238288879395, "learning_rate": 9.96269363029811e-06, "loss": 0.0157, "step": 31980 }, { "epoch": 0.2701230710772414, "grad_norm": 0.5165587663650513, "learning_rate": 9.962603728542621e-06, "loss": 0.0133, "step": 31990 }, { "epoch": 0.2702075109244052, "grad_norm": 0.4479838013648987, "learning_rate": 9.9625137190004e-06, "loss": 0.0157, "step": 32000 }, { "epoch": 0.2702919507715691, "grad_norm": 0.5603545904159546, "learning_rate": 9.962423601673394e-06, "loss": 0.017, "step": 32010 }, { "epoch": 0.270376390618733, "grad_norm": 0.6772624850273132, "learning_rate": 9.962333376563565e-06, "loss": 0.0171, "step": 32020 }, { "epoch": 0.27046083046589686, "grad_norm": 0.8725746273994446, "learning_rate": 9.962243043672872e-06, "loss": 0.0173, "step": 32030 }, { "epoch": 0.27054527031306075, "grad_norm": 0.5975387096405029, "learning_rate": 9.962152603003278e-06, "loss": 0.023, "step": 32040 }, { "epoch": 0.27062971016022463, "grad_norm": 0.6287230849266052, "learning_rate": 9.962062054556745e-06, "loss": 0.0174, "step": 32050 }, { "epoch": 0.27071415000738847, "grad_norm": 0.04676147550344467, "learning_rate": 9.961971398335242e-06, "loss": 0.0146, "step": 32060 }, { "epoch": 0.27079858985455235, "grad_norm": 0.658721923828125, "learning_rate": 9.961880634340737e-06, "loss": 0.0173, "step": 32070 }, { "epoch": 0.27088302970171624, "grad_norm": 0.669158399105072, "learning_rate": 9.961789762575202e-06, "loss": 0.0238, "step": 32080 }, { "epoch": 0.2709674695488801, "grad_norm": 0.7150314450263977, "learning_rate": 9.96169878304061e-06, "loss": 0.019, "step": 32090 }, { "epoch": 0.271051909396044, "grad_norm": 0.5041337013244629, "learning_rate": 9.961607695738935e-06, "loss": 0.0212, "step": 32100 }, { "epoch": 0.2711363492432079, "grad_norm": 0.2644268274307251, "learning_rate": 9.961516500672159e-06, "loss": 0.0193, "step": 32110 }, { "epoch": 0.27122078909037173, "grad_norm": 0.436823308467865, "learning_rate": 9.961425197842262e-06, "loss": 0.0203, "step": 32120 }, { "epoch": 0.2713052289375356, "grad_norm": 0.9060547947883606, "learning_rate": 9.961333787251226e-06, "loss": 0.0127, "step": 32130 }, { "epoch": 0.2713896687846995, "grad_norm": 1.207842230796814, "learning_rate": 9.961242268901036e-06, "loss": 0.0426, "step": 32140 }, { "epoch": 0.2714741086318634, "grad_norm": 0.7119345664978027, "learning_rate": 9.961150642793682e-06, "loss": 0.0184, "step": 32150 }, { "epoch": 0.2715585484790273, "grad_norm": 0.5939359664916992, "learning_rate": 9.96105890893115e-06, "loss": 0.0142, "step": 32160 }, { "epoch": 0.2716429883261911, "grad_norm": 0.9389460682868958, "learning_rate": 9.96096706731544e-06, "loss": 0.0198, "step": 32170 }, { "epoch": 0.271727428173355, "grad_norm": 0.7123138904571533, "learning_rate": 9.960875117948538e-06, "loss": 0.0165, "step": 32180 }, { "epoch": 0.2718118680205189, "grad_norm": 0.8481554388999939, "learning_rate": 9.960783060832446e-06, "loss": 0.0238, "step": 32190 }, { "epoch": 0.27189630786768276, "grad_norm": 0.5666028261184692, "learning_rate": 9.960690895969161e-06, "loss": 0.0163, "step": 32200 }, { "epoch": 0.27198074771484665, "grad_norm": 0.5559476017951965, "learning_rate": 9.96059862336069e-06, "loss": 0.0168, "step": 32210 }, { "epoch": 0.27206518756201054, "grad_norm": 1.1006417274475098, "learning_rate": 9.96050624300903e-06, "loss": 0.0328, "step": 32220 }, { "epoch": 0.27214962740917437, "grad_norm": 1.1536259651184082, "learning_rate": 9.960413754916192e-06, "loss": 0.0233, "step": 32230 }, { "epoch": 0.27223406725633825, "grad_norm": 0.27857640385627747, "learning_rate": 9.960321159084183e-06, "loss": 0.0187, "step": 32240 }, { "epoch": 0.27231850710350214, "grad_norm": 0.4175097942352295, "learning_rate": 9.960228455515015e-06, "loss": 0.0398, "step": 32250 }, { "epoch": 0.272402946950666, "grad_norm": 0.5039910674095154, "learning_rate": 9.960135644210702e-06, "loss": 0.0151, "step": 32260 }, { "epoch": 0.2724873867978299, "grad_norm": 0.2074354887008667, "learning_rate": 9.96004272517326e-06, "loss": 0.0228, "step": 32270 }, { "epoch": 0.2725718266449938, "grad_norm": 0.6155595779418945, "learning_rate": 9.959949698404705e-06, "loss": 0.0251, "step": 32280 }, { "epoch": 0.27265626649215763, "grad_norm": 0.09759145975112915, "learning_rate": 9.95985656390706e-06, "loss": 0.0201, "step": 32290 }, { "epoch": 0.2727407063393215, "grad_norm": 0.5485548377037048, "learning_rate": 9.959763321682347e-06, "loss": 0.022, "step": 32300 }, { "epoch": 0.2728251461864854, "grad_norm": 0.05281506106257439, "learning_rate": 9.95966997173259e-06, "loss": 0.0216, "step": 32310 }, { "epoch": 0.2729095860336493, "grad_norm": 0.6072671413421631, "learning_rate": 9.959576514059818e-06, "loss": 0.0288, "step": 32320 }, { "epoch": 0.2729940258808132, "grad_norm": 0.6112706065177917, "learning_rate": 9.95948294866606e-06, "loss": 0.0242, "step": 32330 }, { "epoch": 0.27307846572797706, "grad_norm": 0.25618305802345276, "learning_rate": 9.959389275553348e-06, "loss": 0.0168, "step": 32340 }, { "epoch": 0.2731629055751409, "grad_norm": 0.6797187924385071, "learning_rate": 9.959295494723719e-06, "loss": 0.0158, "step": 32350 }, { "epoch": 0.2732473454223048, "grad_norm": 1.0884523391723633, "learning_rate": 9.959201606179207e-06, "loss": 0.0205, "step": 32360 }, { "epoch": 0.27333178526946866, "grad_norm": 1.0484265089035034, "learning_rate": 9.959107609921854e-06, "loss": 0.0194, "step": 32370 }, { "epoch": 0.27341622511663255, "grad_norm": 0.3147123157978058, "learning_rate": 9.959013505953699e-06, "loss": 0.0107, "step": 32380 }, { "epoch": 0.27350066496379644, "grad_norm": 0.5059590935707092, "learning_rate": 9.958919294276785e-06, "loss": 0.0155, "step": 32390 }, { "epoch": 0.27358510481096027, "grad_norm": 0.5914462804794312, "learning_rate": 9.958824974893163e-06, "loss": 0.0119, "step": 32400 }, { "epoch": 0.27366954465812415, "grad_norm": 0.26920613646507263, "learning_rate": 9.958730547804876e-06, "loss": 0.0134, "step": 32410 }, { "epoch": 0.27375398450528804, "grad_norm": 1.00648832321167, "learning_rate": 9.958636013013978e-06, "loss": 0.0145, "step": 32420 }, { "epoch": 0.2738384243524519, "grad_norm": 0.478030800819397, "learning_rate": 9.958541370522523e-06, "loss": 0.0187, "step": 32430 }, { "epoch": 0.2739228641996158, "grad_norm": 1.1866015195846558, "learning_rate": 9.958446620332567e-06, "loss": 0.0276, "step": 32440 }, { "epoch": 0.2740073040467797, "grad_norm": 0.8873000144958496, "learning_rate": 9.958351762446164e-06, "loss": 0.0265, "step": 32450 }, { "epoch": 0.27409174389394353, "grad_norm": 0.6494988799095154, "learning_rate": 9.958256796865378e-06, "loss": 0.0179, "step": 32460 }, { "epoch": 0.2741761837411074, "grad_norm": 0.09672677516937256, "learning_rate": 9.95816172359227e-06, "loss": 0.0203, "step": 32470 }, { "epoch": 0.2742606235882713, "grad_norm": 0.9334750175476074, "learning_rate": 9.958066542628905e-06, "loss": 0.0283, "step": 32480 }, { "epoch": 0.2743450634354352, "grad_norm": 0.7038432955741882, "learning_rate": 9.957971253977352e-06, "loss": 0.0202, "step": 32490 }, { "epoch": 0.2744295032825991, "grad_norm": 0.38776353001594543, "learning_rate": 9.957875857639678e-06, "loss": 0.017, "step": 32500 }, { "epoch": 0.27451394312976296, "grad_norm": 0.2422052025794983, "learning_rate": 9.957780353617956e-06, "loss": 0.0232, "step": 32510 }, { "epoch": 0.2745983829769268, "grad_norm": 0.2258487343788147, "learning_rate": 9.957684741914262e-06, "loss": 0.0167, "step": 32520 }, { "epoch": 0.2746828228240907, "grad_norm": 0.36376166343688965, "learning_rate": 9.95758902253067e-06, "loss": 0.0285, "step": 32530 }, { "epoch": 0.27476726267125456, "grad_norm": 0.2336658388376236, "learning_rate": 9.957493195469263e-06, "loss": 0.022, "step": 32540 }, { "epoch": 0.27485170251841845, "grad_norm": 0.924278974533081, "learning_rate": 9.957397260732117e-06, "loss": 0.0144, "step": 32550 }, { "epoch": 0.27493614236558234, "grad_norm": 0.3513277769088745, "learning_rate": 9.957301218321319e-06, "loss": 0.0174, "step": 32560 }, { "epoch": 0.27502058221274617, "grad_norm": 0.8567861914634705, "learning_rate": 9.957205068238955e-06, "loss": 0.0159, "step": 32570 }, { "epoch": 0.27510502205991005, "grad_norm": 0.2497595101594925, "learning_rate": 9.957108810487113e-06, "loss": 0.0164, "step": 32580 }, { "epoch": 0.27518946190707394, "grad_norm": 0.004490259103477001, "learning_rate": 9.957012445067881e-06, "loss": 0.0298, "step": 32590 }, { "epoch": 0.2752739017542378, "grad_norm": 0.5017295479774475, "learning_rate": 9.956915971983357e-06, "loss": 0.0173, "step": 32600 }, { "epoch": 0.2753583416014017, "grad_norm": 0.6057491898536682, "learning_rate": 9.956819391235633e-06, "loss": 0.0259, "step": 32610 }, { "epoch": 0.2754427814485656, "grad_norm": 0.5589662790298462, "learning_rate": 9.956722702826809e-06, "loss": 0.0169, "step": 32620 }, { "epoch": 0.27552722129572943, "grad_norm": 0.11609680950641632, "learning_rate": 9.95662590675898e-06, "loss": 0.0169, "step": 32630 }, { "epoch": 0.2756116611428933, "grad_norm": 0.25376707315444946, "learning_rate": 9.956529003034256e-06, "loss": 0.0148, "step": 32640 }, { "epoch": 0.2756961009900572, "grad_norm": 0.6144448518753052, "learning_rate": 9.956431991654736e-06, "loss": 0.0228, "step": 32650 }, { "epoch": 0.2757805408372211, "grad_norm": 0.4190514087677002, "learning_rate": 9.956334872622529e-06, "loss": 0.0205, "step": 32660 }, { "epoch": 0.275864980684385, "grad_norm": 0.43087291717529297, "learning_rate": 9.956237645939744e-06, "loss": 0.0184, "step": 32670 }, { "epoch": 0.27594942053154886, "grad_norm": 0.6216587424278259, "learning_rate": 9.956140311608494e-06, "loss": 0.0243, "step": 32680 }, { "epoch": 0.2760338603787127, "grad_norm": 0.31305843591690063, "learning_rate": 9.95604286963089e-06, "loss": 0.0148, "step": 32690 }, { "epoch": 0.2761183002258766, "grad_norm": 0.8689010739326477, "learning_rate": 9.955945320009051e-06, "loss": 0.0148, "step": 32700 }, { "epoch": 0.27620274007304046, "grad_norm": 0.2099868655204773, "learning_rate": 9.955847662745096e-06, "loss": 0.0169, "step": 32710 }, { "epoch": 0.27628717992020435, "grad_norm": 0.5944865942001343, "learning_rate": 9.955749897841146e-06, "loss": 0.0189, "step": 32720 }, { "epoch": 0.27637161976736824, "grad_norm": 0.14789429306983948, "learning_rate": 9.955652025299323e-06, "loss": 0.0177, "step": 32730 }, { "epoch": 0.2764560596145321, "grad_norm": 0.5419133901596069, "learning_rate": 9.955554045121753e-06, "loss": 0.0215, "step": 32740 }, { "epoch": 0.27654049946169595, "grad_norm": 0.5416597127914429, "learning_rate": 9.955455957310565e-06, "loss": 0.0146, "step": 32750 }, { "epoch": 0.27662493930885984, "grad_norm": 0.37889862060546875, "learning_rate": 9.95535776186789e-06, "loss": 0.0184, "step": 32760 }, { "epoch": 0.2767093791560237, "grad_norm": 0.5643467903137207, "learning_rate": 9.955259458795859e-06, "loss": 0.0137, "step": 32770 }, { "epoch": 0.2767938190031876, "grad_norm": 0.18445560336112976, "learning_rate": 9.955161048096607e-06, "loss": 0.0099, "step": 32780 }, { "epoch": 0.2768782588503515, "grad_norm": 0.7607331871986389, "learning_rate": 9.955062529772273e-06, "loss": 0.0151, "step": 32790 }, { "epoch": 0.27696269869751533, "grad_norm": 0.44541606307029724, "learning_rate": 9.954963903824996e-06, "loss": 0.0227, "step": 32800 }, { "epoch": 0.2770471385446792, "grad_norm": 0.5615016222000122, "learning_rate": 9.95486517025692e-06, "loss": 0.0179, "step": 32810 }, { "epoch": 0.2771315783918431, "grad_norm": 0.5713908076286316, "learning_rate": 9.954766329070186e-06, "loss": 0.0142, "step": 32820 }, { "epoch": 0.277216018239007, "grad_norm": 0.48136940598487854, "learning_rate": 9.954667380266945e-06, "loss": 0.0204, "step": 32830 }, { "epoch": 0.2773004580861709, "grad_norm": 0.905089259147644, "learning_rate": 9.954568323849341e-06, "loss": 0.0219, "step": 32840 }, { "epoch": 0.27738489793333476, "grad_norm": 0.5623021721839905, "learning_rate": 9.95446915981953e-06, "loss": 0.0187, "step": 32850 }, { "epoch": 0.2774693377804986, "grad_norm": 0.7500203847885132, "learning_rate": 9.954369888179664e-06, "loss": 0.0213, "step": 32860 }, { "epoch": 0.2775537776276625, "grad_norm": 0.8226437568664551, "learning_rate": 9.954270508931898e-06, "loss": 0.0251, "step": 32870 }, { "epoch": 0.27763821747482637, "grad_norm": 1.4775960445404053, "learning_rate": 9.954171022078394e-06, "loss": 0.0148, "step": 32880 }, { "epoch": 0.27772265732199025, "grad_norm": 0.3018277585506439, "learning_rate": 9.954071427621308e-06, "loss": 0.0268, "step": 32890 }, { "epoch": 0.27780709716915414, "grad_norm": 0.7135041952133179, "learning_rate": 9.953971725562807e-06, "loss": 0.0215, "step": 32900 }, { "epoch": 0.277891537016318, "grad_norm": 0.014808542095124722, "learning_rate": 9.953871915905056e-06, "loss": 0.0117, "step": 32910 }, { "epoch": 0.27797597686348186, "grad_norm": 0.46770474314689636, "learning_rate": 9.953771998650222e-06, "loss": 0.0156, "step": 32920 }, { "epoch": 0.27806041671064574, "grad_norm": 0.2674468159675598, "learning_rate": 9.953671973800474e-06, "loss": 0.0252, "step": 32930 }, { "epoch": 0.2781448565578096, "grad_norm": 0.5605782270431519, "learning_rate": 9.953571841357987e-06, "loss": 0.0228, "step": 32940 }, { "epoch": 0.2782292964049735, "grad_norm": 0.37425920367240906, "learning_rate": 9.953471601324935e-06, "loss": 0.0159, "step": 32950 }, { "epoch": 0.2783137362521374, "grad_norm": 0.6254774928092957, "learning_rate": 9.953371253703494e-06, "loss": 0.0142, "step": 32960 }, { "epoch": 0.2783981760993013, "grad_norm": 0.5738996267318726, "learning_rate": 9.953270798495845e-06, "loss": 0.0146, "step": 32970 }, { "epoch": 0.2784826159464651, "grad_norm": 0.5753719210624695, "learning_rate": 9.953170235704168e-06, "loss": 0.0168, "step": 32980 }, { "epoch": 0.278567055793629, "grad_norm": 0.48455020785331726, "learning_rate": 9.953069565330651e-06, "loss": 0.0335, "step": 32990 }, { "epoch": 0.2786514956407929, "grad_norm": 0.26608943939208984, "learning_rate": 9.952968787377476e-06, "loss": 0.0185, "step": 33000 }, { "epoch": 0.2787359354879568, "grad_norm": 0.8487337827682495, "learning_rate": 9.952867901846835e-06, "loss": 0.0176, "step": 33010 }, { "epoch": 0.27882037533512066, "grad_norm": 0.4420529305934906, "learning_rate": 9.952766908740916e-06, "loss": 0.0132, "step": 33020 }, { "epoch": 0.2789048151822845, "grad_norm": 0.5399568676948547, "learning_rate": 9.952665808061916e-06, "loss": 0.0163, "step": 33030 }, { "epoch": 0.2789892550294484, "grad_norm": 0.7666570544242859, "learning_rate": 9.95256459981203e-06, "loss": 0.0205, "step": 33040 }, { "epoch": 0.27907369487661227, "grad_norm": 0.8458275198936462, "learning_rate": 9.952463283993456e-06, "loss": 0.0179, "step": 33050 }, { "epoch": 0.27915813472377615, "grad_norm": 0.6363041400909424, "learning_rate": 9.952361860608393e-06, "loss": 0.0256, "step": 33060 }, { "epoch": 0.27924257457094004, "grad_norm": 0.39787477254867554, "learning_rate": 9.952260329659047e-06, "loss": 0.0176, "step": 33070 }, { "epoch": 0.2793270144181039, "grad_norm": 0.5680182576179504, "learning_rate": 9.95215869114762e-06, "loss": 0.0155, "step": 33080 }, { "epoch": 0.27941145426526776, "grad_norm": 0.5320135951042175, "learning_rate": 9.952056945076322e-06, "loss": 0.018, "step": 33090 }, { "epoch": 0.27949589411243164, "grad_norm": 0.7435501217842102, "learning_rate": 9.95195509144736e-06, "loss": 0.0243, "step": 33100 }, { "epoch": 0.27958033395959553, "grad_norm": 0.2118067741394043, "learning_rate": 9.95185313026295e-06, "loss": 0.0212, "step": 33110 }, { "epoch": 0.2796647738067594, "grad_norm": 0.4078472852706909, "learning_rate": 9.951751061525305e-06, "loss": 0.011, "step": 33120 }, { "epoch": 0.2797492136539233, "grad_norm": 0.48513442277908325, "learning_rate": 9.95164888523664e-06, "loss": 0.0239, "step": 33130 }, { "epoch": 0.2798336535010872, "grad_norm": 0.3983476161956787, "learning_rate": 9.951546601399178e-06, "loss": 0.0137, "step": 33140 }, { "epoch": 0.279918093348251, "grad_norm": 0.6034990549087524, "learning_rate": 9.951444210015138e-06, "loss": 0.0148, "step": 33150 }, { "epoch": 0.2800025331954149, "grad_norm": 0.3045342266559601, "learning_rate": 9.951341711086742e-06, "loss": 0.0223, "step": 33160 }, { "epoch": 0.2800869730425788, "grad_norm": 2.7598979473114014, "learning_rate": 9.951239104616222e-06, "loss": 0.0195, "step": 33170 }, { "epoch": 0.2801714128897427, "grad_norm": 0.6319003105163574, "learning_rate": 9.9511363906058e-06, "loss": 0.0148, "step": 33180 }, { "epoch": 0.28025585273690656, "grad_norm": 1.4447189569473267, "learning_rate": 9.951033569057714e-06, "loss": 0.0219, "step": 33190 }, { "epoch": 0.28034029258407045, "grad_norm": 0.5764628052711487, "learning_rate": 9.95093063997419e-06, "loss": 0.014, "step": 33200 }, { "epoch": 0.2804247324312343, "grad_norm": 0.10036712139844894, "learning_rate": 9.950827603357469e-06, "loss": 0.0109, "step": 33210 }, { "epoch": 0.28050917227839817, "grad_norm": 1.0784661769866943, "learning_rate": 9.950724459209787e-06, "loss": 0.0222, "step": 33220 }, { "epoch": 0.28059361212556205, "grad_norm": 0.4723028838634491, "learning_rate": 9.950621207533382e-06, "loss": 0.0264, "step": 33230 }, { "epoch": 0.28067805197272594, "grad_norm": 0.6222143769264221, "learning_rate": 9.9505178483305e-06, "loss": 0.0211, "step": 33240 }, { "epoch": 0.2807624918198898, "grad_norm": 0.7833085060119629, "learning_rate": 9.950414381603386e-06, "loss": 0.0185, "step": 33250 }, { "epoch": 0.28084693166705366, "grad_norm": 0.3497200906276703, "learning_rate": 9.950310807354286e-06, "loss": 0.018, "step": 33260 }, { "epoch": 0.28093137151421754, "grad_norm": 0.3493841588497162, "learning_rate": 9.950207125585448e-06, "loss": 0.0175, "step": 33270 }, { "epoch": 0.28101581136138143, "grad_norm": 0.5128738880157471, "learning_rate": 9.950103336299126e-06, "loss": 0.0175, "step": 33280 }, { "epoch": 0.2811002512085453, "grad_norm": 1.6706187725067139, "learning_rate": 9.949999439497575e-06, "loss": 0.0216, "step": 33290 }, { "epoch": 0.2811846910557092, "grad_norm": 0.569985032081604, "learning_rate": 9.94989543518305e-06, "loss": 0.0175, "step": 33300 }, { "epoch": 0.2812691309028731, "grad_norm": 0.45252498984336853, "learning_rate": 9.949791323357812e-06, "loss": 0.0171, "step": 33310 }, { "epoch": 0.2813535707500369, "grad_norm": 0.3947322368621826, "learning_rate": 9.949687104024118e-06, "loss": 0.0185, "step": 33320 }, { "epoch": 0.2814380105972008, "grad_norm": 0.3520180583000183, "learning_rate": 9.949582777184237e-06, "loss": 0.0217, "step": 33330 }, { "epoch": 0.2815224504443647, "grad_norm": 1.1773016452789307, "learning_rate": 9.94947834284043e-06, "loss": 0.0147, "step": 33340 }, { "epoch": 0.2816068902915286, "grad_norm": 0.6197263598442078, "learning_rate": 9.949373800994968e-06, "loss": 0.0146, "step": 33350 }, { "epoch": 0.28169133013869246, "grad_norm": 0.3653547167778015, "learning_rate": 9.949269151650123e-06, "loss": 0.0091, "step": 33360 }, { "epoch": 0.28177576998585635, "grad_norm": 0.5925778150558472, "learning_rate": 9.949164394808163e-06, "loss": 0.0365, "step": 33370 }, { "epoch": 0.2818602098330202, "grad_norm": 0.8721939325332642, "learning_rate": 9.94905953047137e-06, "loss": 0.0249, "step": 33380 }, { "epoch": 0.28194464968018407, "grad_norm": 1.4073139429092407, "learning_rate": 9.948954558642017e-06, "loss": 0.021, "step": 33390 }, { "epoch": 0.28202908952734795, "grad_norm": 0.7631996273994446, "learning_rate": 9.948849479322385e-06, "loss": 0.0252, "step": 33400 }, { "epoch": 0.28211352937451184, "grad_norm": 0.6431448459625244, "learning_rate": 9.948744292514757e-06, "loss": 0.0213, "step": 33410 }, { "epoch": 0.2821979692216757, "grad_norm": 0.7452500462532043, "learning_rate": 9.948638998221415e-06, "loss": 0.0169, "step": 33420 }, { "epoch": 0.2822824090688396, "grad_norm": 0.6430293917655945, "learning_rate": 9.94853359644465e-06, "loss": 0.0142, "step": 33430 }, { "epoch": 0.28236684891600344, "grad_norm": 0.8165168762207031, "learning_rate": 9.948428087186749e-06, "loss": 0.0295, "step": 33440 }, { "epoch": 0.28245128876316733, "grad_norm": 0.2705006003379822, "learning_rate": 9.948322470450003e-06, "loss": 0.0174, "step": 33450 }, { "epoch": 0.2825357286103312, "grad_norm": 0.8484729528427124, "learning_rate": 9.948216746236707e-06, "loss": 0.0275, "step": 33460 }, { "epoch": 0.2826201684574951, "grad_norm": 0.432774156332016, "learning_rate": 9.948110914549159e-06, "loss": 0.0112, "step": 33470 }, { "epoch": 0.282704608304659, "grad_norm": 0.46947404742240906, "learning_rate": 9.948004975389655e-06, "loss": 0.0203, "step": 33480 }, { "epoch": 0.2827890481518228, "grad_norm": 0.5079475045204163, "learning_rate": 9.947898928760496e-06, "loss": 0.0156, "step": 33490 }, { "epoch": 0.2828734879989867, "grad_norm": 0.35750049352645874, "learning_rate": 9.947792774663987e-06, "loss": 0.0181, "step": 33500 }, { "epoch": 0.2829579278461506, "grad_norm": 0.9347054362297058, "learning_rate": 9.947686513102434e-06, "loss": 0.0193, "step": 33510 }, { "epoch": 0.2830423676933145, "grad_norm": 0.6692250370979309, "learning_rate": 9.947580144078142e-06, "loss": 0.0157, "step": 33520 }, { "epoch": 0.28312680754047836, "grad_norm": 0.2844240665435791, "learning_rate": 9.947473667593424e-06, "loss": 0.0227, "step": 33530 }, { "epoch": 0.28321124738764225, "grad_norm": 0.3096883296966553, "learning_rate": 9.94736708365059e-06, "loss": 0.0141, "step": 33540 }, { "epoch": 0.2832956872348061, "grad_norm": 0.5385245680809021, "learning_rate": 9.94726039225196e-06, "loss": 0.0153, "step": 33550 }, { "epoch": 0.28338012708196997, "grad_norm": 0.29282015562057495, "learning_rate": 9.947153593399846e-06, "loss": 0.0187, "step": 33560 }, { "epoch": 0.28346456692913385, "grad_norm": 0.2665086090564728, "learning_rate": 9.94704668709657e-06, "loss": 0.0095, "step": 33570 }, { "epoch": 0.28354900677629774, "grad_norm": 0.7755261063575745, "learning_rate": 9.946939673344453e-06, "loss": 0.0191, "step": 33580 }, { "epoch": 0.2836334466234616, "grad_norm": 0.7155791521072388, "learning_rate": 9.946832552145822e-06, "loss": 0.0242, "step": 33590 }, { "epoch": 0.2837178864706255, "grad_norm": 0.6072282195091248, "learning_rate": 9.946725323503e-06, "loss": 0.012, "step": 33600 }, { "epoch": 0.28380232631778934, "grad_norm": 0.8513587713241577, "learning_rate": 9.946617987418318e-06, "loss": 0.0293, "step": 33610 }, { "epoch": 0.28388676616495323, "grad_norm": 3.935724973678589, "learning_rate": 9.946510543894107e-06, "loss": 0.0189, "step": 33620 }, { "epoch": 0.2839712060121171, "grad_norm": 0.727333664894104, "learning_rate": 9.9464029929327e-06, "loss": 0.0379, "step": 33630 }, { "epoch": 0.284055645859281, "grad_norm": 0.016479937359690666, "learning_rate": 9.946295334536435e-06, "loss": 0.0196, "step": 33640 }, { "epoch": 0.2841400857064449, "grad_norm": 1.1228033304214478, "learning_rate": 9.946187568707647e-06, "loss": 0.0213, "step": 33650 }, { "epoch": 0.2842245255536088, "grad_norm": 0.47669583559036255, "learning_rate": 9.94607969544868e-06, "loss": 0.0335, "step": 33660 }, { "epoch": 0.2843089654007726, "grad_norm": 0.47225263714790344, "learning_rate": 9.945971714761875e-06, "loss": 0.0151, "step": 33670 }, { "epoch": 0.2843934052479365, "grad_norm": 0.5785596370697021, "learning_rate": 9.945863626649578e-06, "loss": 0.0275, "step": 33680 }, { "epoch": 0.2844778450951004, "grad_norm": 0.8346824049949646, "learning_rate": 9.945755431114138e-06, "loss": 0.0177, "step": 33690 }, { "epoch": 0.28456228494226427, "grad_norm": 0.6241986155509949, "learning_rate": 9.945647128157903e-06, "loss": 0.0087, "step": 33700 }, { "epoch": 0.28464672478942815, "grad_norm": 0.47771725058555603, "learning_rate": 9.945538717783225e-06, "loss": 0.0146, "step": 33710 }, { "epoch": 0.284731164636592, "grad_norm": 0.5066311955451965, "learning_rate": 9.945430199992458e-06, "loss": 0.0159, "step": 33720 }, { "epoch": 0.28481560448375587, "grad_norm": 0.6978440880775452, "learning_rate": 9.945321574787963e-06, "loss": 0.0199, "step": 33730 }, { "epoch": 0.28490004433091975, "grad_norm": 0.7905580401420593, "learning_rate": 9.945212842172097e-06, "loss": 0.032, "step": 33740 }, { "epoch": 0.28498448417808364, "grad_norm": 0.5623002648353577, "learning_rate": 9.945104002147218e-06, "loss": 0.0165, "step": 33750 }, { "epoch": 0.2850689240252475, "grad_norm": 0.7316914200782776, "learning_rate": 9.944995054715696e-06, "loss": 0.0159, "step": 33760 }, { "epoch": 0.2851533638724114, "grad_norm": 0.17613576352596283, "learning_rate": 9.944885999879894e-06, "loss": 0.0219, "step": 33770 }, { "epoch": 0.28523780371957524, "grad_norm": 0.22695082426071167, "learning_rate": 9.944776837642182e-06, "loss": 0.0165, "step": 33780 }, { "epoch": 0.28532224356673913, "grad_norm": 0.5510587692260742, "learning_rate": 9.94466756800493e-06, "loss": 0.024, "step": 33790 }, { "epoch": 0.285406683413903, "grad_norm": 0.5340670347213745, "learning_rate": 9.944558190970513e-06, "loss": 0.0164, "step": 33800 }, { "epoch": 0.2854911232610669, "grad_norm": 1.1944559812545776, "learning_rate": 9.944448706541303e-06, "loss": 0.0193, "step": 33810 }, { "epoch": 0.2855755631082308, "grad_norm": 0.6652022004127502, "learning_rate": 9.944339114719683e-06, "loss": 0.0159, "step": 33820 }, { "epoch": 0.2856600029553947, "grad_norm": 1.1709768772125244, "learning_rate": 9.94422941550803e-06, "loss": 0.0179, "step": 33830 }, { "epoch": 0.2857444428025585, "grad_norm": 0.2509670853614807, "learning_rate": 9.944119608908725e-06, "loss": 0.0217, "step": 33840 }, { "epoch": 0.2858288826497224, "grad_norm": 0.6936414837837219, "learning_rate": 9.944009694924158e-06, "loss": 0.0141, "step": 33850 }, { "epoch": 0.2859133224968863, "grad_norm": 0.1154041439294815, "learning_rate": 9.943899673556712e-06, "loss": 0.028, "step": 33860 }, { "epoch": 0.28599776234405017, "grad_norm": 0.46338167786598206, "learning_rate": 9.943789544808777e-06, "loss": 0.0158, "step": 33870 }, { "epoch": 0.28608220219121405, "grad_norm": 0.6960777044296265, "learning_rate": 9.943679308682748e-06, "loss": 0.0212, "step": 33880 }, { "epoch": 0.2861666420383779, "grad_norm": 0.30677109956741333, "learning_rate": 9.943568965181016e-06, "loss": 0.0225, "step": 33890 }, { "epoch": 0.28625108188554177, "grad_norm": 0.301688015460968, "learning_rate": 9.943458514305982e-06, "loss": 0.024, "step": 33900 }, { "epoch": 0.28633552173270566, "grad_norm": 0.46912527084350586, "learning_rate": 9.943347956060039e-06, "loss": 0.0161, "step": 33910 }, { "epoch": 0.28641996157986954, "grad_norm": 1.0041301250457764, "learning_rate": 9.943237290445593e-06, "loss": 0.0404, "step": 33920 }, { "epoch": 0.28650440142703343, "grad_norm": 0.7463870644569397, "learning_rate": 9.943126517465044e-06, "loss": 0.0183, "step": 33930 }, { "epoch": 0.2865888412741973, "grad_norm": 0.43284082412719727, "learning_rate": 9.943015637120802e-06, "loss": 0.0169, "step": 33940 }, { "epoch": 0.28667328112136115, "grad_norm": 0.5852130055427551, "learning_rate": 9.942904649415275e-06, "loss": 0.0137, "step": 33950 }, { "epoch": 0.28675772096852503, "grad_norm": 0.3198207914829254, "learning_rate": 9.94279355435087e-06, "loss": 0.0141, "step": 33960 }, { "epoch": 0.2868421608156889, "grad_norm": 0.7767875790596008, "learning_rate": 9.94268235193e-06, "loss": 0.0288, "step": 33970 }, { "epoch": 0.2869266006628528, "grad_norm": 0.6149628162384033, "learning_rate": 9.942571042155084e-06, "loss": 0.0185, "step": 33980 }, { "epoch": 0.2870110405100167, "grad_norm": 0.38260307908058167, "learning_rate": 9.942459625028537e-06, "loss": 0.0157, "step": 33990 }, { "epoch": 0.2870954803571806, "grad_norm": 0.61664217710495, "learning_rate": 9.942348100552781e-06, "loss": 0.0176, "step": 34000 }, { "epoch": 0.2871799202043444, "grad_norm": 0.46332496404647827, "learning_rate": 9.942236468730238e-06, "loss": 0.0205, "step": 34010 }, { "epoch": 0.2872643600515083, "grad_norm": 0.6368911266326904, "learning_rate": 9.94212472956333e-06, "loss": 0.0236, "step": 34020 }, { "epoch": 0.2873487998986722, "grad_norm": 0.7134496569633484, "learning_rate": 9.942012883054484e-06, "loss": 0.013, "step": 34030 }, { "epoch": 0.28743323974583607, "grad_norm": 0.7285405397415161, "learning_rate": 9.941900929206133e-06, "loss": 0.0275, "step": 34040 }, { "epoch": 0.28751767959299995, "grad_norm": 0.22417062520980835, "learning_rate": 9.941788868020707e-06, "loss": 0.0173, "step": 34050 }, { "epoch": 0.28760211944016384, "grad_norm": 0.22967474162578583, "learning_rate": 9.941676699500637e-06, "loss": 0.0118, "step": 34060 }, { "epoch": 0.28768655928732767, "grad_norm": 0.39906221628189087, "learning_rate": 9.941564423648365e-06, "loss": 0.0114, "step": 34070 }, { "epoch": 0.28777099913449156, "grad_norm": 0.34106147289276123, "learning_rate": 9.941452040466325e-06, "loss": 0.0147, "step": 34080 }, { "epoch": 0.28785543898165544, "grad_norm": 0.5780034065246582, "learning_rate": 9.941339549956957e-06, "loss": 0.0226, "step": 34090 }, { "epoch": 0.28793987882881933, "grad_norm": 0.24200908839702606, "learning_rate": 9.941226952122709e-06, "loss": 0.0182, "step": 34100 }, { "epoch": 0.2880243186759832, "grad_norm": 0.01810551807284355, "learning_rate": 9.941114246966024e-06, "loss": 0.0193, "step": 34110 }, { "epoch": 0.28810875852314705, "grad_norm": 0.5057688355445862, "learning_rate": 9.94100143448935e-06, "loss": 0.016, "step": 34120 }, { "epoch": 0.28819319837031093, "grad_norm": 0.32918253540992737, "learning_rate": 9.940888514695136e-06, "loss": 0.0135, "step": 34130 }, { "epoch": 0.2882776382174748, "grad_norm": 0.6731421947479248, "learning_rate": 9.940775487585837e-06, "loss": 0.0168, "step": 34140 }, { "epoch": 0.2883620780646387, "grad_norm": 1.0464699268341064, "learning_rate": 9.940662353163908e-06, "loss": 0.0298, "step": 34150 }, { "epoch": 0.2884465179118026, "grad_norm": 0.6818262934684753, "learning_rate": 9.940549111431804e-06, "loss": 0.018, "step": 34160 }, { "epoch": 0.2885309577589665, "grad_norm": 0.458330899477005, "learning_rate": 9.940435762391985e-06, "loss": 0.0248, "step": 34170 }, { "epoch": 0.2886153976061303, "grad_norm": 0.06804860383272171, "learning_rate": 9.940322306046914e-06, "loss": 0.0194, "step": 34180 }, { "epoch": 0.2886998374532942, "grad_norm": 0.683181643486023, "learning_rate": 9.940208742399056e-06, "loss": 0.022, "step": 34190 }, { "epoch": 0.2887842773004581, "grad_norm": 0.5289031267166138, "learning_rate": 9.940095071450874e-06, "loss": 0.0157, "step": 34200 }, { "epoch": 0.28886871714762197, "grad_norm": 0.4286113381385803, "learning_rate": 9.939981293204842e-06, "loss": 0.0186, "step": 34210 }, { "epoch": 0.28895315699478585, "grad_norm": 0.5045250058174133, "learning_rate": 9.939867407663428e-06, "loss": 0.0155, "step": 34220 }, { "epoch": 0.28903759684194974, "grad_norm": 0.7336569428443909, "learning_rate": 9.939753414829105e-06, "loss": 0.0225, "step": 34230 }, { "epoch": 0.28912203668911357, "grad_norm": 0.4570603668689728, "learning_rate": 9.939639314704352e-06, "loss": 0.0205, "step": 34240 }, { "epoch": 0.28920647653627746, "grad_norm": 0.6225149631500244, "learning_rate": 9.939525107291643e-06, "loss": 0.0139, "step": 34250 }, { "epoch": 0.28929091638344134, "grad_norm": 0.5498300194740295, "learning_rate": 9.939410792593463e-06, "loss": 0.0222, "step": 34260 }, { "epoch": 0.28937535623060523, "grad_norm": 0.6519505381584167, "learning_rate": 9.939296370612292e-06, "loss": 0.0265, "step": 34270 }, { "epoch": 0.2894597960777691, "grad_norm": 0.4411297142505646, "learning_rate": 9.939181841350616e-06, "loss": 0.0086, "step": 34280 }, { "epoch": 0.289544235924933, "grad_norm": 0.6570820808410645, "learning_rate": 9.939067204810921e-06, "loss": 0.0145, "step": 34290 }, { "epoch": 0.28962867577209683, "grad_norm": 1.1086894273757935, "learning_rate": 9.9389524609957e-06, "loss": 0.0196, "step": 34300 }, { "epoch": 0.2897131156192607, "grad_norm": 0.161941796541214, "learning_rate": 9.938837609907444e-06, "loss": 0.0217, "step": 34310 }, { "epoch": 0.2897975554664246, "grad_norm": 0.6610442399978638, "learning_rate": 9.938722651548646e-06, "loss": 0.0181, "step": 34320 }, { "epoch": 0.2898819953135885, "grad_norm": 0.42925533652305603, "learning_rate": 9.938607585921806e-06, "loss": 0.0143, "step": 34330 }, { "epoch": 0.2899664351607524, "grad_norm": 0.581007719039917, "learning_rate": 9.93849241302942e-06, "loss": 0.0194, "step": 34340 }, { "epoch": 0.2900508750079162, "grad_norm": 0.41634276509284973, "learning_rate": 9.93837713287399e-06, "loss": 0.017, "step": 34350 }, { "epoch": 0.2901353148550801, "grad_norm": 0.7543720006942749, "learning_rate": 9.938261745458023e-06, "loss": 0.0204, "step": 34360 }, { "epoch": 0.290219754702244, "grad_norm": 0.3917893171310425, "learning_rate": 9.93814625078402e-06, "loss": 0.0223, "step": 34370 }, { "epoch": 0.29030419454940787, "grad_norm": 0.8314948678016663, "learning_rate": 9.938030648854493e-06, "loss": 0.0162, "step": 34380 }, { "epoch": 0.29038863439657175, "grad_norm": 0.9018453359603882, "learning_rate": 9.937914939671953e-06, "loss": 0.0128, "step": 34390 }, { "epoch": 0.29047307424373564, "grad_norm": 0.4656575918197632, "learning_rate": 9.937799123238912e-06, "loss": 0.0143, "step": 34400 }, { "epoch": 0.29055751409089947, "grad_norm": 0.24446897208690643, "learning_rate": 9.937683199557886e-06, "loss": 0.0245, "step": 34410 }, { "epoch": 0.29064195393806336, "grad_norm": 0.15104277431964874, "learning_rate": 9.937567168631393e-06, "loss": 0.0079, "step": 34420 }, { "epoch": 0.29072639378522724, "grad_norm": 0.6478402018547058, "learning_rate": 9.937451030461953e-06, "loss": 0.0351, "step": 34430 }, { "epoch": 0.29081083363239113, "grad_norm": 0.30958181619644165, "learning_rate": 9.937334785052087e-06, "loss": 0.0201, "step": 34440 }, { "epoch": 0.290895273479555, "grad_norm": 0.37877610325813293, "learning_rate": 9.937218432404324e-06, "loss": 0.0197, "step": 34450 }, { "epoch": 0.2909797133267189, "grad_norm": 0.1390691101551056, "learning_rate": 9.937101972521186e-06, "loss": 0.0179, "step": 34460 }, { "epoch": 0.29106415317388273, "grad_norm": 0.8052226305007935, "learning_rate": 9.936985405405206e-06, "loss": 0.02, "step": 34470 }, { "epoch": 0.2911485930210466, "grad_norm": 0.28104764223098755, "learning_rate": 9.936868731058913e-06, "loss": 0.0226, "step": 34480 }, { "epoch": 0.2912330328682105, "grad_norm": 1.23298180103302, "learning_rate": 9.936751949484844e-06, "loss": 0.0148, "step": 34490 }, { "epoch": 0.2913174727153744, "grad_norm": 0.31622979044914246, "learning_rate": 9.936635060685535e-06, "loss": 0.0186, "step": 34500 }, { "epoch": 0.2914019125625383, "grad_norm": 0.5252867937088013, "learning_rate": 9.936518064663523e-06, "loss": 0.0164, "step": 34510 }, { "epoch": 0.29148635240970217, "grad_norm": 0.7932857871055603, "learning_rate": 9.93640096142135e-06, "loss": 0.0248, "step": 34520 }, { "epoch": 0.291570792256866, "grad_norm": 0.5126418471336365, "learning_rate": 9.936283750961561e-06, "loss": 0.013, "step": 34530 }, { "epoch": 0.2916552321040299, "grad_norm": 0.7027820944786072, "learning_rate": 9.9361664332867e-06, "loss": 0.0246, "step": 34540 }, { "epoch": 0.29173967195119377, "grad_norm": 1.0353410243988037, "learning_rate": 9.936049008399316e-06, "loss": 0.0268, "step": 34550 }, { "epoch": 0.29182411179835765, "grad_norm": 0.2027282416820526, "learning_rate": 9.935931476301958e-06, "loss": 0.0188, "step": 34560 }, { "epoch": 0.29190855164552154, "grad_norm": 0.6428461074829102, "learning_rate": 9.935813836997178e-06, "loss": 0.0219, "step": 34570 }, { "epoch": 0.29199299149268537, "grad_norm": 0.4545585811138153, "learning_rate": 9.935696090487537e-06, "loss": 0.0215, "step": 34580 }, { "epoch": 0.29207743133984926, "grad_norm": 0.5144450068473816, "learning_rate": 9.935578236775587e-06, "loss": 0.0202, "step": 34590 }, { "epoch": 0.29216187118701314, "grad_norm": 0.7460724115371704, "learning_rate": 9.935460275863888e-06, "loss": 0.015, "step": 34600 }, { "epoch": 0.29224631103417703, "grad_norm": 0.24706047773361206, "learning_rate": 9.935342207755003e-06, "loss": 0.0164, "step": 34610 }, { "epoch": 0.2923307508813409, "grad_norm": 0.3304809629917145, "learning_rate": 9.935224032451497e-06, "loss": 0.0147, "step": 34620 }, { "epoch": 0.2924151907285048, "grad_norm": 0.45086896419525146, "learning_rate": 9.935105749955937e-06, "loss": 0.017, "step": 34630 }, { "epoch": 0.29249963057566863, "grad_norm": 0.49767595529556274, "learning_rate": 9.934987360270892e-06, "loss": 0.0133, "step": 34640 }, { "epoch": 0.2925840704228325, "grad_norm": 0.0636424571275711, "learning_rate": 9.934868863398933e-06, "loss": 0.009, "step": 34650 }, { "epoch": 0.2926685102699964, "grad_norm": 0.2713453471660614, "learning_rate": 9.934750259342632e-06, "loss": 0.0171, "step": 34660 }, { "epoch": 0.2927529501171603, "grad_norm": 0.15581531822681427, "learning_rate": 9.934631548104566e-06, "loss": 0.0062, "step": 34670 }, { "epoch": 0.2928373899643242, "grad_norm": 1.267543911933899, "learning_rate": 9.934512729687316e-06, "loss": 0.0219, "step": 34680 }, { "epoch": 0.29292182981148807, "grad_norm": 0.6625593304634094, "learning_rate": 9.93439380409346e-06, "loss": 0.0243, "step": 34690 }, { "epoch": 0.2930062696586519, "grad_norm": 0.7027706503868103, "learning_rate": 9.93427477132558e-06, "loss": 0.0219, "step": 34700 }, { "epoch": 0.2930907095058158, "grad_norm": 0.9403373003005981, "learning_rate": 9.934155631386264e-06, "loss": 0.0266, "step": 34710 }, { "epoch": 0.29317514935297967, "grad_norm": 0.13214223086833954, "learning_rate": 9.934036384278101e-06, "loss": 0.0342, "step": 34720 }, { "epoch": 0.29325958920014356, "grad_norm": 0.4401015639305115, "learning_rate": 9.933917030003675e-06, "loss": 0.0116, "step": 34730 }, { "epoch": 0.29334402904730744, "grad_norm": 0.12838435173034668, "learning_rate": 9.933797568565585e-06, "loss": 0.0247, "step": 34740 }, { "epoch": 0.29342846889447133, "grad_norm": 0.5556982755661011, "learning_rate": 9.933677999966423e-06, "loss": 0.0187, "step": 34750 }, { "epoch": 0.29351290874163516, "grad_norm": 0.6442356109619141, "learning_rate": 9.933558324208785e-06, "loss": 0.0213, "step": 34760 }, { "epoch": 0.29359734858879905, "grad_norm": 0.38094907999038696, "learning_rate": 9.933438541295271e-06, "loss": 0.0233, "step": 34770 }, { "epoch": 0.29368178843596293, "grad_norm": 0.622662365436554, "learning_rate": 9.933318651228482e-06, "loss": 0.0166, "step": 34780 }, { "epoch": 0.2937662282831268, "grad_norm": 2.3654184341430664, "learning_rate": 9.933198654011025e-06, "loss": 0.026, "step": 34790 }, { "epoch": 0.2938506681302907, "grad_norm": 0.5330840349197388, "learning_rate": 9.933078549645503e-06, "loss": 0.0158, "step": 34800 }, { "epoch": 0.29393510797745453, "grad_norm": 0.8337697982788086, "learning_rate": 9.932958338134526e-06, "loss": 0.0164, "step": 34810 }, { "epoch": 0.2940195478246184, "grad_norm": 0.46286898851394653, "learning_rate": 9.932838019480704e-06, "loss": 0.0182, "step": 34820 }, { "epoch": 0.2941039876717823, "grad_norm": 0.06580062955617905, "learning_rate": 9.932717593686652e-06, "loss": 0.0167, "step": 34830 }, { "epoch": 0.2941884275189462, "grad_norm": 0.2900283932685852, "learning_rate": 9.932597060754982e-06, "loss": 0.0097, "step": 34840 }, { "epoch": 0.2942728673661101, "grad_norm": 0.3170382082462311, "learning_rate": 9.932476420688317e-06, "loss": 0.0255, "step": 34850 }, { "epoch": 0.29435730721327397, "grad_norm": 0.04672568291425705, "learning_rate": 9.932355673489276e-06, "loss": 0.019, "step": 34860 }, { "epoch": 0.2944417470604378, "grad_norm": 0.8186824321746826, "learning_rate": 9.932234819160479e-06, "loss": 0.023, "step": 34870 }, { "epoch": 0.2945261869076017, "grad_norm": 0.6673487424850464, "learning_rate": 9.932113857704553e-06, "loss": 0.0379, "step": 34880 }, { "epoch": 0.29461062675476557, "grad_norm": 0.4242026209831238, "learning_rate": 9.931992789124124e-06, "loss": 0.0169, "step": 34890 }, { "epoch": 0.29469506660192946, "grad_norm": 0.21637022495269775, "learning_rate": 9.931871613421822e-06, "loss": 0.015, "step": 34900 }, { "epoch": 0.29477950644909334, "grad_norm": 0.6385898590087891, "learning_rate": 9.93175033060028e-06, "loss": 0.0196, "step": 34910 }, { "epoch": 0.29486394629625723, "grad_norm": 0.19650639593601227, "learning_rate": 9.931628940662133e-06, "loss": 0.0156, "step": 34920 }, { "epoch": 0.29494838614342106, "grad_norm": 0.4682471454143524, "learning_rate": 9.931507443610013e-06, "loss": 0.0213, "step": 34930 }, { "epoch": 0.29503282599058495, "grad_norm": 0.6018678545951843, "learning_rate": 9.931385839446563e-06, "loss": 0.016, "step": 34940 }, { "epoch": 0.29511726583774883, "grad_norm": 0.39413902163505554, "learning_rate": 9.931264128174424e-06, "loss": 0.0176, "step": 34950 }, { "epoch": 0.2952017056849127, "grad_norm": 0.4717532992362976, "learning_rate": 9.931142309796238e-06, "loss": 0.0159, "step": 34960 }, { "epoch": 0.2952861455320766, "grad_norm": 0.5881577730178833, "learning_rate": 9.931020384314652e-06, "loss": 0.0196, "step": 34970 }, { "epoch": 0.2953705853792405, "grad_norm": 0.3758423924446106, "learning_rate": 9.930898351732311e-06, "loss": 0.019, "step": 34980 }, { "epoch": 0.2954550252264043, "grad_norm": 0.5440946221351624, "learning_rate": 9.930776212051872e-06, "loss": 0.015, "step": 34990 }, { "epoch": 0.2955394650735682, "grad_norm": 0.3320407271385193, "learning_rate": 9.930653965275983e-06, "loss": 0.0183, "step": 35000 }, { "epoch": 0.2956239049207321, "grad_norm": 0.5294328927993774, "learning_rate": 9.9305316114073e-06, "loss": 0.0187, "step": 35010 }, { "epoch": 0.295708344767896, "grad_norm": 0.45203596353530884, "learning_rate": 9.93040915044848e-06, "loss": 0.0122, "step": 35020 }, { "epoch": 0.29579278461505987, "grad_norm": 0.5437349081039429, "learning_rate": 9.930286582402184e-06, "loss": 0.0202, "step": 35030 }, { "epoch": 0.2958772244622237, "grad_norm": 0.46780282258987427, "learning_rate": 9.930163907271073e-06, "loss": 0.014, "step": 35040 }, { "epoch": 0.2959616643093876, "grad_norm": 0.5247514843940735, "learning_rate": 9.930041125057813e-06, "loss": 0.0156, "step": 35050 }, { "epoch": 0.29604610415655147, "grad_norm": 0.48764029145240784, "learning_rate": 9.92991823576507e-06, "loss": 0.0155, "step": 35060 }, { "epoch": 0.29613054400371536, "grad_norm": 1.3401825428009033, "learning_rate": 9.929795239395515e-06, "loss": 0.0172, "step": 35070 }, { "epoch": 0.29621498385087924, "grad_norm": 0.48138490319252014, "learning_rate": 9.929672135951815e-06, "loss": 0.0215, "step": 35080 }, { "epoch": 0.29629942369804313, "grad_norm": 0.24732987582683563, "learning_rate": 9.929548925436648e-06, "loss": 0.0217, "step": 35090 }, { "epoch": 0.29638386354520696, "grad_norm": 0.7175500988960266, "learning_rate": 9.929425607852688e-06, "loss": 0.0183, "step": 35100 }, { "epoch": 0.29646830339237085, "grad_norm": 0.5914332270622253, "learning_rate": 9.929302183202612e-06, "loss": 0.0209, "step": 35110 }, { "epoch": 0.29655274323953473, "grad_norm": 0.6132344603538513, "learning_rate": 9.929178651489104e-06, "loss": 0.0172, "step": 35120 }, { "epoch": 0.2966371830866986, "grad_norm": 0.4280220568180084, "learning_rate": 9.929055012714847e-06, "loss": 0.0257, "step": 35130 }, { "epoch": 0.2967216229338625, "grad_norm": 0.678401529788971, "learning_rate": 9.928931266882525e-06, "loss": 0.0224, "step": 35140 }, { "epoch": 0.2968060627810264, "grad_norm": 0.4853875935077667, "learning_rate": 9.928807413994823e-06, "loss": 0.0221, "step": 35150 }, { "epoch": 0.2968905026281902, "grad_norm": 0.5566931366920471, "learning_rate": 9.928683454054438e-06, "loss": 0.0181, "step": 35160 }, { "epoch": 0.2969749424753541, "grad_norm": 0.17629724740982056, "learning_rate": 9.928559387064056e-06, "loss": 0.0126, "step": 35170 }, { "epoch": 0.297059382322518, "grad_norm": 0.32657745480537415, "learning_rate": 9.928435213026374e-06, "loss": 0.0157, "step": 35180 }, { "epoch": 0.2971438221696819, "grad_norm": 1.371277093887329, "learning_rate": 9.92831093194409e-06, "loss": 0.029, "step": 35190 }, { "epoch": 0.29722826201684577, "grad_norm": 0.6813676357269287, "learning_rate": 9.928186543819902e-06, "loss": 0.0189, "step": 35200 }, { "epoch": 0.2973127018640096, "grad_norm": 0.3628693222999573, "learning_rate": 9.928062048656513e-06, "loss": 0.0181, "step": 35210 }, { "epoch": 0.2973971417111735, "grad_norm": 0.41927748918533325, "learning_rate": 9.927937446456626e-06, "loss": 0.0164, "step": 35220 }, { "epoch": 0.29748158155833737, "grad_norm": 0.15806972980499268, "learning_rate": 9.927812737222947e-06, "loss": 0.0117, "step": 35230 }, { "epoch": 0.29756602140550126, "grad_norm": 0.33695000410079956, "learning_rate": 9.927687920958184e-06, "loss": 0.0131, "step": 35240 }, { "epoch": 0.29765046125266514, "grad_norm": 0.4801764190196991, "learning_rate": 9.92756299766505e-06, "loss": 0.0237, "step": 35250 }, { "epoch": 0.29773490109982903, "grad_norm": 0.427359402179718, "learning_rate": 9.927437967346258e-06, "loss": 0.01, "step": 35260 }, { "epoch": 0.29781934094699286, "grad_norm": 0.4909394681453705, "learning_rate": 9.927312830004524e-06, "loss": 0.0098, "step": 35270 }, { "epoch": 0.29790378079415675, "grad_norm": 0.3889472782611847, "learning_rate": 9.927187585642564e-06, "loss": 0.0173, "step": 35280 }, { "epoch": 0.29798822064132063, "grad_norm": 0.210465207695961, "learning_rate": 9.927062234263099e-06, "loss": 0.0174, "step": 35290 }, { "epoch": 0.2980726604884845, "grad_norm": 0.38557761907577515, "learning_rate": 9.926936775868852e-06, "loss": 0.024, "step": 35300 }, { "epoch": 0.2981571003356484, "grad_norm": 0.4365217387676239, "learning_rate": 9.926811210462549e-06, "loss": 0.0203, "step": 35310 }, { "epoch": 0.2982415401828123, "grad_norm": 0.7929326295852661, "learning_rate": 9.926685538046914e-06, "loss": 0.0165, "step": 35320 }, { "epoch": 0.2983259800299761, "grad_norm": 0.3540477752685547, "learning_rate": 9.92655975862468e-06, "loss": 0.0107, "step": 35330 }, { "epoch": 0.29841041987714, "grad_norm": 0.9425779581069946, "learning_rate": 9.926433872198578e-06, "loss": 0.0151, "step": 35340 }, { "epoch": 0.2984948597243039, "grad_norm": 0.2581638991832733, "learning_rate": 9.92630787877134e-06, "loss": 0.0129, "step": 35350 }, { "epoch": 0.2985792995714678, "grad_norm": 0.2471226155757904, "learning_rate": 9.926181778345705e-06, "loss": 0.0118, "step": 35360 }, { "epoch": 0.29866373941863167, "grad_norm": 0.09162265807390213, "learning_rate": 9.926055570924413e-06, "loss": 0.0138, "step": 35370 }, { "epoch": 0.29874817926579555, "grad_norm": 0.5212534666061401, "learning_rate": 9.925929256510203e-06, "loss": 0.014, "step": 35380 }, { "epoch": 0.2988326191129594, "grad_norm": 0.6552743315696716, "learning_rate": 9.925802835105818e-06, "loss": 0.0201, "step": 35390 }, { "epoch": 0.29891705896012327, "grad_norm": 0.7469278573989868, "learning_rate": 9.925676306714005e-06, "loss": 0.0291, "step": 35400 }, { "epoch": 0.29900149880728716, "grad_norm": 0.43605318665504456, "learning_rate": 9.925549671337512e-06, "loss": 0.0152, "step": 35410 }, { "epoch": 0.29908593865445104, "grad_norm": 0.5806717872619629, "learning_rate": 9.925422928979089e-06, "loss": 0.0207, "step": 35420 }, { "epoch": 0.29917037850161493, "grad_norm": 0.822399914264679, "learning_rate": 9.925296079641489e-06, "loss": 0.0158, "step": 35430 }, { "epoch": 0.29925481834877876, "grad_norm": 0.3075738549232483, "learning_rate": 9.92516912332747e-06, "loss": 0.0138, "step": 35440 }, { "epoch": 0.29933925819594265, "grad_norm": 0.3171154260635376, "learning_rate": 9.925042060039785e-06, "loss": 0.0281, "step": 35450 }, { "epoch": 0.29942369804310653, "grad_norm": 0.020285988226532936, "learning_rate": 9.924914889781195e-06, "loss": 0.0145, "step": 35460 }, { "epoch": 0.2995081378902704, "grad_norm": 0.48126575350761414, "learning_rate": 9.924787612554462e-06, "loss": 0.0294, "step": 35470 }, { "epoch": 0.2995925777374343, "grad_norm": 0.593376874923706, "learning_rate": 9.924660228362353e-06, "loss": 0.0237, "step": 35480 }, { "epoch": 0.2996770175845982, "grad_norm": 0.7403202056884766, "learning_rate": 9.924532737207633e-06, "loss": 0.0256, "step": 35490 }, { "epoch": 0.299761457431762, "grad_norm": 0.06741231679916382, "learning_rate": 9.924405139093072e-06, "loss": 0.0144, "step": 35500 }, { "epoch": 0.2998458972789259, "grad_norm": 0.6958538293838501, "learning_rate": 9.924277434021438e-06, "loss": 0.0197, "step": 35510 }, { "epoch": 0.2999303371260898, "grad_norm": 0.6169781684875488, "learning_rate": 9.924149621995508e-06, "loss": 0.0196, "step": 35520 }, { "epoch": 0.3000147769732537, "grad_norm": 0.46151965856552124, "learning_rate": 9.92402170301806e-06, "loss": 0.0141, "step": 35530 }, { "epoch": 0.30009921682041757, "grad_norm": 0.3812335431575775, "learning_rate": 9.923893677091867e-06, "loss": 0.0129, "step": 35540 }, { "epoch": 0.30018365666758146, "grad_norm": 0.2805618345737457, "learning_rate": 9.923765544219713e-06, "loss": 0.0137, "step": 35550 }, { "epoch": 0.3002680965147453, "grad_norm": 0.413728266954422, "learning_rate": 9.92363730440438e-06, "loss": 0.0168, "step": 35560 }, { "epoch": 0.3003525363619092, "grad_norm": 1.0352916717529297, "learning_rate": 9.923508957648655e-06, "loss": 0.0088, "step": 35570 }, { "epoch": 0.30043697620907306, "grad_norm": 0.13902735710144043, "learning_rate": 9.923380503955323e-06, "loss": 0.0146, "step": 35580 }, { "epoch": 0.30052141605623695, "grad_norm": 0.17753645777702332, "learning_rate": 9.923251943327174e-06, "loss": 0.0101, "step": 35590 }, { "epoch": 0.30060585590340083, "grad_norm": 0.43955427408218384, "learning_rate": 9.923123275767003e-06, "loss": 0.0114, "step": 35600 }, { "epoch": 0.3006902957505647, "grad_norm": 0.2891683876514435, "learning_rate": 9.922994501277605e-06, "loss": 0.0189, "step": 35610 }, { "epoch": 0.30077473559772855, "grad_norm": 0.5450543761253357, "learning_rate": 9.922865619861774e-06, "loss": 0.0094, "step": 35620 }, { "epoch": 0.30085917544489243, "grad_norm": 0.45525041222572327, "learning_rate": 9.92273663152231e-06, "loss": 0.014, "step": 35630 }, { "epoch": 0.3009436152920563, "grad_norm": 0.3606662154197693, "learning_rate": 9.922607536262015e-06, "loss": 0.0162, "step": 35640 }, { "epoch": 0.3010280551392202, "grad_norm": 0.4312153160572052, "learning_rate": 9.922478334083694e-06, "loss": 0.0094, "step": 35650 }, { "epoch": 0.3011124949863841, "grad_norm": 0.5036609768867493, "learning_rate": 9.922349024990153e-06, "loss": 0.0229, "step": 35660 }, { "epoch": 0.3011969348335479, "grad_norm": 0.5234929323196411, "learning_rate": 9.922219608984199e-06, "loss": 0.026, "step": 35670 }, { "epoch": 0.3012813746807118, "grad_norm": 0.5431053638458252, "learning_rate": 9.922090086068643e-06, "loss": 0.01, "step": 35680 }, { "epoch": 0.3013658145278757, "grad_norm": 0.7390519976615906, "learning_rate": 9.921960456246299e-06, "loss": 0.0298, "step": 35690 }, { "epoch": 0.3014502543750396, "grad_norm": 0.9376678466796875, "learning_rate": 9.921830719519983e-06, "loss": 0.018, "step": 35700 }, { "epoch": 0.30153469422220347, "grad_norm": 0.5399588346481323, "learning_rate": 9.921700875892512e-06, "loss": 0.0119, "step": 35710 }, { "epoch": 0.30161913406936736, "grad_norm": 0.5111792087554932, "learning_rate": 9.921570925366706e-06, "loss": 0.0109, "step": 35720 }, { "epoch": 0.3017035739165312, "grad_norm": 0.6712158918380737, "learning_rate": 9.921440867945388e-06, "loss": 0.0171, "step": 35730 }, { "epoch": 0.3017880137636951, "grad_norm": 0.14643387496471405, "learning_rate": 9.921310703631386e-06, "loss": 0.0196, "step": 35740 }, { "epoch": 0.30187245361085896, "grad_norm": 0.8247216939926147, "learning_rate": 9.92118043242752e-06, "loss": 0.0135, "step": 35750 }, { "epoch": 0.30195689345802285, "grad_norm": 0.47724154591560364, "learning_rate": 9.921050054336625e-06, "loss": 0.0147, "step": 35760 }, { "epoch": 0.30204133330518673, "grad_norm": 0.19168046116828918, "learning_rate": 9.920919569361533e-06, "loss": 0.0172, "step": 35770 }, { "epoch": 0.3021257731523506, "grad_norm": 1.035183072090149, "learning_rate": 9.920788977505073e-06, "loss": 0.0243, "step": 35780 }, { "epoch": 0.30221021299951445, "grad_norm": 1.8772780895233154, "learning_rate": 9.920658278770088e-06, "loss": 0.0218, "step": 35790 }, { "epoch": 0.30229465284667834, "grad_norm": 0.6082906126976013, "learning_rate": 9.920527473159411e-06, "loss": 0.0156, "step": 35800 }, { "epoch": 0.3023790926938422, "grad_norm": 0.17998862266540527, "learning_rate": 9.920396560675887e-06, "loss": 0.0098, "step": 35810 }, { "epoch": 0.3024635325410061, "grad_norm": 0.503464937210083, "learning_rate": 9.920265541322358e-06, "loss": 0.0155, "step": 35820 }, { "epoch": 0.30254797238817, "grad_norm": 0.8688198328018188, "learning_rate": 9.920134415101669e-06, "loss": 0.0179, "step": 35830 }, { "epoch": 0.3026324122353339, "grad_norm": 0.37124118208885193, "learning_rate": 9.920003182016669e-06, "loss": 0.0194, "step": 35840 }, { "epoch": 0.3027168520824977, "grad_norm": 0.5594615936279297, "learning_rate": 9.919871842070207e-06, "loss": 0.015, "step": 35850 }, { "epoch": 0.3028012919296616, "grad_norm": 0.5345581769943237, "learning_rate": 9.919740395265138e-06, "loss": 0.0154, "step": 35860 }, { "epoch": 0.3028857317768255, "grad_norm": 0.5151305198669434, "learning_rate": 9.919608841604317e-06, "loss": 0.0194, "step": 35870 }, { "epoch": 0.30297017162398937, "grad_norm": 0.42245039343833923, "learning_rate": 9.919477181090598e-06, "loss": 0.0223, "step": 35880 }, { "epoch": 0.30305461147115326, "grad_norm": 0.2967085838317871, "learning_rate": 9.919345413726844e-06, "loss": 0.0225, "step": 35890 }, { "epoch": 0.3031390513183171, "grad_norm": 0.38162505626678467, "learning_rate": 9.919213539515915e-06, "loss": 0.0118, "step": 35900 }, { "epoch": 0.303223491165481, "grad_norm": 0.5049235820770264, "learning_rate": 9.919081558460676e-06, "loss": 0.0215, "step": 35910 }, { "epoch": 0.30330793101264486, "grad_norm": 0.45778948068618774, "learning_rate": 9.918949470563994e-06, "loss": 0.0128, "step": 35920 }, { "epoch": 0.30339237085980875, "grad_norm": 0.2824270725250244, "learning_rate": 9.918817275828736e-06, "loss": 0.0221, "step": 35930 }, { "epoch": 0.30347681070697263, "grad_norm": 0.9916436076164246, "learning_rate": 9.918684974257777e-06, "loss": 0.0253, "step": 35940 }, { "epoch": 0.3035612505541365, "grad_norm": 0.6929193735122681, "learning_rate": 9.918552565853988e-06, "loss": 0.0237, "step": 35950 }, { "epoch": 0.30364569040130035, "grad_norm": 0.262925922870636, "learning_rate": 9.918420050620244e-06, "loss": 0.0144, "step": 35960 }, { "epoch": 0.30373013024846424, "grad_norm": 0.4626508951187134, "learning_rate": 9.918287428559424e-06, "loss": 0.0247, "step": 35970 }, { "epoch": 0.3038145700956281, "grad_norm": 0.7493857741355896, "learning_rate": 9.918154699674409e-06, "loss": 0.0271, "step": 35980 }, { "epoch": 0.303899009942792, "grad_norm": 0.23355117440223694, "learning_rate": 9.918021863968083e-06, "loss": 0.0168, "step": 35990 }, { "epoch": 0.3039834497899559, "grad_norm": 0.6955509781837463, "learning_rate": 9.917888921443329e-06, "loss": 0.0196, "step": 36000 }, { "epoch": 0.3040678896371198, "grad_norm": 0.46840018033981323, "learning_rate": 9.917755872103036e-06, "loss": 0.0136, "step": 36010 }, { "epoch": 0.3041523294842836, "grad_norm": 0.33867785334587097, "learning_rate": 9.917622715950092e-06, "loss": 0.022, "step": 36020 }, { "epoch": 0.3042367693314475, "grad_norm": 0.9784456491470337, "learning_rate": 9.917489452987392e-06, "loss": 0.0202, "step": 36030 }, { "epoch": 0.3043212091786114, "grad_norm": 0.47196221351623535, "learning_rate": 9.917356083217828e-06, "loss": 0.0243, "step": 36040 }, { "epoch": 0.30440564902577527, "grad_norm": 0.4477933347225189, "learning_rate": 9.917222606644296e-06, "loss": 0.0201, "step": 36050 }, { "epoch": 0.30449008887293916, "grad_norm": 0.22978802025318146, "learning_rate": 9.9170890232697e-06, "loss": 0.0177, "step": 36060 }, { "epoch": 0.30457452872010304, "grad_norm": 0.645344614982605, "learning_rate": 9.916955333096937e-06, "loss": 0.0184, "step": 36070 }, { "epoch": 0.3046589685672669, "grad_norm": 0.6952880024909973, "learning_rate": 9.916821536128908e-06, "loss": 0.0195, "step": 36080 }, { "epoch": 0.30474340841443076, "grad_norm": 1.369198203086853, "learning_rate": 9.916687632368527e-06, "loss": 0.0162, "step": 36090 }, { "epoch": 0.30482784826159465, "grad_norm": 0.7585639357566833, "learning_rate": 9.916553621818697e-06, "loss": 0.0244, "step": 36100 }, { "epoch": 0.30491228810875853, "grad_norm": 0.15755623579025269, "learning_rate": 9.91641950448233e-06, "loss": 0.0073, "step": 36110 }, { "epoch": 0.3049967279559224, "grad_norm": 0.4046994745731354, "learning_rate": 9.916285280362338e-06, "loss": 0.0215, "step": 36120 }, { "epoch": 0.30508116780308625, "grad_norm": 0.525051474571228, "learning_rate": 9.916150949461638e-06, "loss": 0.0133, "step": 36130 }, { "epoch": 0.30516560765025014, "grad_norm": 0.36579716205596924, "learning_rate": 9.916016511783146e-06, "loss": 0.0167, "step": 36140 }, { "epoch": 0.305250047497414, "grad_norm": 0.5418617129325867, "learning_rate": 9.915881967329784e-06, "loss": 0.0175, "step": 36150 }, { "epoch": 0.3053344873445779, "grad_norm": 0.2535002827644348, "learning_rate": 9.915747316104472e-06, "loss": 0.017, "step": 36160 }, { "epoch": 0.3054189271917418, "grad_norm": 0.7531832456588745, "learning_rate": 9.915612558110136e-06, "loss": 0.0149, "step": 36170 }, { "epoch": 0.3055033670389057, "grad_norm": 0.09086893498897552, "learning_rate": 9.915477693349702e-06, "loss": 0.0115, "step": 36180 }, { "epoch": 0.3055878068860695, "grad_norm": 0.33410799503326416, "learning_rate": 9.9153427218261e-06, "loss": 0.019, "step": 36190 }, { "epoch": 0.3056722467332334, "grad_norm": 0.0952291265130043, "learning_rate": 9.915207643542262e-06, "loss": 0.0139, "step": 36200 }, { "epoch": 0.3057566865803973, "grad_norm": 0.2700866758823395, "learning_rate": 9.91507245850112e-06, "loss": 0.0144, "step": 36210 }, { "epoch": 0.30584112642756117, "grad_norm": 0.6409828662872314, "learning_rate": 9.914937166705613e-06, "loss": 0.017, "step": 36220 }, { "epoch": 0.30592556627472506, "grad_norm": 0.38831162452697754, "learning_rate": 9.914801768158677e-06, "loss": 0.021, "step": 36230 }, { "epoch": 0.30601000612188894, "grad_norm": 0.5890583992004395, "learning_rate": 9.914666262863254e-06, "loss": 0.0149, "step": 36240 }, { "epoch": 0.3060944459690528, "grad_norm": 1.0090044736862183, "learning_rate": 9.914530650822287e-06, "loss": 0.01, "step": 36250 }, { "epoch": 0.30617888581621666, "grad_norm": 0.8264241218566895, "learning_rate": 9.91439493203872e-06, "loss": 0.0173, "step": 36260 }, { "epoch": 0.30626332566338055, "grad_norm": 0.7712541818618774, "learning_rate": 9.914259106515503e-06, "loss": 0.0187, "step": 36270 }, { "epoch": 0.30634776551054443, "grad_norm": 0.37389662861824036, "learning_rate": 9.914123174255585e-06, "loss": 0.0286, "step": 36280 }, { "epoch": 0.3064322053577083, "grad_norm": 0.34914499521255493, "learning_rate": 9.91398713526192e-06, "loss": 0.0109, "step": 36290 }, { "epoch": 0.3065166452048722, "grad_norm": 0.18348586559295654, "learning_rate": 9.91385098953746e-06, "loss": 0.018, "step": 36300 }, { "epoch": 0.30660108505203604, "grad_norm": 0.05291954055428505, "learning_rate": 9.913714737085163e-06, "loss": 0.019, "step": 36310 }, { "epoch": 0.3066855248991999, "grad_norm": 0.3954460322856903, "learning_rate": 9.91357837790799e-06, "loss": 0.0137, "step": 36320 }, { "epoch": 0.3067699647463638, "grad_norm": 0.45117294788360596, "learning_rate": 9.913441912008902e-06, "loss": 0.0124, "step": 36330 }, { "epoch": 0.3068544045935277, "grad_norm": 0.7633441090583801, "learning_rate": 9.913305339390861e-06, "loss": 0.0184, "step": 36340 }, { "epoch": 0.3069388444406916, "grad_norm": 0.4607831835746765, "learning_rate": 9.913168660056836e-06, "loss": 0.0134, "step": 36350 }, { "epoch": 0.3070232842878554, "grad_norm": 0.25486600399017334, "learning_rate": 9.913031874009794e-06, "loss": 0.0173, "step": 36360 }, { "epoch": 0.3071077241350193, "grad_norm": 0.8232319951057434, "learning_rate": 9.912894981252707e-06, "loss": 0.0164, "step": 36370 }, { "epoch": 0.3071921639821832, "grad_norm": 0.6589977741241455, "learning_rate": 9.912757981788547e-06, "loss": 0.024, "step": 36380 }, { "epoch": 0.3072766038293471, "grad_norm": 0.3674556612968445, "learning_rate": 9.912620875620291e-06, "loss": 0.0278, "step": 36390 }, { "epoch": 0.30736104367651096, "grad_norm": 0.18435996770858765, "learning_rate": 9.912483662750917e-06, "loss": 0.0115, "step": 36400 }, { "epoch": 0.30744548352367485, "grad_norm": 0.42162930965423584, "learning_rate": 9.912346343183402e-06, "loss": 0.0247, "step": 36410 }, { "epoch": 0.3075299233708387, "grad_norm": 0.960359513759613, "learning_rate": 9.912208916920733e-06, "loss": 0.0279, "step": 36420 }, { "epoch": 0.30761436321800256, "grad_norm": 0.501909077167511, "learning_rate": 9.912071383965892e-06, "loss": 0.0186, "step": 36430 }, { "epoch": 0.30769880306516645, "grad_norm": 0.030123431235551834, "learning_rate": 9.911933744321867e-06, "loss": 0.0153, "step": 36440 }, { "epoch": 0.30778324291233033, "grad_norm": 0.24226374924182892, "learning_rate": 9.911795997991649e-06, "loss": 0.0205, "step": 36450 }, { "epoch": 0.3078676827594942, "grad_norm": 0.9027159214019775, "learning_rate": 9.911658144978227e-06, "loss": 0.0231, "step": 36460 }, { "epoch": 0.3079521226066581, "grad_norm": 0.66849285364151, "learning_rate": 9.911520185284598e-06, "loss": 0.0238, "step": 36470 }, { "epoch": 0.30803656245382194, "grad_norm": 0.3317207992076874, "learning_rate": 9.911382118913757e-06, "loss": 0.0185, "step": 36480 }, { "epoch": 0.3081210023009858, "grad_norm": 0.3610399067401886, "learning_rate": 9.9112439458687e-06, "loss": 0.0152, "step": 36490 }, { "epoch": 0.3082054421481497, "grad_norm": 0.5433700084686279, "learning_rate": 9.911105666152434e-06, "loss": 0.0145, "step": 36500 }, { "epoch": 0.3082898819953136, "grad_norm": 0.8196445107460022, "learning_rate": 9.910967279767957e-06, "loss": 0.0162, "step": 36510 }, { "epoch": 0.3083743218424775, "grad_norm": 0.28700098395347595, "learning_rate": 9.910828786718279e-06, "loss": 0.0239, "step": 36520 }, { "epoch": 0.3084587616896413, "grad_norm": 0.4552580416202545, "learning_rate": 9.910690187006407e-06, "loss": 0.0207, "step": 36530 }, { "epoch": 0.3085432015368052, "grad_norm": 0.5773892402648926, "learning_rate": 9.910551480635348e-06, "loss": 0.0169, "step": 36540 }, { "epoch": 0.3086276413839691, "grad_norm": 0.18601778149604797, "learning_rate": 9.910412667608118e-06, "loss": 0.0246, "step": 36550 }, { "epoch": 0.308712081231133, "grad_norm": 0.8542512059211731, "learning_rate": 9.91027374792773e-06, "loss": 0.0167, "step": 36560 }, { "epoch": 0.30879652107829686, "grad_norm": 0.5376288294792175, "learning_rate": 9.910134721597205e-06, "loss": 0.0201, "step": 36570 }, { "epoch": 0.30888096092546075, "grad_norm": 0.029440918937325478, "learning_rate": 9.909995588619558e-06, "loss": 0.0241, "step": 36580 }, { "epoch": 0.3089654007726246, "grad_norm": 0.6609123945236206, "learning_rate": 9.909856348997813e-06, "loss": 0.0228, "step": 36590 }, { "epoch": 0.30904984061978846, "grad_norm": 0.2357044517993927, "learning_rate": 9.909717002734996e-06, "loss": 0.0163, "step": 36600 }, { "epoch": 0.30913428046695235, "grad_norm": 0.4717057943344116, "learning_rate": 9.909577549834129e-06, "loss": 0.024, "step": 36610 }, { "epoch": 0.30921872031411624, "grad_norm": 0.41089338064193726, "learning_rate": 9.909437990298245e-06, "loss": 0.0204, "step": 36620 }, { "epoch": 0.3093031601612801, "grad_norm": 0.5416595339775085, "learning_rate": 9.909298324130374e-06, "loss": 0.0149, "step": 36630 }, { "epoch": 0.309387600008444, "grad_norm": 0.42616626620292664, "learning_rate": 9.90915855133355e-06, "loss": 0.0171, "step": 36640 }, { "epoch": 0.30947203985560784, "grad_norm": 1.2785861492156982, "learning_rate": 9.909018671910809e-06, "loss": 0.0242, "step": 36650 }, { "epoch": 0.3095564797027717, "grad_norm": 1.1252942085266113, "learning_rate": 9.908878685865187e-06, "loss": 0.0217, "step": 36660 }, { "epoch": 0.3096409195499356, "grad_norm": 0.41756516695022583, "learning_rate": 9.908738593199726e-06, "loss": 0.017, "step": 36670 }, { "epoch": 0.3097253593970995, "grad_norm": 0.2292490005493164, "learning_rate": 9.908598393917468e-06, "loss": 0.0213, "step": 36680 }, { "epoch": 0.3098097992442634, "grad_norm": 0.5165053009986877, "learning_rate": 9.908458088021458e-06, "loss": 0.0185, "step": 36690 }, { "epoch": 0.30989423909142727, "grad_norm": 0.6979303956031799, "learning_rate": 9.908317675514746e-06, "loss": 0.0157, "step": 36700 }, { "epoch": 0.3099786789385911, "grad_norm": 0.3267558217048645, "learning_rate": 9.90817715640038e-06, "loss": 0.0118, "step": 36710 }, { "epoch": 0.310063118785755, "grad_norm": 0.06113617494702339, "learning_rate": 9.908036530681411e-06, "loss": 0.0169, "step": 36720 }, { "epoch": 0.3101475586329189, "grad_norm": 0.5393148064613342, "learning_rate": 9.907895798360894e-06, "loss": 0.0209, "step": 36730 }, { "epoch": 0.31023199848008276, "grad_norm": 0.4711197316646576, "learning_rate": 9.907754959441888e-06, "loss": 0.0101, "step": 36740 }, { "epoch": 0.31031643832724665, "grad_norm": 0.6764824986457825, "learning_rate": 9.907614013927447e-06, "loss": 0.0173, "step": 36750 }, { "epoch": 0.3104008781744105, "grad_norm": 0.41358494758605957, "learning_rate": 9.90747296182064e-06, "loss": 0.0191, "step": 36760 }, { "epoch": 0.31048531802157436, "grad_norm": 0.7698579430580139, "learning_rate": 9.90733180312452e-06, "loss": 0.0141, "step": 36770 }, { "epoch": 0.31056975786873825, "grad_norm": 0.28188231587409973, "learning_rate": 9.907190537842162e-06, "loss": 0.0144, "step": 36780 }, { "epoch": 0.31065419771590214, "grad_norm": 0.5176096558570862, "learning_rate": 9.907049165976632e-06, "loss": 0.024, "step": 36790 }, { "epoch": 0.310738637563066, "grad_norm": 0.4612256586551666, "learning_rate": 9.906907687530998e-06, "loss": 0.0187, "step": 36800 }, { "epoch": 0.3108230774102299, "grad_norm": 0.2988598048686981, "learning_rate": 9.906766102508336e-06, "loss": 0.0112, "step": 36810 }, { "epoch": 0.31090751725739374, "grad_norm": 0.4280484914779663, "learning_rate": 9.906624410911719e-06, "loss": 0.0189, "step": 36820 }, { "epoch": 0.3109919571045576, "grad_norm": 0.4771469235420227, "learning_rate": 9.906482612744223e-06, "loss": 0.0113, "step": 36830 }, { "epoch": 0.3110763969517215, "grad_norm": 0.5548416376113892, "learning_rate": 9.906340708008933e-06, "loss": 0.0116, "step": 36840 }, { "epoch": 0.3111608367988854, "grad_norm": 0.11100199818611145, "learning_rate": 9.906198696708926e-06, "loss": 0.0081, "step": 36850 }, { "epoch": 0.3112452766460493, "grad_norm": 0.4971669912338257, "learning_rate": 9.906056578847291e-06, "loss": 0.0244, "step": 36860 }, { "epoch": 0.31132971649321317, "grad_norm": 1.6382628679275513, "learning_rate": 9.905914354427111e-06, "loss": 0.0121, "step": 36870 }, { "epoch": 0.311414156340377, "grad_norm": 0.4725772738456726, "learning_rate": 9.905772023451477e-06, "loss": 0.0172, "step": 36880 }, { "epoch": 0.3114985961875409, "grad_norm": 0.2445540428161621, "learning_rate": 9.905629585923479e-06, "loss": 0.0165, "step": 36890 }, { "epoch": 0.3115830360347048, "grad_norm": 0.6951415538787842, "learning_rate": 9.905487041846214e-06, "loss": 0.0128, "step": 36900 }, { "epoch": 0.31166747588186866, "grad_norm": 0.5437749624252319, "learning_rate": 9.905344391222774e-06, "loss": 0.0275, "step": 36910 }, { "epoch": 0.31175191572903255, "grad_norm": 0.916522204875946, "learning_rate": 9.90520163405626e-06, "loss": 0.0197, "step": 36920 }, { "epoch": 0.31183635557619643, "grad_norm": 0.3906140923500061, "learning_rate": 9.90505877034977e-06, "loss": 0.0195, "step": 36930 }, { "epoch": 0.31192079542336026, "grad_norm": 1.0348215103149414, "learning_rate": 9.904915800106409e-06, "loss": 0.0154, "step": 36940 }, { "epoch": 0.31200523527052415, "grad_norm": 0.6498649716377258, "learning_rate": 9.904772723329283e-06, "loss": 0.0333, "step": 36950 }, { "epoch": 0.31208967511768804, "grad_norm": 0.8054680228233337, "learning_rate": 9.9046295400215e-06, "loss": 0.0212, "step": 36960 }, { "epoch": 0.3121741149648519, "grad_norm": 0.6872884631156921, "learning_rate": 9.904486250186165e-06, "loss": 0.0191, "step": 36970 }, { "epoch": 0.3122585548120158, "grad_norm": 0.3384917974472046, "learning_rate": 9.904342853826395e-06, "loss": 0.0172, "step": 36980 }, { "epoch": 0.31234299465917964, "grad_norm": 0.1399644911289215, "learning_rate": 9.904199350945304e-06, "loss": 0.0134, "step": 36990 }, { "epoch": 0.3124274345063435, "grad_norm": 0.3626551926136017, "learning_rate": 9.904055741546008e-06, "loss": 0.0176, "step": 37000 }, { "epoch": 0.3125118743535074, "grad_norm": 0.07570326328277588, "learning_rate": 9.903912025631625e-06, "loss": 0.0156, "step": 37010 }, { "epoch": 0.3125963142006713, "grad_norm": 0.27056726813316345, "learning_rate": 9.903768203205278e-06, "loss": 0.01, "step": 37020 }, { "epoch": 0.3126807540478352, "grad_norm": 0.8308244943618774, "learning_rate": 9.903624274270092e-06, "loss": 0.0222, "step": 37030 }, { "epoch": 0.31276519389499907, "grad_norm": 0.908872127532959, "learning_rate": 9.90348023882919e-06, "loss": 0.027, "step": 37040 }, { "epoch": 0.3128496337421629, "grad_norm": 0.28504517674446106, "learning_rate": 9.903336096885703e-06, "loss": 0.0146, "step": 37050 }, { "epoch": 0.3129340735893268, "grad_norm": 1.2642602920532227, "learning_rate": 9.90319184844276e-06, "loss": 0.0329, "step": 37060 }, { "epoch": 0.3130185134364907, "grad_norm": 0.49915775656700134, "learning_rate": 9.903047493503494e-06, "loss": 0.0182, "step": 37070 }, { "epoch": 0.31310295328365456, "grad_norm": 0.41990798711776733, "learning_rate": 9.902903032071044e-06, "loss": 0.0141, "step": 37080 }, { "epoch": 0.31318739313081845, "grad_norm": 0.8015621304512024, "learning_rate": 9.902758464148544e-06, "loss": 0.0231, "step": 37090 }, { "epoch": 0.31327183297798233, "grad_norm": 0.574648380279541, "learning_rate": 9.902613789739133e-06, "loss": 0.0114, "step": 37100 }, { "epoch": 0.31335627282514616, "grad_norm": 0.42183005809783936, "learning_rate": 9.902469008845955e-06, "loss": 0.0153, "step": 37110 }, { "epoch": 0.31344071267231005, "grad_norm": 0.8584659099578857, "learning_rate": 9.902324121472155e-06, "loss": 0.0155, "step": 37120 }, { "epoch": 0.31352515251947394, "grad_norm": 0.6287288069725037, "learning_rate": 9.902179127620881e-06, "loss": 0.0187, "step": 37130 }, { "epoch": 0.3136095923666378, "grad_norm": 0.24241307377815247, "learning_rate": 9.90203402729528e-06, "loss": 0.0155, "step": 37140 }, { "epoch": 0.3136940322138017, "grad_norm": 0.4265570044517517, "learning_rate": 9.901888820498507e-06, "loss": 0.019, "step": 37150 }, { "epoch": 0.3137784720609656, "grad_norm": 0.33983588218688965, "learning_rate": 9.90174350723371e-06, "loss": 0.0079, "step": 37160 }, { "epoch": 0.3138629119081294, "grad_norm": 0.3028276264667511, "learning_rate": 9.90159808750405e-06, "loss": 0.0145, "step": 37170 }, { "epoch": 0.3139473517552933, "grad_norm": 0.5536737442016602, "learning_rate": 9.901452561312683e-06, "loss": 0.0146, "step": 37180 }, { "epoch": 0.3140317916024572, "grad_norm": 0.3062298595905304, "learning_rate": 9.901306928662772e-06, "loss": 0.0179, "step": 37190 }, { "epoch": 0.3141162314496211, "grad_norm": 1.356179118156433, "learning_rate": 9.901161189557477e-06, "loss": 0.0185, "step": 37200 }, { "epoch": 0.314200671296785, "grad_norm": 0.8524938225746155, "learning_rate": 9.901015343999966e-06, "loss": 0.0215, "step": 37210 }, { "epoch": 0.3142851111439488, "grad_norm": 0.5953320264816284, "learning_rate": 9.900869391993408e-06, "loss": 0.0206, "step": 37220 }, { "epoch": 0.3143695509911127, "grad_norm": 0.29860037565231323, "learning_rate": 9.90072333354097e-06, "loss": 0.0178, "step": 37230 }, { "epoch": 0.3144539908382766, "grad_norm": 0.744928777217865, "learning_rate": 9.900577168645825e-06, "loss": 0.0181, "step": 37240 }, { "epoch": 0.31453843068544046, "grad_norm": 0.517539918422699, "learning_rate": 9.900430897311149e-06, "loss": 0.01, "step": 37250 }, { "epoch": 0.31462287053260435, "grad_norm": 0.35652321577072144, "learning_rate": 9.900284519540117e-06, "loss": 0.0158, "step": 37260 }, { "epoch": 0.31470731037976823, "grad_norm": 0.6896101832389832, "learning_rate": 9.900138035335908e-06, "loss": 0.0167, "step": 37270 }, { "epoch": 0.31479175022693207, "grad_norm": 0.38382139801979065, "learning_rate": 9.899991444701708e-06, "loss": 0.0257, "step": 37280 }, { "epoch": 0.31487619007409595, "grad_norm": 1.178059458732605, "learning_rate": 9.899844747640697e-06, "loss": 0.0197, "step": 37290 }, { "epoch": 0.31496062992125984, "grad_norm": 0.20851270854473114, "learning_rate": 9.899697944156062e-06, "loss": 0.02, "step": 37300 }, { "epoch": 0.3150450697684237, "grad_norm": 0.9811514616012573, "learning_rate": 9.899551034250992e-06, "loss": 0.0168, "step": 37310 }, { "epoch": 0.3151295096155876, "grad_norm": 0.6915039420127869, "learning_rate": 9.899404017928677e-06, "loss": 0.024, "step": 37320 }, { "epoch": 0.3152139494627515, "grad_norm": 0.6112104654312134, "learning_rate": 9.899256895192311e-06, "loss": 0.0124, "step": 37330 }, { "epoch": 0.31529838930991533, "grad_norm": 0.370953768491745, "learning_rate": 9.899109666045087e-06, "loss": 0.0104, "step": 37340 }, { "epoch": 0.3153828291570792, "grad_norm": 0.39208367466926575, "learning_rate": 9.898962330490208e-06, "loss": 0.0202, "step": 37350 }, { "epoch": 0.3154672690042431, "grad_norm": 1.4422497749328613, "learning_rate": 9.89881488853087e-06, "loss": 0.0233, "step": 37360 }, { "epoch": 0.315551708851407, "grad_norm": 1.3952702283859253, "learning_rate": 9.898667340170277e-06, "loss": 0.0218, "step": 37370 }, { "epoch": 0.3156361486985709, "grad_norm": 1.673000693321228, "learning_rate": 9.898519685411633e-06, "loss": 0.0249, "step": 37380 }, { "epoch": 0.31572058854573476, "grad_norm": 0.3141959011554718, "learning_rate": 9.898371924258146e-06, "loss": 0.0124, "step": 37390 }, { "epoch": 0.3158050283928986, "grad_norm": 0.7486371994018555, "learning_rate": 9.898224056713022e-06, "loss": 0.0259, "step": 37400 }, { "epoch": 0.3158894682400625, "grad_norm": 0.498839408159256, "learning_rate": 9.898076082779478e-06, "loss": 0.0211, "step": 37410 }, { "epoch": 0.31597390808722636, "grad_norm": 0.6437764763832092, "learning_rate": 9.897928002460724e-06, "loss": 0.0213, "step": 37420 }, { "epoch": 0.31605834793439025, "grad_norm": 0.5361313819885254, "learning_rate": 9.897779815759977e-06, "loss": 0.0147, "step": 37430 }, { "epoch": 0.31614278778155414, "grad_norm": 0.3259045481681824, "learning_rate": 9.897631522680457e-06, "loss": 0.0129, "step": 37440 }, { "epoch": 0.31622722762871797, "grad_norm": 0.45022061467170715, "learning_rate": 9.897483123225382e-06, "loss": 0.0113, "step": 37450 }, { "epoch": 0.31631166747588185, "grad_norm": 0.7380661964416504, "learning_rate": 9.89733461739798e-06, "loss": 0.0226, "step": 37460 }, { "epoch": 0.31639610732304574, "grad_norm": 1.0758076906204224, "learning_rate": 9.89718600520147e-06, "loss": 0.02, "step": 37470 }, { "epoch": 0.3164805471702096, "grad_norm": 0.36971890926361084, "learning_rate": 9.897037286639087e-06, "loss": 0.0187, "step": 37480 }, { "epoch": 0.3165649870173735, "grad_norm": 0.8278838992118835, "learning_rate": 9.896888461714057e-06, "loss": 0.0144, "step": 37490 }, { "epoch": 0.3166494268645374, "grad_norm": 0.45250895619392395, "learning_rate": 9.896739530429612e-06, "loss": 0.0189, "step": 37500 }, { "epoch": 0.31673386671170123, "grad_norm": 0.08885059505701065, "learning_rate": 9.896590492788988e-06, "loss": 0.0135, "step": 37510 }, { "epoch": 0.3168183065588651, "grad_norm": 0.7351047992706299, "learning_rate": 9.896441348795423e-06, "loss": 0.0206, "step": 37520 }, { "epoch": 0.316902746406029, "grad_norm": 0.23045051097869873, "learning_rate": 9.896292098452154e-06, "loss": 0.0108, "step": 37530 }, { "epoch": 0.3169871862531929, "grad_norm": 0.04505005106329918, "learning_rate": 9.896142741762425e-06, "loss": 0.03, "step": 37540 }, { "epoch": 0.3170716261003568, "grad_norm": 1.051215648651123, "learning_rate": 9.895993278729478e-06, "loss": 0.014, "step": 37550 }, { "epoch": 0.31715606594752066, "grad_norm": 0.2296217530965805, "learning_rate": 9.895843709356561e-06, "loss": 0.0155, "step": 37560 }, { "epoch": 0.3172405057946845, "grad_norm": 0.8150753378868103, "learning_rate": 9.895694033646921e-06, "loss": 0.0149, "step": 37570 }, { "epoch": 0.3173249456418484, "grad_norm": 0.46655112504959106, "learning_rate": 9.895544251603811e-06, "loss": 0.0191, "step": 37580 }, { "epoch": 0.31740938548901226, "grad_norm": 0.921753466129303, "learning_rate": 9.895394363230481e-06, "loss": 0.0154, "step": 37590 }, { "epoch": 0.31749382533617615, "grad_norm": 0.40902894735336304, "learning_rate": 9.895244368530192e-06, "loss": 0.0194, "step": 37600 }, { "epoch": 0.31757826518334004, "grad_norm": 0.3810778558254242, "learning_rate": 9.895094267506196e-06, "loss": 0.0182, "step": 37610 }, { "epoch": 0.3176627050305039, "grad_norm": 0.09483281522989273, "learning_rate": 9.894944060161756e-06, "loss": 0.0143, "step": 37620 }, { "epoch": 0.31774714487766775, "grad_norm": 1.4164499044418335, "learning_rate": 9.894793746500135e-06, "loss": 0.0121, "step": 37630 }, { "epoch": 0.31783158472483164, "grad_norm": 0.32712894678115845, "learning_rate": 9.894643326524595e-06, "loss": 0.0141, "step": 37640 }, { "epoch": 0.3179160245719955, "grad_norm": 0.6329855918884277, "learning_rate": 9.894492800238407e-06, "loss": 0.0158, "step": 37650 }, { "epoch": 0.3180004644191594, "grad_norm": 0.4025427997112274, "learning_rate": 9.894342167644836e-06, "loss": 0.0231, "step": 37660 }, { "epoch": 0.3180849042663233, "grad_norm": 0.19706588983535767, "learning_rate": 9.894191428747157e-06, "loss": 0.017, "step": 37670 }, { "epoch": 0.31816934411348713, "grad_norm": 0.013868716545403004, "learning_rate": 9.894040583548646e-06, "loss": 0.0093, "step": 37680 }, { "epoch": 0.318253783960651, "grad_norm": 1.2827668190002441, "learning_rate": 9.893889632052572e-06, "loss": 0.0194, "step": 37690 }, { "epoch": 0.3183382238078149, "grad_norm": 0.5044389367103577, "learning_rate": 9.893738574262221e-06, "loss": 0.0193, "step": 37700 }, { "epoch": 0.3184226636549788, "grad_norm": 0.4456021189689636, "learning_rate": 9.893587410180869e-06, "loss": 0.0143, "step": 37710 }, { "epoch": 0.3185071035021427, "grad_norm": 0.4061274230480194, "learning_rate": 9.893436139811803e-06, "loss": 0.0252, "step": 37720 }, { "epoch": 0.31859154334930656, "grad_norm": 0.43294280767440796, "learning_rate": 9.893284763158307e-06, "loss": 0.0122, "step": 37730 }, { "epoch": 0.3186759831964704, "grad_norm": 0.4828226864337921, "learning_rate": 9.893133280223666e-06, "loss": 0.0172, "step": 37740 }, { "epoch": 0.3187604230436343, "grad_norm": 0.9259418845176697, "learning_rate": 9.892981691011174e-06, "loss": 0.0116, "step": 37750 }, { "epoch": 0.31884486289079816, "grad_norm": 0.2615472078323364, "learning_rate": 9.892829995524124e-06, "loss": 0.0132, "step": 37760 }, { "epoch": 0.31892930273796205, "grad_norm": 1.0171362161636353, "learning_rate": 9.892678193765808e-06, "loss": 0.0211, "step": 37770 }, { "epoch": 0.31901374258512594, "grad_norm": 0.7626024484634399, "learning_rate": 9.892526285739523e-06, "loss": 0.012, "step": 37780 }, { "epoch": 0.3190981824322898, "grad_norm": 0.44947007298469543, "learning_rate": 9.892374271448573e-06, "loss": 0.0155, "step": 37790 }, { "epoch": 0.31918262227945365, "grad_norm": 0.6086024641990662, "learning_rate": 9.892222150896253e-06, "loss": 0.021, "step": 37800 }, { "epoch": 0.31926706212661754, "grad_norm": 0.32280203700065613, "learning_rate": 9.892069924085872e-06, "loss": 0.0193, "step": 37810 }, { "epoch": 0.3193515019737814, "grad_norm": 0.4257761538028717, "learning_rate": 9.891917591020735e-06, "loss": 0.0177, "step": 37820 }, { "epoch": 0.3194359418209453, "grad_norm": 0.47999075055122375, "learning_rate": 9.89176515170415e-06, "loss": 0.0134, "step": 37830 }, { "epoch": 0.3195203816681092, "grad_norm": 0.8340057730674744, "learning_rate": 9.891612606139426e-06, "loss": 0.0215, "step": 37840 }, { "epoch": 0.31960482151527303, "grad_norm": 0.8375966548919678, "learning_rate": 9.891459954329882e-06, "loss": 0.0244, "step": 37850 }, { "epoch": 0.3196892613624369, "grad_norm": 0.4144800305366516, "learning_rate": 9.891307196278829e-06, "loss": 0.0156, "step": 37860 }, { "epoch": 0.3197737012096008, "grad_norm": 0.26215383410453796, "learning_rate": 9.891154331989585e-06, "loss": 0.0131, "step": 37870 }, { "epoch": 0.3198581410567647, "grad_norm": 0.30529171228408813, "learning_rate": 9.891001361465473e-06, "loss": 0.0125, "step": 37880 }, { "epoch": 0.3199425809039286, "grad_norm": 0.37353694438934326, "learning_rate": 9.890848284709812e-06, "loss": 0.0099, "step": 37890 }, { "epoch": 0.32002702075109246, "grad_norm": 0.13498489558696747, "learning_rate": 9.89069510172593e-06, "loss": 0.0158, "step": 37900 }, { "epoch": 0.3201114605982563, "grad_norm": 0.8497453331947327, "learning_rate": 9.890541812517151e-06, "loss": 0.0202, "step": 37910 }, { "epoch": 0.3201959004454202, "grad_norm": 0.40925875306129456, "learning_rate": 9.890388417086807e-06, "loss": 0.017, "step": 37920 }, { "epoch": 0.32028034029258406, "grad_norm": 0.6930340528488159, "learning_rate": 9.890234915438229e-06, "loss": 0.0126, "step": 37930 }, { "epoch": 0.32036478013974795, "grad_norm": 0.6053873896598816, "learning_rate": 9.89008130757475e-06, "loss": 0.0194, "step": 37940 }, { "epoch": 0.32044921998691184, "grad_norm": 0.33997058868408203, "learning_rate": 9.889927593499707e-06, "loss": 0.0151, "step": 37950 }, { "epoch": 0.3205336598340757, "grad_norm": 0.253255158662796, "learning_rate": 9.889773773216439e-06, "loss": 0.0085, "step": 37960 }, { "epoch": 0.32061809968123955, "grad_norm": 0.46164047718048096, "learning_rate": 9.889619846728286e-06, "loss": 0.0125, "step": 37970 }, { "epoch": 0.32070253952840344, "grad_norm": 1.2523419857025146, "learning_rate": 9.889465814038593e-06, "loss": 0.0222, "step": 37980 }, { "epoch": 0.3207869793755673, "grad_norm": 0.8469717502593994, "learning_rate": 9.889311675150703e-06, "loss": 0.0224, "step": 37990 }, { "epoch": 0.3208714192227312, "grad_norm": 0.649960994720459, "learning_rate": 9.889157430067968e-06, "loss": 0.0222, "step": 38000 }, { "epoch": 0.3209558590698951, "grad_norm": 0.5092555284500122, "learning_rate": 9.889003078793733e-06, "loss": 0.0257, "step": 38010 }, { "epoch": 0.321040298917059, "grad_norm": 0.1953773945569992, "learning_rate": 9.888848621331354e-06, "loss": 0.0099, "step": 38020 }, { "epoch": 0.3211247387642228, "grad_norm": 0.7257287502288818, "learning_rate": 9.888694057684185e-06, "loss": 0.0201, "step": 38030 }, { "epoch": 0.3212091786113867, "grad_norm": 0.6561552882194519, "learning_rate": 9.888539387855582e-06, "loss": 0.0176, "step": 38040 }, { "epoch": 0.3212936184585506, "grad_norm": 0.2819553017616272, "learning_rate": 9.888384611848908e-06, "loss": 0.0158, "step": 38050 }, { "epoch": 0.3213780583057145, "grad_norm": 1.111884593963623, "learning_rate": 9.888229729667517e-06, "loss": 0.0329, "step": 38060 }, { "epoch": 0.32146249815287836, "grad_norm": 0.6537862420082092, "learning_rate": 9.888074741314782e-06, "loss": 0.0218, "step": 38070 }, { "epoch": 0.3215469380000422, "grad_norm": 0.6263417601585388, "learning_rate": 9.887919646794065e-06, "loss": 0.0105, "step": 38080 }, { "epoch": 0.3216313778472061, "grad_norm": 0.5826760530471802, "learning_rate": 9.887764446108732e-06, "loss": 0.016, "step": 38090 }, { "epoch": 0.32171581769436997, "grad_norm": 0.5410755276679993, "learning_rate": 9.88760913926216e-06, "loss": 0.0161, "step": 38100 }, { "epoch": 0.32180025754153385, "grad_norm": 0.17876042425632477, "learning_rate": 9.887453726257716e-06, "loss": 0.0179, "step": 38110 }, { "epoch": 0.32188469738869774, "grad_norm": 0.7391490340232849, "learning_rate": 9.88729820709878e-06, "loss": 0.0222, "step": 38120 }, { "epoch": 0.3219691372358616, "grad_norm": 0.30804216861724854, "learning_rate": 9.88714258178873e-06, "loss": 0.0138, "step": 38130 }, { "epoch": 0.32205357708302546, "grad_norm": 0.8192024827003479, "learning_rate": 9.886986850330943e-06, "loss": 0.0118, "step": 38140 }, { "epoch": 0.32213801693018934, "grad_norm": 1.1853222846984863, "learning_rate": 9.886831012728804e-06, "loss": 0.0156, "step": 38150 }, { "epoch": 0.32222245677735323, "grad_norm": 0.43645980954170227, "learning_rate": 9.886675068985697e-06, "loss": 0.0125, "step": 38160 }, { "epoch": 0.3223068966245171, "grad_norm": 0.48630398511886597, "learning_rate": 9.886519019105007e-06, "loss": 0.0215, "step": 38170 }, { "epoch": 0.322391336471681, "grad_norm": 0.15304119884967804, "learning_rate": 9.886362863090127e-06, "loss": 0.0101, "step": 38180 }, { "epoch": 0.3224757763188449, "grad_norm": 0.6117144227027893, "learning_rate": 9.886206600944447e-06, "loss": 0.0184, "step": 38190 }, { "epoch": 0.3225602161660087, "grad_norm": 0.4774325489997864, "learning_rate": 9.886050232671358e-06, "loss": 0.0146, "step": 38200 }, { "epoch": 0.3226446560131726, "grad_norm": 0.3919534981250763, "learning_rate": 9.885893758274263e-06, "loss": 0.025, "step": 38210 }, { "epoch": 0.3227290958603365, "grad_norm": 0.5680050849914551, "learning_rate": 9.885737177756554e-06, "loss": 0.0199, "step": 38220 }, { "epoch": 0.3228135357075004, "grad_norm": 0.634876012802124, "learning_rate": 9.885580491121635e-06, "loss": 0.0208, "step": 38230 }, { "epoch": 0.32289797555466426, "grad_norm": 0.23763012886047363, "learning_rate": 9.885423698372912e-06, "loss": 0.0212, "step": 38240 }, { "epoch": 0.32298241540182815, "grad_norm": 0.6927350759506226, "learning_rate": 9.885266799513784e-06, "loss": 0.0117, "step": 38250 }, { "epoch": 0.323066855248992, "grad_norm": 0.30693671107292175, "learning_rate": 9.885109794547663e-06, "loss": 0.0207, "step": 38260 }, { "epoch": 0.32315129509615587, "grad_norm": 0.7111374139785767, "learning_rate": 9.88495268347796e-06, "loss": 0.0201, "step": 38270 }, { "epoch": 0.32323573494331975, "grad_norm": 0.7159861326217651, "learning_rate": 9.884795466308084e-06, "loss": 0.0107, "step": 38280 }, { "epoch": 0.32332017479048364, "grad_norm": 0.3638116717338562, "learning_rate": 9.884638143041451e-06, "loss": 0.0147, "step": 38290 }, { "epoch": 0.3234046146376475, "grad_norm": 0.37360894680023193, "learning_rate": 9.88448071368148e-06, "loss": 0.0159, "step": 38300 }, { "epoch": 0.32348905448481136, "grad_norm": 0.6110513806343079, "learning_rate": 9.884323178231588e-06, "loss": 0.0201, "step": 38310 }, { "epoch": 0.32357349433197524, "grad_norm": 0.4705067574977875, "learning_rate": 9.884165536695197e-06, "loss": 0.0117, "step": 38320 }, { "epoch": 0.32365793417913913, "grad_norm": 0.21369044482707977, "learning_rate": 9.884007789075732e-06, "loss": 0.0128, "step": 38330 }, { "epoch": 0.323742374026303, "grad_norm": 0.7452293038368225, "learning_rate": 9.88384993537662e-06, "loss": 0.0253, "step": 38340 }, { "epoch": 0.3238268138734669, "grad_norm": 0.32155418395996094, "learning_rate": 9.883691975601287e-06, "loss": 0.0129, "step": 38350 }, { "epoch": 0.3239112537206308, "grad_norm": 0.13826215267181396, "learning_rate": 9.883533909753165e-06, "loss": 0.0142, "step": 38360 }, { "epoch": 0.3239956935677946, "grad_norm": 0.4582575857639313, "learning_rate": 9.883375737835685e-06, "loss": 0.0269, "step": 38370 }, { "epoch": 0.3240801334149585, "grad_norm": 0.6996370553970337, "learning_rate": 9.883217459852288e-06, "loss": 0.0144, "step": 38380 }, { "epoch": 0.3241645732621224, "grad_norm": 0.4133835434913635, "learning_rate": 9.883059075806408e-06, "loss": 0.0142, "step": 38390 }, { "epoch": 0.3242490131092863, "grad_norm": 0.11199208348989487, "learning_rate": 9.882900585701484e-06, "loss": 0.0106, "step": 38400 }, { "epoch": 0.32433345295645016, "grad_norm": 0.6018553972244263, "learning_rate": 9.88274198954096e-06, "loss": 0.0157, "step": 38410 }, { "epoch": 0.32441789280361405, "grad_norm": 0.15190330147743225, "learning_rate": 9.882583287328283e-06, "loss": 0.0161, "step": 38420 }, { "epoch": 0.3245023326507779, "grad_norm": 0.9225510954856873, "learning_rate": 9.882424479066894e-06, "loss": 0.0155, "step": 38430 }, { "epoch": 0.32458677249794177, "grad_norm": 0.21446651220321655, "learning_rate": 9.882265564760247e-06, "loss": 0.0172, "step": 38440 }, { "epoch": 0.32467121234510565, "grad_norm": 0.4632764756679535, "learning_rate": 9.882106544411793e-06, "loss": 0.0157, "step": 38450 }, { "epoch": 0.32475565219226954, "grad_norm": 0.10676569491624832, "learning_rate": 9.881947418024986e-06, "loss": 0.0179, "step": 38460 }, { "epoch": 0.3248400920394334, "grad_norm": 1.2231009006500244, "learning_rate": 9.88178818560328e-06, "loss": 0.0237, "step": 38470 }, { "epoch": 0.3249245318865973, "grad_norm": 0.4512556493282318, "learning_rate": 9.881628847150136e-06, "loss": 0.0208, "step": 38480 }, { "epoch": 0.32500897173376114, "grad_norm": 0.34672728180885315, "learning_rate": 9.881469402669015e-06, "loss": 0.0169, "step": 38490 }, { "epoch": 0.32509341158092503, "grad_norm": 0.1723146140575409, "learning_rate": 9.881309852163376e-06, "loss": 0.013, "step": 38500 }, { "epoch": 0.3251778514280889, "grad_norm": 0.3672271966934204, "learning_rate": 9.881150195636689e-06, "loss": 0.0144, "step": 38510 }, { "epoch": 0.3252622912752528, "grad_norm": 0.65793377161026, "learning_rate": 9.880990433092418e-06, "loss": 0.0205, "step": 38520 }, { "epoch": 0.3253467311224167, "grad_norm": 0.27473264932632446, "learning_rate": 9.880830564534038e-06, "loss": 0.02, "step": 38530 }, { "epoch": 0.3254311709695805, "grad_norm": 0.7231922745704651, "learning_rate": 9.880670589965019e-06, "loss": 0.026, "step": 38540 }, { "epoch": 0.3255156108167444, "grad_norm": 0.12913990020751953, "learning_rate": 9.880510509388832e-06, "loss": 0.0162, "step": 38550 }, { "epoch": 0.3256000506639083, "grad_norm": 0.4506487548351288, "learning_rate": 9.880350322808957e-06, "loss": 0.0111, "step": 38560 }, { "epoch": 0.3256844905110722, "grad_norm": 0.2437579184770584, "learning_rate": 9.880190030228873e-06, "loss": 0.0134, "step": 38570 }, { "epoch": 0.32576893035823606, "grad_norm": 0.25329527258872986, "learning_rate": 9.880029631652064e-06, "loss": 0.0143, "step": 38580 }, { "epoch": 0.32585337020539995, "grad_norm": 0.22194547951221466, "learning_rate": 9.879869127082008e-06, "loss": 0.0166, "step": 38590 }, { "epoch": 0.3259378100525638, "grad_norm": 0.12158586084842682, "learning_rate": 9.879708516522197e-06, "loss": 0.0129, "step": 38600 }, { "epoch": 0.32602224989972767, "grad_norm": 0.43394237756729126, "learning_rate": 9.879547799976118e-06, "loss": 0.0173, "step": 38610 }, { "epoch": 0.32610668974689155, "grad_norm": 0.36129242181777954, "learning_rate": 9.879386977447258e-06, "loss": 0.0184, "step": 38620 }, { "epoch": 0.32619112959405544, "grad_norm": 0.41957542300224304, "learning_rate": 9.879226048939114e-06, "loss": 0.0096, "step": 38630 }, { "epoch": 0.3262755694412193, "grad_norm": 0.0873279795050621, "learning_rate": 9.879065014455179e-06, "loss": 0.0132, "step": 38640 }, { "epoch": 0.3263600092883832, "grad_norm": 0.5906350612640381, "learning_rate": 9.878903873998952e-06, "loss": 0.0268, "step": 38650 }, { "epoch": 0.32644444913554704, "grad_norm": 0.2852047383785248, "learning_rate": 9.878742627573935e-06, "loss": 0.013, "step": 38660 }, { "epoch": 0.32652888898271093, "grad_norm": 0.475939005613327, "learning_rate": 9.878581275183625e-06, "loss": 0.028, "step": 38670 }, { "epoch": 0.3266133288298748, "grad_norm": 0.1633816957473755, "learning_rate": 9.87841981683153e-06, "loss": 0.0157, "step": 38680 }, { "epoch": 0.3266977686770387, "grad_norm": 0.42607924342155457, "learning_rate": 9.878258252521159e-06, "loss": 0.0115, "step": 38690 }, { "epoch": 0.3267822085242026, "grad_norm": 0.2740992307662964, "learning_rate": 9.878096582256013e-06, "loss": 0.0157, "step": 38700 }, { "epoch": 0.3268666483713665, "grad_norm": 0.39706897735595703, "learning_rate": 9.877934806039613e-06, "loss": 0.0348, "step": 38710 }, { "epoch": 0.3269510882185303, "grad_norm": 2.4698052406311035, "learning_rate": 9.877772923875469e-06, "loss": 0.0159, "step": 38720 }, { "epoch": 0.3270355280656942, "grad_norm": 0.29608991742134094, "learning_rate": 9.877610935767095e-06, "loss": 0.0109, "step": 38730 }, { "epoch": 0.3271199679128581, "grad_norm": 0.9781742095947266, "learning_rate": 9.87744884171801e-06, "loss": 0.0279, "step": 38740 }, { "epoch": 0.32720440776002196, "grad_norm": 0.3255426287651062, "learning_rate": 9.877286641731737e-06, "loss": 0.014, "step": 38750 }, { "epoch": 0.32728884760718585, "grad_norm": 0.1855112910270691, "learning_rate": 9.877124335811797e-06, "loss": 0.0158, "step": 38760 }, { "epoch": 0.3273732874543497, "grad_norm": 0.485100120306015, "learning_rate": 9.876961923961717e-06, "loss": 0.0124, "step": 38770 }, { "epoch": 0.32745772730151357, "grad_norm": 0.6117292046546936, "learning_rate": 9.876799406185023e-06, "loss": 0.0155, "step": 38780 }, { "epoch": 0.32754216714867745, "grad_norm": 0.4406290054321289, "learning_rate": 9.876636782485245e-06, "loss": 0.0149, "step": 38790 }, { "epoch": 0.32762660699584134, "grad_norm": 0.654266357421875, "learning_rate": 9.876474052865915e-06, "loss": 0.0156, "step": 38800 }, { "epoch": 0.3277110468430052, "grad_norm": 0.8390688300132751, "learning_rate": 9.876311217330567e-06, "loss": 0.0184, "step": 38810 }, { "epoch": 0.3277954866901691, "grad_norm": 0.5542684197425842, "learning_rate": 9.87614827588274e-06, "loss": 0.0157, "step": 38820 }, { "epoch": 0.32787992653733294, "grad_norm": 0.3690391778945923, "learning_rate": 9.875985228525973e-06, "loss": 0.016, "step": 38830 }, { "epoch": 0.32796436638449683, "grad_norm": 0.14865586161613464, "learning_rate": 9.875822075263803e-06, "loss": 0.0166, "step": 38840 }, { "epoch": 0.3280488062316607, "grad_norm": 0.5660854578018188, "learning_rate": 9.87565881609978e-06, "loss": 0.019, "step": 38850 }, { "epoch": 0.3281332460788246, "grad_norm": 0.5471311211585999, "learning_rate": 9.875495451037445e-06, "loss": 0.0135, "step": 38860 }, { "epoch": 0.3282176859259885, "grad_norm": 0.40065544843673706, "learning_rate": 9.875331980080348e-06, "loss": 0.0235, "step": 38870 }, { "epoch": 0.3283021257731524, "grad_norm": 0.9576102495193481, "learning_rate": 9.875168403232038e-06, "loss": 0.0165, "step": 38880 }, { "epoch": 0.3283865656203162, "grad_norm": 1.0841076374053955, "learning_rate": 9.87500472049607e-06, "loss": 0.0169, "step": 38890 }, { "epoch": 0.3284710054674801, "grad_norm": 0.2100243866443634, "learning_rate": 9.874840931876e-06, "loss": 0.0209, "step": 38900 }, { "epoch": 0.328555445314644, "grad_norm": 0.49025991559028625, "learning_rate": 9.874677037375382e-06, "loss": 0.0205, "step": 38910 }, { "epoch": 0.32863988516180787, "grad_norm": 0.4916420876979828, "learning_rate": 9.874513036997778e-06, "loss": 0.0163, "step": 38920 }, { "epoch": 0.32872432500897175, "grad_norm": 0.4194941520690918, "learning_rate": 9.874348930746751e-06, "loss": 0.0205, "step": 38930 }, { "epoch": 0.32880876485613564, "grad_norm": 0.563065767288208, "learning_rate": 9.874184718625863e-06, "loss": 0.0114, "step": 38940 }, { "epoch": 0.32889320470329947, "grad_norm": 0.3010135293006897, "learning_rate": 9.874020400638682e-06, "loss": 0.014, "step": 38950 }, { "epoch": 0.32897764455046336, "grad_norm": 0.29201650619506836, "learning_rate": 9.873855976788775e-06, "loss": 0.0218, "step": 38960 }, { "epoch": 0.32906208439762724, "grad_norm": 0.2432214617729187, "learning_rate": 9.873691447079718e-06, "loss": 0.0161, "step": 38970 }, { "epoch": 0.3291465242447911, "grad_norm": 0.23664279282093048, "learning_rate": 9.87352681151508e-06, "loss": 0.0171, "step": 38980 }, { "epoch": 0.329230964091955, "grad_norm": 0.03480397164821625, "learning_rate": 9.873362070098438e-06, "loss": 0.0189, "step": 38990 }, { "epoch": 0.32931540393911884, "grad_norm": 0.31630897521972656, "learning_rate": 9.873197222833371e-06, "loss": 0.0185, "step": 39000 }, { "epoch": 0.32939984378628273, "grad_norm": 0.6962526440620422, "learning_rate": 9.873032269723459e-06, "loss": 0.0169, "step": 39010 }, { "epoch": 0.3294842836334466, "grad_norm": 0.9861292839050293, "learning_rate": 9.872867210772285e-06, "loss": 0.0245, "step": 39020 }, { "epoch": 0.3295687234806105, "grad_norm": 0.39211246371269226, "learning_rate": 9.872702045983433e-06, "loss": 0.0253, "step": 39030 }, { "epoch": 0.3296531633277744, "grad_norm": 0.8568999171257019, "learning_rate": 9.872536775360491e-06, "loss": 0.0138, "step": 39040 }, { "epoch": 0.3297376031749383, "grad_norm": 0.5513879656791687, "learning_rate": 9.872371398907049e-06, "loss": 0.0126, "step": 39050 }, { "epoch": 0.3298220430221021, "grad_norm": 0.6128337979316711, "learning_rate": 9.872205916626698e-06, "loss": 0.0242, "step": 39060 }, { "epoch": 0.329906482869266, "grad_norm": 0.7460026741027832, "learning_rate": 9.872040328523034e-06, "loss": 0.0175, "step": 39070 }, { "epoch": 0.3299909227164299, "grad_norm": 0.22719469666481018, "learning_rate": 9.871874634599652e-06, "loss": 0.0098, "step": 39080 }, { "epoch": 0.33007536256359377, "grad_norm": 1.0441166162490845, "learning_rate": 9.871708834860151e-06, "loss": 0.0181, "step": 39090 }, { "epoch": 0.33015980241075765, "grad_norm": 0.44570475816726685, "learning_rate": 9.871542929308134e-06, "loss": 0.0159, "step": 39100 }, { "epoch": 0.33024424225792154, "grad_norm": 0.6481784582138062, "learning_rate": 9.8713769179472e-06, "loss": 0.0195, "step": 39110 }, { "epoch": 0.33032868210508537, "grad_norm": 0.600463330745697, "learning_rate": 9.871210800780959e-06, "loss": 0.0186, "step": 39120 }, { "epoch": 0.33041312195224926, "grad_norm": 0.575313150882721, "learning_rate": 9.871044577813018e-06, "loss": 0.0184, "step": 39130 }, { "epoch": 0.33049756179941314, "grad_norm": 0.06374791264533997, "learning_rate": 9.870878249046987e-06, "loss": 0.0105, "step": 39140 }, { "epoch": 0.33058200164657703, "grad_norm": 0.23958483338356018, "learning_rate": 9.870711814486479e-06, "loss": 0.02, "step": 39150 }, { "epoch": 0.3306664414937409, "grad_norm": 0.4246731996536255, "learning_rate": 9.870545274135108e-06, "loss": 0.0108, "step": 39160 }, { "epoch": 0.33075088134090475, "grad_norm": 0.3546631634235382, "learning_rate": 9.870378627996491e-06, "loss": 0.0096, "step": 39170 }, { "epoch": 0.33083532118806863, "grad_norm": 0.46741417050361633, "learning_rate": 9.870211876074249e-06, "loss": 0.0124, "step": 39180 }, { "epoch": 0.3309197610352325, "grad_norm": 0.4363301396369934, "learning_rate": 9.870045018372002e-06, "loss": 0.0154, "step": 39190 }, { "epoch": 0.3310042008823964, "grad_norm": 0.9771584868431091, "learning_rate": 9.869878054893376e-06, "loss": 0.0373, "step": 39200 }, { "epoch": 0.3310886407295603, "grad_norm": 1.2813242673873901, "learning_rate": 9.869710985641997e-06, "loss": 0.0241, "step": 39210 }, { "epoch": 0.3311730805767242, "grad_norm": 0.26419737935066223, "learning_rate": 9.869543810621493e-06, "loss": 0.0154, "step": 39220 }, { "epoch": 0.331257520423888, "grad_norm": 1.0575802326202393, "learning_rate": 9.869376529835495e-06, "loss": 0.0148, "step": 39230 }, { "epoch": 0.3313419602710519, "grad_norm": 0.4841301739215851, "learning_rate": 9.869209143287637e-06, "loss": 0.0211, "step": 39240 }, { "epoch": 0.3314264001182158, "grad_norm": 0.4612232446670532, "learning_rate": 9.869041650981553e-06, "loss": 0.011, "step": 39250 }, { "epoch": 0.33151083996537967, "grad_norm": 0.34757962822914124, "learning_rate": 9.868874052920882e-06, "loss": 0.0199, "step": 39260 }, { "epoch": 0.33159527981254355, "grad_norm": 0.505975604057312, "learning_rate": 9.868706349109267e-06, "loss": 0.0189, "step": 39270 }, { "epoch": 0.33167971965970744, "grad_norm": 0.28359055519104004, "learning_rate": 9.868538539550346e-06, "loss": 0.0114, "step": 39280 }, { "epoch": 0.33176415950687127, "grad_norm": 0.4132102429866791, "learning_rate": 9.868370624247764e-06, "loss": 0.0175, "step": 39290 }, { "epoch": 0.33184859935403516, "grad_norm": 0.5317903161048889, "learning_rate": 9.868202603205173e-06, "loss": 0.0142, "step": 39300 }, { "epoch": 0.33193303920119904, "grad_norm": 0.7278638482093811, "learning_rate": 9.868034476426216e-06, "loss": 0.0225, "step": 39310 }, { "epoch": 0.33201747904836293, "grad_norm": 0.11268208920955658, "learning_rate": 9.86786624391455e-06, "loss": 0.0149, "step": 39320 }, { "epoch": 0.3321019188955268, "grad_norm": 0.33070674538612366, "learning_rate": 9.867697905673825e-06, "loss": 0.0142, "step": 39330 }, { "epoch": 0.3321863587426907, "grad_norm": 0.5519323348999023, "learning_rate": 9.867529461707701e-06, "loss": 0.0212, "step": 39340 }, { "epoch": 0.33227079858985453, "grad_norm": 0.4154856204986572, "learning_rate": 9.867360912019833e-06, "loss": 0.015, "step": 39350 }, { "epoch": 0.3323552384370184, "grad_norm": 0.33757883310317993, "learning_rate": 9.867192256613884e-06, "loss": 0.0137, "step": 39360 }, { "epoch": 0.3324396782841823, "grad_norm": 0.7523974180221558, "learning_rate": 9.867023495493517e-06, "loss": 0.0161, "step": 39370 }, { "epoch": 0.3325241181313462, "grad_norm": 0.6767920255661011, "learning_rate": 9.866854628662396e-06, "loss": 0.0166, "step": 39380 }, { "epoch": 0.3326085579785101, "grad_norm": 0.1681596040725708, "learning_rate": 9.86668565612419e-06, "loss": 0.0182, "step": 39390 }, { "epoch": 0.3326929978256739, "grad_norm": 0.23646672070026398, "learning_rate": 9.866516577882568e-06, "loss": 0.0126, "step": 39400 }, { "epoch": 0.3327774376728378, "grad_norm": 0.43757766485214233, "learning_rate": 9.866347393941205e-06, "loss": 0.0162, "step": 39410 }, { "epoch": 0.3328618775200017, "grad_norm": 0.17290031909942627, "learning_rate": 9.866178104303773e-06, "loss": 0.0093, "step": 39420 }, { "epoch": 0.33294631736716557, "grad_norm": 0.35320842266082764, "learning_rate": 9.866008708973948e-06, "loss": 0.0187, "step": 39430 }, { "epoch": 0.33303075721432945, "grad_norm": 0.11594273149967194, "learning_rate": 9.865839207955414e-06, "loss": 0.0105, "step": 39440 }, { "epoch": 0.33311519706149334, "grad_norm": 1.1447813510894775, "learning_rate": 9.865669601251848e-06, "loss": 0.0216, "step": 39450 }, { "epoch": 0.33319963690865717, "grad_norm": 0.4022064208984375, "learning_rate": 9.865499888866937e-06, "loss": 0.0174, "step": 39460 }, { "epoch": 0.33328407675582106, "grad_norm": 0.35567936301231384, "learning_rate": 9.865330070804362e-06, "loss": 0.022, "step": 39470 }, { "epoch": 0.33336851660298494, "grad_norm": 0.1373256891965866, "learning_rate": 9.865160147067819e-06, "loss": 0.0121, "step": 39480 }, { "epoch": 0.33345295645014883, "grad_norm": 0.645761251449585, "learning_rate": 9.864990117660992e-06, "loss": 0.0174, "step": 39490 }, { "epoch": 0.3335373962973127, "grad_norm": 0.2258705049753189, "learning_rate": 9.864819982587576e-06, "loss": 0.0223, "step": 39500 }, { "epoch": 0.3336218361444766, "grad_norm": 0.683625340461731, "learning_rate": 9.864649741851268e-06, "loss": 0.018, "step": 39510 }, { "epoch": 0.33370627599164043, "grad_norm": 0.5774014592170715, "learning_rate": 9.864479395455764e-06, "loss": 0.0134, "step": 39520 }, { "epoch": 0.3337907158388043, "grad_norm": 0.21574057638645172, "learning_rate": 9.864308943404765e-06, "loss": 0.0165, "step": 39530 }, { "epoch": 0.3338751556859682, "grad_norm": 0.43025124073028564, "learning_rate": 9.864138385701972e-06, "loss": 0.019, "step": 39540 }, { "epoch": 0.3339595955331321, "grad_norm": 0.6085658073425293, "learning_rate": 9.86396772235109e-06, "loss": 0.0216, "step": 39550 }, { "epoch": 0.334044035380296, "grad_norm": 0.9280481338500977, "learning_rate": 9.863796953355826e-06, "loss": 0.0141, "step": 39560 }, { "epoch": 0.33412847522745986, "grad_norm": 0.35785508155822754, "learning_rate": 9.863626078719889e-06, "loss": 0.0178, "step": 39570 }, { "epoch": 0.3342129150746237, "grad_norm": 0.6328158974647522, "learning_rate": 9.86345509844699e-06, "loss": 0.0181, "step": 39580 }, { "epoch": 0.3342973549217876, "grad_norm": 0.3452287018299103, "learning_rate": 9.863284012540843e-06, "loss": 0.0173, "step": 39590 }, { "epoch": 0.33438179476895147, "grad_norm": 0.4080866277217865, "learning_rate": 9.863112821005165e-06, "loss": 0.0219, "step": 39600 }, { "epoch": 0.33446623461611535, "grad_norm": 0.10354027897119522, "learning_rate": 9.86294152384367e-06, "loss": 0.0193, "step": 39610 }, { "epoch": 0.33455067446327924, "grad_norm": 0.5575851202011108, "learning_rate": 9.862770121060083e-06, "loss": 0.0258, "step": 39620 }, { "epoch": 0.33463511431044307, "grad_norm": 0.6047977209091187, "learning_rate": 9.862598612658127e-06, "loss": 0.0154, "step": 39630 }, { "epoch": 0.33471955415760696, "grad_norm": 0.630804717540741, "learning_rate": 9.862426998641525e-06, "loss": 0.0146, "step": 39640 }, { "epoch": 0.33480399400477084, "grad_norm": 0.661264181137085, "learning_rate": 9.862255279014004e-06, "loss": 0.0102, "step": 39650 }, { "epoch": 0.33488843385193473, "grad_norm": 0.17906981706619263, "learning_rate": 9.862083453779296e-06, "loss": 0.0139, "step": 39660 }, { "epoch": 0.3349728736990986, "grad_norm": 0.4404414892196655, "learning_rate": 9.86191152294113e-06, "loss": 0.0094, "step": 39670 }, { "epoch": 0.3350573135462625, "grad_norm": 0.5044915080070496, "learning_rate": 9.861739486503244e-06, "loss": 0.022, "step": 39680 }, { "epoch": 0.33514175339342633, "grad_norm": 0.4355093836784363, "learning_rate": 9.861567344469372e-06, "loss": 0.0121, "step": 39690 }, { "epoch": 0.3352261932405902, "grad_norm": 0.7766475677490234, "learning_rate": 9.861395096843252e-06, "loss": 0.0164, "step": 39700 }, { "epoch": 0.3353106330877541, "grad_norm": 0.48812469840049744, "learning_rate": 9.861222743628629e-06, "loss": 0.0185, "step": 39710 }, { "epoch": 0.335395072934918, "grad_norm": 0.3535558879375458, "learning_rate": 9.861050284829241e-06, "loss": 0.0149, "step": 39720 }, { "epoch": 0.3354795127820819, "grad_norm": 0.695252537727356, "learning_rate": 9.86087772044884e-06, "loss": 0.0113, "step": 39730 }, { "epoch": 0.33556395262924577, "grad_norm": 0.1217043399810791, "learning_rate": 9.860705050491169e-06, "loss": 0.0264, "step": 39740 }, { "epoch": 0.3356483924764096, "grad_norm": 0.3560740649700165, "learning_rate": 9.860532274959982e-06, "loss": 0.0121, "step": 39750 }, { "epoch": 0.3357328323235735, "grad_norm": 0.4288644790649414, "learning_rate": 9.860359393859029e-06, "loss": 0.0128, "step": 39760 }, { "epoch": 0.33581727217073737, "grad_norm": 0.23249530792236328, "learning_rate": 9.860186407192066e-06, "loss": 0.0236, "step": 39770 }, { "epoch": 0.33590171201790125, "grad_norm": 1.4396709203720093, "learning_rate": 9.860013314962851e-06, "loss": 0.0245, "step": 39780 }, { "epoch": 0.33598615186506514, "grad_norm": 0.8006957769393921, "learning_rate": 9.85984011717514e-06, "loss": 0.0157, "step": 39790 }, { "epoch": 0.336070591712229, "grad_norm": 0.415446013212204, "learning_rate": 9.859666813832702e-06, "loss": 0.0165, "step": 39800 }, { "epoch": 0.33615503155939286, "grad_norm": 0.4415375292301178, "learning_rate": 9.859493404939293e-06, "loss": 0.0226, "step": 39810 }, { "epoch": 0.33623947140655674, "grad_norm": 0.2626778483390808, "learning_rate": 9.859319890498684e-06, "loss": 0.0154, "step": 39820 }, { "epoch": 0.33632391125372063, "grad_norm": 0.3479436933994293, "learning_rate": 9.859146270514643e-06, "loss": 0.0125, "step": 39830 }, { "epoch": 0.3364083511008845, "grad_norm": 0.10094145685434341, "learning_rate": 9.85897254499094e-06, "loss": 0.0099, "step": 39840 }, { "epoch": 0.3364927909480484, "grad_norm": 0.6929450035095215, "learning_rate": 9.858798713931351e-06, "loss": 0.0226, "step": 39850 }, { "epoch": 0.33657723079521223, "grad_norm": 0.25724080204963684, "learning_rate": 9.858624777339649e-06, "loss": 0.0102, "step": 39860 }, { "epoch": 0.3366616706423761, "grad_norm": 0.9865303039550781, "learning_rate": 9.858450735219611e-06, "loss": 0.0155, "step": 39870 }, { "epoch": 0.33674611048954, "grad_norm": 0.7658450603485107, "learning_rate": 9.85827658757502e-06, "loss": 0.023, "step": 39880 }, { "epoch": 0.3368305503367039, "grad_norm": 0.5731362104415894, "learning_rate": 9.858102334409656e-06, "loss": 0.0152, "step": 39890 }, { "epoch": 0.3369149901838678, "grad_norm": 0.80445396900177, "learning_rate": 9.857927975727306e-06, "loss": 0.0223, "step": 39900 }, { "epoch": 0.33699943003103167, "grad_norm": 0.27241286635398865, "learning_rate": 9.857753511531756e-06, "loss": 0.0155, "step": 39910 }, { "epoch": 0.3370838698781955, "grad_norm": 0.4739223122596741, "learning_rate": 9.857578941826793e-06, "loss": 0.0095, "step": 39920 }, { "epoch": 0.3371683097253594, "grad_norm": 0.36120346188545227, "learning_rate": 9.857404266616213e-06, "loss": 0.0105, "step": 39930 }, { "epoch": 0.33725274957252327, "grad_norm": 0.2197287380695343, "learning_rate": 9.857229485903808e-06, "loss": 0.0153, "step": 39940 }, { "epoch": 0.33733718941968716, "grad_norm": 1.0376688241958618, "learning_rate": 9.857054599693372e-06, "loss": 0.0301, "step": 39950 }, { "epoch": 0.33742162926685104, "grad_norm": 0.3720361590385437, "learning_rate": 9.856879607988706e-06, "loss": 0.015, "step": 39960 }, { "epoch": 0.33750606911401493, "grad_norm": 0.33463919162750244, "learning_rate": 9.856704510793611e-06, "loss": 0.0174, "step": 39970 }, { "epoch": 0.33759050896117876, "grad_norm": 0.4498008191585541, "learning_rate": 9.85652930811189e-06, "loss": 0.0211, "step": 39980 }, { "epoch": 0.33767494880834265, "grad_norm": 0.4894177317619324, "learning_rate": 9.856353999947344e-06, "loss": 0.0178, "step": 39990 }, { "epoch": 0.33775938865550653, "grad_norm": 0.10930493474006653, "learning_rate": 9.856178586303786e-06, "loss": 0.0138, "step": 40000 }, { "epoch": 0.3378438285026704, "grad_norm": 0.4200774133205414, "learning_rate": 9.856003067185025e-06, "loss": 0.0164, "step": 40010 }, { "epoch": 0.3379282683498343, "grad_norm": 0.48158761858940125, "learning_rate": 9.855827442594873e-06, "loss": 0.0179, "step": 40020 }, { "epoch": 0.3380127081969982, "grad_norm": 0.9737389087677002, "learning_rate": 9.855651712537143e-06, "loss": 0.0208, "step": 40030 }, { "epoch": 0.338097148044162, "grad_norm": 0.2940516173839569, "learning_rate": 9.855475877015653e-06, "loss": 0.0206, "step": 40040 }, { "epoch": 0.3381815878913259, "grad_norm": 0.1738031953573227, "learning_rate": 9.855299936034222e-06, "loss": 0.011, "step": 40050 }, { "epoch": 0.3382660277384898, "grad_norm": 0.6435337662696838, "learning_rate": 9.855123889596672e-06, "loss": 0.0165, "step": 40060 }, { "epoch": 0.3383504675856537, "grad_norm": 0.7570912837982178, "learning_rate": 9.854947737706825e-06, "loss": 0.0176, "step": 40070 }, { "epoch": 0.33843490743281757, "grad_norm": 0.7070111036300659, "learning_rate": 9.854771480368508e-06, "loss": 0.0178, "step": 40080 }, { "epoch": 0.3385193472799814, "grad_norm": 0.7938956618309021, "learning_rate": 9.854595117585551e-06, "loss": 0.0179, "step": 40090 }, { "epoch": 0.3386037871271453, "grad_norm": 0.3445296287536621, "learning_rate": 9.85441864936178e-06, "loss": 0.013, "step": 40100 }, { "epoch": 0.33868822697430917, "grad_norm": 0.1866961121559143, "learning_rate": 9.854242075701033e-06, "loss": 0.0197, "step": 40110 }, { "epoch": 0.33877266682147306, "grad_norm": 0.4206261932849884, "learning_rate": 9.854065396607142e-06, "loss": 0.022, "step": 40120 }, { "epoch": 0.33885710666863694, "grad_norm": 0.7601265907287598, "learning_rate": 9.853888612083946e-06, "loss": 0.0143, "step": 40130 }, { "epoch": 0.33894154651580083, "grad_norm": 0.22890131175518036, "learning_rate": 9.853711722135281e-06, "loss": 0.0127, "step": 40140 }, { "epoch": 0.33902598636296466, "grad_norm": 0.9074074029922485, "learning_rate": 9.853534726764997e-06, "loss": 0.0294, "step": 40150 }, { "epoch": 0.33911042621012855, "grad_norm": 0.3291747272014618, "learning_rate": 9.85335762597693e-06, "loss": 0.0124, "step": 40160 }, { "epoch": 0.33919486605729243, "grad_norm": 0.24538777768611908, "learning_rate": 9.85318041977493e-06, "loss": 0.0261, "step": 40170 }, { "epoch": 0.3392793059044563, "grad_norm": 0.5622690320014954, "learning_rate": 9.853003108162847e-06, "loss": 0.0171, "step": 40180 }, { "epoch": 0.3393637457516202, "grad_norm": 1.0066472291946411, "learning_rate": 9.85282569114453e-06, "loss": 0.0245, "step": 40190 }, { "epoch": 0.3394481855987841, "grad_norm": 0.26297006011009216, "learning_rate": 9.852648168723834e-06, "loss": 0.0193, "step": 40200 }, { "epoch": 0.3395326254459479, "grad_norm": 0.018176961690187454, "learning_rate": 9.852470540904614e-06, "loss": 0.0204, "step": 40210 }, { "epoch": 0.3396170652931118, "grad_norm": 0.406406968832016, "learning_rate": 9.852292807690729e-06, "loss": 0.0145, "step": 40220 }, { "epoch": 0.3397015051402757, "grad_norm": 0.4886947572231293, "learning_rate": 9.852114969086038e-06, "loss": 0.0091, "step": 40230 }, { "epoch": 0.3397859449874396, "grad_norm": 0.5450629591941833, "learning_rate": 9.851937025094402e-06, "loss": 0.0262, "step": 40240 }, { "epoch": 0.33987038483460347, "grad_norm": 0.7944597601890564, "learning_rate": 9.851758975719691e-06, "loss": 0.0207, "step": 40250 }, { "epoch": 0.33995482468176735, "grad_norm": 0.20915094017982483, "learning_rate": 9.851580820965767e-06, "loss": 0.0134, "step": 40260 }, { "epoch": 0.3400392645289312, "grad_norm": 0.3642213046550751, "learning_rate": 9.851402560836504e-06, "loss": 0.017, "step": 40270 }, { "epoch": 0.34012370437609507, "grad_norm": 0.3032413721084595, "learning_rate": 9.85122419533577e-06, "loss": 0.0142, "step": 40280 }, { "epoch": 0.34020814422325896, "grad_norm": 0.36687877774238586, "learning_rate": 9.85104572446744e-06, "loss": 0.018, "step": 40290 }, { "epoch": 0.34029258407042284, "grad_norm": 0.4815066158771515, "learning_rate": 9.850867148235393e-06, "loss": 0.019, "step": 40300 }, { "epoch": 0.34037702391758673, "grad_norm": 0.9767453074455261, "learning_rate": 9.850688466643506e-06, "loss": 0.0263, "step": 40310 }, { "epoch": 0.34046146376475056, "grad_norm": 0.5037928819656372, "learning_rate": 9.850509679695657e-06, "loss": 0.0129, "step": 40320 }, { "epoch": 0.34054590361191445, "grad_norm": 0.33316847681999207, "learning_rate": 9.850330787395732e-06, "loss": 0.0243, "step": 40330 }, { "epoch": 0.34063034345907833, "grad_norm": 0.3439544141292572, "learning_rate": 9.850151789747618e-06, "loss": 0.0146, "step": 40340 }, { "epoch": 0.3407147833062422, "grad_norm": 0.25384247303009033, "learning_rate": 9.8499726867552e-06, "loss": 0.0223, "step": 40350 }, { "epoch": 0.3407992231534061, "grad_norm": 0.488675981760025, "learning_rate": 9.84979347842237e-06, "loss": 0.0091, "step": 40360 }, { "epoch": 0.34088366300057, "grad_norm": 0.6372178196907043, "learning_rate": 9.849614164753019e-06, "loss": 0.0246, "step": 40370 }, { "epoch": 0.3409681028477338, "grad_norm": 0.27890437841415405, "learning_rate": 9.849434745751041e-06, "loss": 0.0134, "step": 40380 }, { "epoch": 0.3410525426948977, "grad_norm": 1.01375412940979, "learning_rate": 9.849255221420336e-06, "loss": 0.0159, "step": 40390 }, { "epoch": 0.3411369825420616, "grad_norm": 0.11661692708730698, "learning_rate": 9.849075591764798e-06, "loss": 0.0165, "step": 40400 }, { "epoch": 0.3412214223892255, "grad_norm": 0.3836638927459717, "learning_rate": 9.848895856788334e-06, "loss": 0.0205, "step": 40410 }, { "epoch": 0.34130586223638937, "grad_norm": 0.23179776966571808, "learning_rate": 9.848716016494847e-06, "loss": 0.0268, "step": 40420 }, { "epoch": 0.34139030208355325, "grad_norm": 0.1824246197938919, "learning_rate": 9.848536070888239e-06, "loss": 0.0129, "step": 40430 }, { "epoch": 0.3414747419307171, "grad_norm": 0.19556047022342682, "learning_rate": 9.848356019972422e-06, "loss": 0.0191, "step": 40440 }, { "epoch": 0.34155918177788097, "grad_norm": 1.104767918586731, "learning_rate": 9.848175863751307e-06, "loss": 0.0178, "step": 40450 }, { "epoch": 0.34164362162504486, "grad_norm": 0.7926121354103088, "learning_rate": 9.847995602228805e-06, "loss": 0.0218, "step": 40460 }, { "epoch": 0.34172806147220874, "grad_norm": 0.6691994667053223, "learning_rate": 9.847815235408832e-06, "loss": 0.0156, "step": 40470 }, { "epoch": 0.34181250131937263, "grad_norm": 0.4436074495315552, "learning_rate": 9.847634763295306e-06, "loss": 0.0173, "step": 40480 }, { "epoch": 0.34189694116653646, "grad_norm": 0.8241327404975891, "learning_rate": 9.847454185892145e-06, "loss": 0.0288, "step": 40490 }, { "epoch": 0.34198138101370035, "grad_norm": 0.18434783816337585, "learning_rate": 9.847273503203273e-06, "loss": 0.0286, "step": 40500 }, { "epoch": 0.34206582086086423, "grad_norm": 0.2651118338108063, "learning_rate": 9.847092715232614e-06, "loss": 0.0128, "step": 40510 }, { "epoch": 0.3421502607080281, "grad_norm": 0.27354246377944946, "learning_rate": 9.846911821984095e-06, "loss": 0.0156, "step": 40520 }, { "epoch": 0.342234700555192, "grad_norm": 0.4465189278125763, "learning_rate": 9.846730823461644e-06, "loss": 0.0251, "step": 40530 }, { "epoch": 0.3423191404023559, "grad_norm": 0.2714497148990631, "learning_rate": 9.846549719669191e-06, "loss": 0.0092, "step": 40540 }, { "epoch": 0.3424035802495197, "grad_norm": 0.6009504795074463, "learning_rate": 9.846368510610673e-06, "loss": 0.012, "step": 40550 }, { "epoch": 0.3424880200966836, "grad_norm": 1.0062198638916016, "learning_rate": 9.846187196290024e-06, "loss": 0.0165, "step": 40560 }, { "epoch": 0.3425724599438475, "grad_norm": 0.4324374496936798, "learning_rate": 9.846005776711183e-06, "loss": 0.0203, "step": 40570 }, { "epoch": 0.3426568997910114, "grad_norm": 0.8428201675415039, "learning_rate": 9.845824251878088e-06, "loss": 0.0139, "step": 40580 }, { "epoch": 0.34274133963817527, "grad_norm": 0.13847161829471588, "learning_rate": 9.845642621794683e-06, "loss": 0.0178, "step": 40590 }, { "epoch": 0.34282577948533915, "grad_norm": 0.7523601651191711, "learning_rate": 9.845460886464914e-06, "loss": 0.03, "step": 40600 }, { "epoch": 0.342910219332503, "grad_norm": 0.6194416880607605, "learning_rate": 9.845279045892728e-06, "loss": 0.019, "step": 40610 }, { "epoch": 0.34299465917966687, "grad_norm": 0.7342014908790588, "learning_rate": 9.845097100082074e-06, "loss": 0.0178, "step": 40620 }, { "epoch": 0.34307909902683076, "grad_norm": 1.0363807678222656, "learning_rate": 9.844915049036903e-06, "loss": 0.0312, "step": 40630 }, { "epoch": 0.34316353887399464, "grad_norm": 0.7419235706329346, "learning_rate": 9.844732892761171e-06, "loss": 0.0196, "step": 40640 }, { "epoch": 0.34324797872115853, "grad_norm": 0.48151645064353943, "learning_rate": 9.844550631258833e-06, "loss": 0.0128, "step": 40650 }, { "epoch": 0.3433324185683224, "grad_norm": 0.3631047010421753, "learning_rate": 9.844368264533847e-06, "loss": 0.0186, "step": 40660 }, { "epoch": 0.34341685841548625, "grad_norm": 0.7554516792297363, "learning_rate": 9.844185792590176e-06, "loss": 0.0205, "step": 40670 }, { "epoch": 0.34350129826265013, "grad_norm": 0.38204145431518555, "learning_rate": 9.84400321543178e-06, "loss": 0.0168, "step": 40680 }, { "epoch": 0.343585738109814, "grad_norm": 0.2407083511352539, "learning_rate": 9.84382053306263e-06, "loss": 0.0303, "step": 40690 }, { "epoch": 0.3436701779569779, "grad_norm": 0.42922624945640564, "learning_rate": 9.84363774548669e-06, "loss": 0.0259, "step": 40700 }, { "epoch": 0.3437546178041418, "grad_norm": 0.3898789882659912, "learning_rate": 9.84345485270793e-06, "loss": 0.0159, "step": 40710 }, { "epoch": 0.3438390576513056, "grad_norm": 0.3832091987133026, "learning_rate": 9.843271854730324e-06, "loss": 0.0206, "step": 40720 }, { "epoch": 0.3439234974984695, "grad_norm": 0.7984740138053894, "learning_rate": 9.843088751557843e-06, "loss": 0.0184, "step": 40730 }, { "epoch": 0.3440079373456334, "grad_norm": 0.6242927312850952, "learning_rate": 9.842905543194469e-06, "loss": 0.0283, "step": 40740 }, { "epoch": 0.3440923771927973, "grad_norm": 0.9376715421676636, "learning_rate": 9.84272222964418e-06, "loss": 0.0163, "step": 40750 }, { "epoch": 0.34417681703996117, "grad_norm": 0.24128375947475433, "learning_rate": 9.842538810910956e-06, "loss": 0.0133, "step": 40760 }, { "epoch": 0.34426125688712506, "grad_norm": 0.4233151078224182, "learning_rate": 9.842355286998779e-06, "loss": 0.0194, "step": 40770 }, { "epoch": 0.3443456967342889, "grad_norm": 0.5917053818702698, "learning_rate": 9.84217165791164e-06, "loss": 0.0146, "step": 40780 }, { "epoch": 0.3444301365814528, "grad_norm": 0.30676814913749695, "learning_rate": 9.841987923653524e-06, "loss": 0.0152, "step": 40790 }, { "epoch": 0.34451457642861666, "grad_norm": 0.2584719955921173, "learning_rate": 9.841804084228423e-06, "loss": 0.0162, "step": 40800 }, { "epoch": 0.34459901627578055, "grad_norm": 0.2523493468761444, "learning_rate": 9.84162013964033e-06, "loss": 0.0154, "step": 40810 }, { "epoch": 0.34468345612294443, "grad_norm": 0.8342496752738953, "learning_rate": 9.841436089893237e-06, "loss": 0.0171, "step": 40820 }, { "epoch": 0.3447678959701083, "grad_norm": 1.1008899211883545, "learning_rate": 9.841251934991147e-06, "loss": 0.0218, "step": 40830 }, { "epoch": 0.34485233581727215, "grad_norm": 0.43306413292884827, "learning_rate": 9.841067674938054e-06, "loss": 0.0092, "step": 40840 }, { "epoch": 0.34493677566443603, "grad_norm": 0.3763328492641449, "learning_rate": 9.840883309737966e-06, "loss": 0.0224, "step": 40850 }, { "epoch": 0.3450212155115999, "grad_norm": 0.43215587735176086, "learning_rate": 9.840698839394884e-06, "loss": 0.016, "step": 40860 }, { "epoch": 0.3451056553587638, "grad_norm": 0.3230215907096863, "learning_rate": 9.840514263912815e-06, "loss": 0.0168, "step": 40870 }, { "epoch": 0.3451900952059277, "grad_norm": 0.39698708057403564, "learning_rate": 9.840329583295767e-06, "loss": 0.0132, "step": 40880 }, { "epoch": 0.3452745350530916, "grad_norm": 1.6643810272216797, "learning_rate": 9.840144797547752e-06, "loss": 0.0247, "step": 40890 }, { "epoch": 0.3453589749002554, "grad_norm": 1.3447004556655884, "learning_rate": 9.839959906672786e-06, "loss": 0.03, "step": 40900 }, { "epoch": 0.3454434147474193, "grad_norm": 0.38925158977508545, "learning_rate": 9.839774910674882e-06, "loss": 0.0244, "step": 40910 }, { "epoch": 0.3455278545945832, "grad_norm": 0.3734906017780304, "learning_rate": 9.83958980955806e-06, "loss": 0.0106, "step": 40920 }, { "epoch": 0.34561229444174707, "grad_norm": 0.4419344961643219, "learning_rate": 9.839404603326336e-06, "loss": 0.0128, "step": 40930 }, { "epoch": 0.34569673428891096, "grad_norm": 0.6158347129821777, "learning_rate": 9.839219291983737e-06, "loss": 0.0245, "step": 40940 }, { "epoch": 0.3457811741360748, "grad_norm": 0.3017117977142334, "learning_rate": 9.839033875534287e-06, "loss": 0.013, "step": 40950 }, { "epoch": 0.3458656139832387, "grad_norm": 0.009681619703769684, "learning_rate": 9.838848353982012e-06, "loss": 0.0207, "step": 40960 }, { "epoch": 0.34595005383040256, "grad_norm": 0.7836440205574036, "learning_rate": 9.838662727330943e-06, "loss": 0.0144, "step": 40970 }, { "epoch": 0.34603449367756645, "grad_norm": 0.3864240348339081, "learning_rate": 9.838476995585111e-06, "loss": 0.0206, "step": 40980 }, { "epoch": 0.34611893352473033, "grad_norm": 0.6436413526535034, "learning_rate": 9.838291158748549e-06, "loss": 0.0231, "step": 40990 }, { "epoch": 0.3462033733718942, "grad_norm": 0.499884694814682, "learning_rate": 9.838105216825298e-06, "loss": 0.0179, "step": 41000 }, { "epoch": 0.34628781321905805, "grad_norm": 0.44654911756515503, "learning_rate": 9.83791916981939e-06, "loss": 0.0104, "step": 41010 }, { "epoch": 0.34637225306622194, "grad_norm": 0.43427443504333496, "learning_rate": 9.83773301773487e-06, "loss": 0.0214, "step": 41020 }, { "epoch": 0.3464566929133858, "grad_norm": 0.2808274030685425, "learning_rate": 9.83754676057578e-06, "loss": 0.0146, "step": 41030 }, { "epoch": 0.3465411327605497, "grad_norm": 0.36398810148239136, "learning_rate": 9.837360398346165e-06, "loss": 0.0107, "step": 41040 }, { "epoch": 0.3466255726077136, "grad_norm": 0.036026205867528915, "learning_rate": 9.837173931050075e-06, "loss": 0.0179, "step": 41050 }, { "epoch": 0.3467100124548775, "grad_norm": 0.6538885235786438, "learning_rate": 9.836987358691557e-06, "loss": 0.0183, "step": 41060 }, { "epoch": 0.3467944523020413, "grad_norm": 0.536529541015625, "learning_rate": 9.836800681274665e-06, "loss": 0.0155, "step": 41070 }, { "epoch": 0.3468788921492052, "grad_norm": 0.46984028816223145, "learning_rate": 9.836613898803452e-06, "loss": 0.0162, "step": 41080 }, { "epoch": 0.3469633319963691, "grad_norm": 0.341572642326355, "learning_rate": 9.83642701128198e-06, "loss": 0.0226, "step": 41090 }, { "epoch": 0.34704777184353297, "grad_norm": 0.5001272559165955, "learning_rate": 9.836240018714302e-06, "loss": 0.0126, "step": 41100 }, { "epoch": 0.34713221169069686, "grad_norm": 0.4251266419887543, "learning_rate": 9.836052921104482e-06, "loss": 0.0209, "step": 41110 }, { "epoch": 0.34721665153786074, "grad_norm": 0.10142862051725388, "learning_rate": 9.835865718456583e-06, "loss": 0.0121, "step": 41120 }, { "epoch": 0.3473010913850246, "grad_norm": 0.07627008110284805, "learning_rate": 9.835678410774673e-06, "loss": 0.0128, "step": 41130 }, { "epoch": 0.34738553123218846, "grad_norm": 0.7838097214698792, "learning_rate": 9.835490998062818e-06, "loss": 0.0177, "step": 41140 }, { "epoch": 0.34746997107935235, "grad_norm": 0.3011516034603119, "learning_rate": 9.835303480325088e-06, "loss": 0.0135, "step": 41150 }, { "epoch": 0.34755441092651623, "grad_norm": 0.5070050358772278, "learning_rate": 9.83511585756556e-06, "loss": 0.0121, "step": 41160 }, { "epoch": 0.3476388507736801, "grad_norm": 0.521452784538269, "learning_rate": 9.834928129788305e-06, "loss": 0.0161, "step": 41170 }, { "epoch": 0.34772329062084395, "grad_norm": 0.5311946272850037, "learning_rate": 9.834740296997402e-06, "loss": 0.0156, "step": 41180 }, { "epoch": 0.34780773046800784, "grad_norm": 0.4798286557197571, "learning_rate": 9.834552359196931e-06, "loss": 0.012, "step": 41190 }, { "epoch": 0.3478921703151717, "grad_norm": 0.24096590280532837, "learning_rate": 9.834364316390972e-06, "loss": 0.0113, "step": 41200 }, { "epoch": 0.3479766101623356, "grad_norm": 0.08276009559631348, "learning_rate": 9.834176168583611e-06, "loss": 0.0201, "step": 41210 }, { "epoch": 0.3480610500094995, "grad_norm": 0.3266957700252533, "learning_rate": 9.833987915778935e-06, "loss": 0.013, "step": 41220 }, { "epoch": 0.3481454898566634, "grad_norm": 0.3667276203632355, "learning_rate": 9.833799557981032e-06, "loss": 0.0155, "step": 41230 }, { "epoch": 0.3482299297038272, "grad_norm": 0.2032022327184677, "learning_rate": 9.833611095193993e-06, "loss": 0.0147, "step": 41240 }, { "epoch": 0.3483143695509911, "grad_norm": 0.1850304752588272, "learning_rate": 9.833422527421912e-06, "loss": 0.0092, "step": 41250 }, { "epoch": 0.348398809398155, "grad_norm": 0.6419050097465515, "learning_rate": 9.833233854668883e-06, "loss": 0.0112, "step": 41260 }, { "epoch": 0.34848324924531887, "grad_norm": 0.3727477490901947, "learning_rate": 9.833045076939006e-06, "loss": 0.0168, "step": 41270 }, { "epoch": 0.34856768909248276, "grad_norm": 0.36125266551971436, "learning_rate": 9.83285619423638e-06, "loss": 0.0184, "step": 41280 }, { "epoch": 0.34865212893964664, "grad_norm": 0.1955556869506836, "learning_rate": 9.832667206565108e-06, "loss": 0.0163, "step": 41290 }, { "epoch": 0.3487365687868105, "grad_norm": 0.6229550242424011, "learning_rate": 9.832478113929295e-06, "loss": 0.021, "step": 41300 }, { "epoch": 0.34882100863397436, "grad_norm": 0.9880120754241943, "learning_rate": 9.832288916333046e-06, "loss": 0.0141, "step": 41310 }, { "epoch": 0.34890544848113825, "grad_norm": 0.5672401189804077, "learning_rate": 9.832099613780474e-06, "loss": 0.0225, "step": 41320 }, { "epoch": 0.34898988832830213, "grad_norm": 0.3990262746810913, "learning_rate": 9.831910206275688e-06, "loss": 0.016, "step": 41330 }, { "epoch": 0.349074328175466, "grad_norm": 0.17718903720378876, "learning_rate": 9.8317206938228e-06, "loss": 0.0122, "step": 41340 }, { "epoch": 0.3491587680226299, "grad_norm": 0.794573187828064, "learning_rate": 9.831531076425932e-06, "loss": 0.0169, "step": 41350 }, { "epoch": 0.34924320786979374, "grad_norm": 0.4494243860244751, "learning_rate": 9.831341354089199e-06, "loss": 0.0151, "step": 41360 }, { "epoch": 0.3493276477169576, "grad_norm": 0.48209166526794434, "learning_rate": 9.83115152681672e-06, "loss": 0.0148, "step": 41370 }, { "epoch": 0.3494120875641215, "grad_norm": 0.4290556311607361, "learning_rate": 9.83096159461262e-06, "loss": 0.0093, "step": 41380 }, { "epoch": 0.3494965274112854, "grad_norm": 0.5587393045425415, "learning_rate": 9.830771557481024e-06, "loss": 0.014, "step": 41390 }, { "epoch": 0.3495809672584493, "grad_norm": 0.5462266206741333, "learning_rate": 9.830581415426062e-06, "loss": 0.0217, "step": 41400 }, { "epoch": 0.3496654071056131, "grad_norm": 0.46176984906196594, "learning_rate": 9.830391168451858e-06, "loss": 0.0122, "step": 41410 }, { "epoch": 0.349749846952777, "grad_norm": 0.3453681468963623, "learning_rate": 9.83020081656255e-06, "loss": 0.0164, "step": 41420 }, { "epoch": 0.3498342867999409, "grad_norm": 0.5246922969818115, "learning_rate": 9.83001035976227e-06, "loss": 0.0253, "step": 41430 }, { "epoch": 0.34991872664710477, "grad_norm": 1.0485397577285767, "learning_rate": 9.829819798055153e-06, "loss": 0.0136, "step": 41440 }, { "epoch": 0.35000316649426866, "grad_norm": 0.6194220781326294, "learning_rate": 9.829629131445342e-06, "loss": 0.0182, "step": 41450 }, { "epoch": 0.35008760634143254, "grad_norm": 0.44131338596343994, "learning_rate": 9.829438359936976e-06, "loss": 0.0133, "step": 41460 }, { "epoch": 0.3501720461885964, "grad_norm": 0.26119333505630493, "learning_rate": 9.829247483534197e-06, "loss": 0.0154, "step": 41470 }, { "epoch": 0.35025648603576026, "grad_norm": 0.38638681173324585, "learning_rate": 9.829056502241152e-06, "loss": 0.018, "step": 41480 }, { "epoch": 0.35034092588292415, "grad_norm": 0.41086047887802124, "learning_rate": 9.82886541606199e-06, "loss": 0.018, "step": 41490 }, { "epoch": 0.35042536573008803, "grad_norm": 0.24564798176288605, "learning_rate": 9.82867422500086e-06, "loss": 0.0171, "step": 41500 }, { "epoch": 0.3505098055772519, "grad_norm": 0.6527855396270752, "learning_rate": 9.828482929061918e-06, "loss": 0.0184, "step": 41510 }, { "epoch": 0.3505942454244158, "grad_norm": 0.8748036623001099, "learning_rate": 9.828291528249313e-06, "loss": 0.0249, "step": 41520 }, { "epoch": 0.35067868527157964, "grad_norm": 0.3120797574520111, "learning_rate": 9.828100022567207e-06, "loss": 0.0139, "step": 41530 }, { "epoch": 0.3507631251187435, "grad_norm": 0.3545888066291809, "learning_rate": 9.827908412019758e-06, "loss": 0.0164, "step": 41540 }, { "epoch": 0.3508475649659074, "grad_norm": 0.9058964848518372, "learning_rate": 9.827716696611127e-06, "loss": 0.024, "step": 41550 }, { "epoch": 0.3509320048130713, "grad_norm": 0.2988397181034088, "learning_rate": 9.827524876345479e-06, "loss": 0.0165, "step": 41560 }, { "epoch": 0.3510164446602352, "grad_norm": 0.07098110765218735, "learning_rate": 9.827332951226979e-06, "loss": 0.0418, "step": 41570 }, { "epoch": 0.35110088450739907, "grad_norm": 0.6189855933189392, "learning_rate": 9.827140921259799e-06, "loss": 0.0152, "step": 41580 }, { "epoch": 0.3511853243545629, "grad_norm": 0.03619833290576935, "learning_rate": 9.826948786448105e-06, "loss": 0.0167, "step": 41590 }, { "epoch": 0.3512697642017268, "grad_norm": 0.2765769958496094, "learning_rate": 9.826756546796073e-06, "loss": 0.0112, "step": 41600 }, { "epoch": 0.3513542040488907, "grad_norm": 0.2429056316614151, "learning_rate": 9.826564202307878e-06, "loss": 0.0157, "step": 41610 }, { "epoch": 0.35143864389605456, "grad_norm": 0.2891162037849426, "learning_rate": 9.826371752987698e-06, "loss": 0.0128, "step": 41620 }, { "epoch": 0.35152308374321845, "grad_norm": 0.8600901961326599, "learning_rate": 9.826179198839712e-06, "loss": 0.0132, "step": 41630 }, { "epoch": 0.3516075235903823, "grad_norm": 0.625361442565918, "learning_rate": 9.825986539868101e-06, "loss": 0.0191, "step": 41640 }, { "epoch": 0.35169196343754616, "grad_norm": 0.47601231932640076, "learning_rate": 9.825793776077056e-06, "loss": 0.0181, "step": 41650 }, { "epoch": 0.35177640328471005, "grad_norm": 0.4754060208797455, "learning_rate": 9.825600907470755e-06, "loss": 0.0225, "step": 41660 }, { "epoch": 0.35186084313187393, "grad_norm": 0.6040816307067871, "learning_rate": 9.825407934053392e-06, "loss": 0.0182, "step": 41670 }, { "epoch": 0.3519452829790378, "grad_norm": 1.1316802501678467, "learning_rate": 9.82521485582916e-06, "loss": 0.0178, "step": 41680 }, { "epoch": 0.3520297228262017, "grad_norm": 0.4951711595058441, "learning_rate": 9.825021672802247e-06, "loss": 0.0127, "step": 41690 }, { "epoch": 0.35211416267336554, "grad_norm": 0.6014373898506165, "learning_rate": 9.824828384976853e-06, "loss": 0.015, "step": 41700 }, { "epoch": 0.3521986025205294, "grad_norm": 0.04162167012691498, "learning_rate": 9.824634992357176e-06, "loss": 0.012, "step": 41710 }, { "epoch": 0.3522830423676933, "grad_norm": 0.30727630853652954, "learning_rate": 9.824441494947415e-06, "loss": 0.0199, "step": 41720 }, { "epoch": 0.3523674822148572, "grad_norm": 0.1509166955947876, "learning_rate": 9.824247892751773e-06, "loss": 0.0178, "step": 41730 }, { "epoch": 0.3524519220620211, "grad_norm": 0.9066462516784668, "learning_rate": 9.824054185774455e-06, "loss": 0.0159, "step": 41740 }, { "epoch": 0.35253636190918497, "grad_norm": 0.5506599545478821, "learning_rate": 9.82386037401967e-06, "loss": 0.0177, "step": 41750 }, { "epoch": 0.3526208017563488, "grad_norm": 0.4775465726852417, "learning_rate": 9.823666457491625e-06, "loss": 0.012, "step": 41760 }, { "epoch": 0.3527052416035127, "grad_norm": 0.3366394639015198, "learning_rate": 9.823472436194533e-06, "loss": 0.0124, "step": 41770 }, { "epoch": 0.3527896814506766, "grad_norm": 0.6187310218811035, "learning_rate": 9.823278310132609e-06, "loss": 0.027, "step": 41780 }, { "epoch": 0.35287412129784046, "grad_norm": 0.4443396031856537, "learning_rate": 9.823084079310069e-06, "loss": 0.0219, "step": 41790 }, { "epoch": 0.35295856114500435, "grad_norm": 0.581816554069519, "learning_rate": 9.82288974373113e-06, "loss": 0.0223, "step": 41800 }, { "epoch": 0.3530430009921682, "grad_norm": 0.6513648629188538, "learning_rate": 9.822695303400013e-06, "loss": 0.0097, "step": 41810 }, { "epoch": 0.35312744083933206, "grad_norm": 0.10740835964679718, "learning_rate": 9.822500758320943e-06, "loss": 0.0094, "step": 41820 }, { "epoch": 0.35321188068649595, "grad_norm": 0.5391808748245239, "learning_rate": 9.822306108498147e-06, "loss": 0.0137, "step": 41830 }, { "epoch": 0.35329632053365984, "grad_norm": 0.26156455278396606, "learning_rate": 9.822111353935848e-06, "loss": 0.0112, "step": 41840 }, { "epoch": 0.3533807603808237, "grad_norm": 0.4703039228916168, "learning_rate": 9.82191649463828e-06, "loss": 0.0203, "step": 41850 }, { "epoch": 0.3534652002279876, "grad_norm": 0.578925371170044, "learning_rate": 9.821721530609672e-06, "loss": 0.0182, "step": 41860 }, { "epoch": 0.35354964007515144, "grad_norm": 0.3445691466331482, "learning_rate": 9.821526461854262e-06, "loss": 0.0306, "step": 41870 }, { "epoch": 0.3536340799223153, "grad_norm": 0.14040198922157288, "learning_rate": 9.821331288376285e-06, "loss": 0.0138, "step": 41880 }, { "epoch": 0.3537185197694792, "grad_norm": 0.3259347081184387, "learning_rate": 9.82113601017998e-06, "loss": 0.0196, "step": 41890 }, { "epoch": 0.3538029596166431, "grad_norm": 0.007530600763857365, "learning_rate": 9.820940627269589e-06, "loss": 0.0096, "step": 41900 }, { "epoch": 0.353887399463807, "grad_norm": 0.11494559049606323, "learning_rate": 9.820745139649355e-06, "loss": 0.0242, "step": 41910 }, { "epoch": 0.35397183931097087, "grad_norm": 0.7221099734306335, "learning_rate": 9.820549547323526e-06, "loss": 0.0262, "step": 41920 }, { "epoch": 0.3540562791581347, "grad_norm": 0.6351282596588135, "learning_rate": 9.820353850296346e-06, "loss": 0.0195, "step": 41930 }, { "epoch": 0.3541407190052986, "grad_norm": 0.6352200508117676, "learning_rate": 9.82015804857207e-06, "loss": 0.013, "step": 41940 }, { "epoch": 0.3542251588524625, "grad_norm": 0.5795859098434448, "learning_rate": 9.81996214215495e-06, "loss": 0.0149, "step": 41950 }, { "epoch": 0.35430959869962636, "grad_norm": 0.2617749869823456, "learning_rate": 9.819766131049239e-06, "loss": 0.0154, "step": 41960 }, { "epoch": 0.35439403854679025, "grad_norm": 0.37716859579086304, "learning_rate": 9.819570015259195e-06, "loss": 0.0159, "step": 41970 }, { "epoch": 0.35447847839395413, "grad_norm": 0.43011152744293213, "learning_rate": 9.819373794789077e-06, "loss": 0.0082, "step": 41980 }, { "epoch": 0.35456291824111796, "grad_norm": 0.4917515814304352, "learning_rate": 9.819177469643148e-06, "loss": 0.0137, "step": 41990 }, { "epoch": 0.35464735808828185, "grad_norm": 0.20735907554626465, "learning_rate": 9.818981039825673e-06, "loss": 0.0081, "step": 42000 }, { "epoch": 0.35473179793544574, "grad_norm": 0.5617019534111023, "learning_rate": 9.818784505340916e-06, "loss": 0.0203, "step": 42010 }, { "epoch": 0.3548162377826096, "grad_norm": 0.24491815268993378, "learning_rate": 9.81858786619315e-06, "loss": 0.0087, "step": 42020 }, { "epoch": 0.3549006776297735, "grad_norm": 0.5499271154403687, "learning_rate": 9.81839112238664e-06, "loss": 0.0124, "step": 42030 }, { "epoch": 0.35498511747693734, "grad_norm": 0.25578072667121887, "learning_rate": 9.818194273925663e-06, "loss": 0.0141, "step": 42040 }, { "epoch": 0.3550695573241012, "grad_norm": 0.3498833179473877, "learning_rate": 9.817997320814494e-06, "loss": 0.0121, "step": 42050 }, { "epoch": 0.3551539971712651, "grad_norm": 0.9778949022293091, "learning_rate": 9.81780026305741e-06, "loss": 0.0197, "step": 42060 }, { "epoch": 0.355238437018429, "grad_norm": 0.641150951385498, "learning_rate": 9.817603100658692e-06, "loss": 0.0146, "step": 42070 }, { "epoch": 0.3553228768655929, "grad_norm": 0.37319695949554443, "learning_rate": 9.817405833622622e-06, "loss": 0.0127, "step": 42080 }, { "epoch": 0.35540731671275677, "grad_norm": 0.3796336352825165, "learning_rate": 9.817208461953484e-06, "loss": 0.015, "step": 42090 }, { "epoch": 0.3554917565599206, "grad_norm": 0.612254798412323, "learning_rate": 9.817010985655566e-06, "loss": 0.0118, "step": 42100 }, { "epoch": 0.3555761964070845, "grad_norm": 0.20698203146457672, "learning_rate": 9.816813404733155e-06, "loss": 0.0149, "step": 42110 }, { "epoch": 0.3556606362542484, "grad_norm": 0.2940160632133484, "learning_rate": 9.816615719190545e-06, "loss": 0.0136, "step": 42120 }, { "epoch": 0.35574507610141226, "grad_norm": 0.39426037669181824, "learning_rate": 9.81641792903203e-06, "loss": 0.0257, "step": 42130 }, { "epoch": 0.35582951594857615, "grad_norm": 0.141378715634346, "learning_rate": 9.816220034261902e-06, "loss": 0.0164, "step": 42140 }, { "epoch": 0.35591395579574003, "grad_norm": 0.2747253477573395, "learning_rate": 9.816022034884463e-06, "loss": 0.0139, "step": 42150 }, { "epoch": 0.35599839564290386, "grad_norm": 0.4279266595840454, "learning_rate": 9.815823930904011e-06, "loss": 0.0095, "step": 42160 }, { "epoch": 0.35608283549006775, "grad_norm": 0.6176056265830994, "learning_rate": 9.815625722324852e-06, "loss": 0.015, "step": 42170 }, { "epoch": 0.35616727533723164, "grad_norm": 0.1647895723581314, "learning_rate": 9.815427409151288e-06, "loss": 0.0206, "step": 42180 }, { "epoch": 0.3562517151843955, "grad_norm": 0.847614049911499, "learning_rate": 9.815228991387628e-06, "loss": 0.0311, "step": 42190 }, { "epoch": 0.3563361550315594, "grad_norm": 0.7030911445617676, "learning_rate": 9.81503046903818e-06, "loss": 0.0151, "step": 42200 }, { "epoch": 0.3564205948787233, "grad_norm": 0.3477350175380707, "learning_rate": 9.814831842107259e-06, "loss": 0.0125, "step": 42210 }, { "epoch": 0.3565050347258871, "grad_norm": 0.33353883028030396, "learning_rate": 9.814633110599175e-06, "loss": 0.0195, "step": 42220 }, { "epoch": 0.356589474573051, "grad_norm": 0.9057306051254272, "learning_rate": 9.814434274518247e-06, "loss": 0.02, "step": 42230 }, { "epoch": 0.3566739144202149, "grad_norm": 0.4788852035999298, "learning_rate": 9.814235333868794e-06, "loss": 0.0189, "step": 42240 }, { "epoch": 0.3567583542673788, "grad_norm": 0.43796873092651367, "learning_rate": 9.814036288655135e-06, "loss": 0.0183, "step": 42250 }, { "epoch": 0.35684279411454267, "grad_norm": 0.4619568884372711, "learning_rate": 9.813837138881594e-06, "loss": 0.0182, "step": 42260 }, { "epoch": 0.3569272339617065, "grad_norm": 0.4668453633785248, "learning_rate": 9.813637884552499e-06, "loss": 0.0095, "step": 42270 }, { "epoch": 0.3570116738088704, "grad_norm": 0.2038099318742752, "learning_rate": 9.813438525672175e-06, "loss": 0.0083, "step": 42280 }, { "epoch": 0.3570961136560343, "grad_norm": 0.6286730170249939, "learning_rate": 9.813239062244951e-06, "loss": 0.0223, "step": 42290 }, { "epoch": 0.35718055350319816, "grad_norm": 0.8866361379623413, "learning_rate": 9.81303949427516e-06, "loss": 0.0203, "step": 42300 }, { "epoch": 0.35726499335036205, "grad_norm": 0.23011717200279236, "learning_rate": 9.812839821767139e-06, "loss": 0.0163, "step": 42310 }, { "epoch": 0.35734943319752593, "grad_norm": 0.533581554889679, "learning_rate": 9.812640044725223e-06, "loss": 0.0194, "step": 42320 }, { "epoch": 0.35743387304468976, "grad_norm": 0.2700953185558319, "learning_rate": 9.812440163153754e-06, "loss": 0.0161, "step": 42330 }, { "epoch": 0.35751831289185365, "grad_norm": 0.2008589506149292, "learning_rate": 9.812240177057067e-06, "loss": 0.0235, "step": 42340 }, { "epoch": 0.35760275273901754, "grad_norm": 0.49371519684791565, "learning_rate": 9.812040086439512e-06, "loss": 0.0195, "step": 42350 }, { "epoch": 0.3576871925861814, "grad_norm": 0.04921085387468338, "learning_rate": 9.81183989130543e-06, "loss": 0.0192, "step": 42360 }, { "epoch": 0.3577716324333453, "grad_norm": 0.1638750284910202, "learning_rate": 9.811639591659174e-06, "loss": 0.0085, "step": 42370 }, { "epoch": 0.3578560722805092, "grad_norm": 0.6024919748306274, "learning_rate": 9.811439187505092e-06, "loss": 0.027, "step": 42380 }, { "epoch": 0.357940512127673, "grad_norm": 0.40390899777412415, "learning_rate": 9.811238678847534e-06, "loss": 0.0185, "step": 42390 }, { "epoch": 0.3580249519748369, "grad_norm": 0.6699589490890503, "learning_rate": 9.811038065690861e-06, "loss": 0.0135, "step": 42400 }, { "epoch": 0.3581093918220008, "grad_norm": 0.4016490876674652, "learning_rate": 9.810837348039427e-06, "loss": 0.0094, "step": 42410 }, { "epoch": 0.3581938316691647, "grad_norm": 0.4312809407711029, "learning_rate": 9.81063652589759e-06, "loss": 0.0219, "step": 42420 }, { "epoch": 0.3582782715163286, "grad_norm": 0.2762916684150696, "learning_rate": 9.810435599269712e-06, "loss": 0.0162, "step": 42430 }, { "epoch": 0.35836271136349246, "grad_norm": 0.2618850767612457, "learning_rate": 9.810234568160163e-06, "loss": 0.0098, "step": 42440 }, { "epoch": 0.3584471512106563, "grad_norm": 0.6801089644432068, "learning_rate": 9.8100334325733e-06, "loss": 0.0164, "step": 42450 }, { "epoch": 0.3585315910578202, "grad_norm": 0.3928157389163971, "learning_rate": 9.8098321925135e-06, "loss": 0.0179, "step": 42460 }, { "epoch": 0.35861603090498406, "grad_norm": 0.1881593018770218, "learning_rate": 9.80963084798513e-06, "loss": 0.012, "step": 42470 }, { "epoch": 0.35870047075214795, "grad_norm": 0.3433957099914551, "learning_rate": 9.809429398992563e-06, "loss": 0.0162, "step": 42480 }, { "epoch": 0.35878491059931183, "grad_norm": 0.5019651651382446, "learning_rate": 9.809227845540175e-06, "loss": 0.0148, "step": 42490 }, { "epoch": 0.35886935044647567, "grad_norm": 0.43452540040016174, "learning_rate": 9.809026187632343e-06, "loss": 0.014, "step": 42500 }, { "epoch": 0.35895379029363955, "grad_norm": 0.5692149996757507, "learning_rate": 9.808824425273448e-06, "loss": 0.0205, "step": 42510 }, { "epoch": 0.35903823014080344, "grad_norm": 0.323072612285614, "learning_rate": 9.808622558467872e-06, "loss": 0.0155, "step": 42520 }, { "epoch": 0.3591226699879673, "grad_norm": 0.14980705082416534, "learning_rate": 9.80842058722e-06, "loss": 0.0279, "step": 42530 }, { "epoch": 0.3592071098351312, "grad_norm": 0.3213697373867035, "learning_rate": 9.808218511534218e-06, "loss": 0.0106, "step": 42540 }, { "epoch": 0.3592915496822951, "grad_norm": 1.4260185956954956, "learning_rate": 9.808016331414914e-06, "loss": 0.0204, "step": 42550 }, { "epoch": 0.35937598952945893, "grad_norm": 0.549897313117981, "learning_rate": 9.807814046866482e-06, "loss": 0.0192, "step": 42560 }, { "epoch": 0.3594604293766228, "grad_norm": 0.6670869588851929, "learning_rate": 9.807611657893313e-06, "loss": 0.0265, "step": 42570 }, { "epoch": 0.3595448692237867, "grad_norm": 1.034732460975647, "learning_rate": 9.807409164499805e-06, "loss": 0.0172, "step": 42580 }, { "epoch": 0.3596293090709506, "grad_norm": 0.6653253436088562, "learning_rate": 9.807206566690354e-06, "loss": 0.0259, "step": 42590 }, { "epoch": 0.3597137489181145, "grad_norm": 0.70106440782547, "learning_rate": 9.807003864469362e-06, "loss": 0.0221, "step": 42600 }, { "epoch": 0.35979818876527836, "grad_norm": 0.36862680315971375, "learning_rate": 9.80680105784123e-06, "loss": 0.0128, "step": 42610 }, { "epoch": 0.3598826286124422, "grad_norm": 0.28769856691360474, "learning_rate": 9.806598146810366e-06, "loss": 0.0124, "step": 42620 }, { "epoch": 0.3599670684596061, "grad_norm": 0.32093310356140137, "learning_rate": 9.806395131381176e-06, "loss": 0.0174, "step": 42630 }, { "epoch": 0.36005150830676996, "grad_norm": 0.5669400691986084, "learning_rate": 9.806192011558067e-06, "loss": 0.0123, "step": 42640 }, { "epoch": 0.36013594815393385, "grad_norm": 0.32245874404907227, "learning_rate": 9.805988787345452e-06, "loss": 0.0162, "step": 42650 }, { "epoch": 0.36022038800109774, "grad_norm": 0.13645651936531067, "learning_rate": 9.805785458747746e-06, "loss": 0.0084, "step": 42660 }, { "epoch": 0.3603048278482616, "grad_norm": 0.15841884911060333, "learning_rate": 9.805582025769365e-06, "loss": 0.0123, "step": 42670 }, { "epoch": 0.36038926769542545, "grad_norm": 0.3424074351787567, "learning_rate": 9.805378488414726e-06, "loss": 0.0107, "step": 42680 }, { "epoch": 0.36047370754258934, "grad_norm": 0.61085444688797, "learning_rate": 9.805174846688252e-06, "loss": 0.0177, "step": 42690 }, { "epoch": 0.3605581473897532, "grad_norm": 0.6736013889312744, "learning_rate": 9.804971100594365e-06, "loss": 0.0128, "step": 42700 }, { "epoch": 0.3606425872369171, "grad_norm": 0.4233173429965973, "learning_rate": 9.80476725013749e-06, "loss": 0.0168, "step": 42710 }, { "epoch": 0.360727027084081, "grad_norm": 0.704908013343811, "learning_rate": 9.804563295322053e-06, "loss": 0.0185, "step": 42720 }, { "epoch": 0.36081146693124483, "grad_norm": 0.16303972899913788, "learning_rate": 9.80435923615249e-06, "loss": 0.0154, "step": 42730 }, { "epoch": 0.3608959067784087, "grad_norm": 0.28857770562171936, "learning_rate": 9.804155072633226e-06, "loss": 0.0137, "step": 42740 }, { "epoch": 0.3609803466255726, "grad_norm": 0.6176055669784546, "learning_rate": 9.803950804768698e-06, "loss": 0.0205, "step": 42750 }, { "epoch": 0.3610647864727365, "grad_norm": 0.5381375551223755, "learning_rate": 9.803746432563344e-06, "loss": 0.0227, "step": 42760 }, { "epoch": 0.3611492263199004, "grad_norm": 0.4783633351325989, "learning_rate": 9.803541956021602e-06, "loss": 0.0277, "step": 42770 }, { "epoch": 0.36123366616706426, "grad_norm": 0.31535619497299194, "learning_rate": 9.803337375147912e-06, "loss": 0.0148, "step": 42780 }, { "epoch": 0.3613181060142281, "grad_norm": 0.6457907557487488, "learning_rate": 9.80313268994672e-06, "loss": 0.0189, "step": 42790 }, { "epoch": 0.361402545861392, "grad_norm": 0.8597951531410217, "learning_rate": 9.80292790042247e-06, "loss": 0.0096, "step": 42800 }, { "epoch": 0.36148698570855586, "grad_norm": 0.3180690109729767, "learning_rate": 9.80272300657961e-06, "loss": 0.0231, "step": 42810 }, { "epoch": 0.36157142555571975, "grad_norm": 0.13285662233829498, "learning_rate": 9.80251800842259e-06, "loss": 0.0101, "step": 42820 }, { "epoch": 0.36165586540288364, "grad_norm": 0.5910240411758423, "learning_rate": 9.802312905955861e-06, "loss": 0.0192, "step": 42830 }, { "epoch": 0.3617403052500475, "grad_norm": 0.3470640182495117, "learning_rate": 9.802107699183883e-06, "loss": 0.0137, "step": 42840 }, { "epoch": 0.36182474509721135, "grad_norm": 0.15776881575584412, "learning_rate": 9.801902388111109e-06, "loss": 0.0121, "step": 42850 }, { "epoch": 0.36190918494437524, "grad_norm": 0.3984823524951935, "learning_rate": 9.801696972741998e-06, "loss": 0.0118, "step": 42860 }, { "epoch": 0.3619936247915391, "grad_norm": 0.2751534879207611, "learning_rate": 9.801491453081015e-06, "loss": 0.0259, "step": 42870 }, { "epoch": 0.362078064638703, "grad_norm": 0.91523277759552, "learning_rate": 9.80128582913262e-06, "loss": 0.0168, "step": 42880 }, { "epoch": 0.3621625044858669, "grad_norm": 0.03643742576241493, "learning_rate": 9.80108010090128e-06, "loss": 0.0171, "step": 42890 }, { "epoch": 0.3622469443330308, "grad_norm": 0.6718101501464844, "learning_rate": 9.800874268391465e-06, "loss": 0.0184, "step": 42900 }, { "epoch": 0.3623313841801946, "grad_norm": 0.5015348792076111, "learning_rate": 9.800668331607644e-06, "loss": 0.0135, "step": 42910 }, { "epoch": 0.3624158240273585, "grad_norm": 0.4222857654094696, "learning_rate": 9.800462290554291e-06, "loss": 0.0275, "step": 42920 }, { "epoch": 0.3625002638745224, "grad_norm": 0.3836376965045929, "learning_rate": 9.80025614523588e-06, "loss": 0.0091, "step": 42930 }, { "epoch": 0.3625847037216863, "grad_norm": 0.670660138130188, "learning_rate": 9.80004989565689e-06, "loss": 0.0171, "step": 42940 }, { "epoch": 0.36266914356885016, "grad_norm": 0.12066522240638733, "learning_rate": 9.7998435418218e-06, "loss": 0.0188, "step": 42950 }, { "epoch": 0.362753583416014, "grad_norm": 0.6222934126853943, "learning_rate": 9.799637083735093e-06, "loss": 0.0193, "step": 42960 }, { "epoch": 0.3628380232631779, "grad_norm": 0.8818350434303284, "learning_rate": 9.799430521401251e-06, "loss": 0.0202, "step": 42970 }, { "epoch": 0.36292246311034176, "grad_norm": 0.3360476791858673, "learning_rate": 9.799223854824762e-06, "loss": 0.0251, "step": 42980 }, { "epoch": 0.36300690295750565, "grad_norm": 0.6393051743507385, "learning_rate": 9.799017084010114e-06, "loss": 0.0114, "step": 42990 }, { "epoch": 0.36309134280466954, "grad_norm": 0.7033851146697998, "learning_rate": 9.798810208961797e-06, "loss": 0.0152, "step": 43000 }, { "epoch": 0.3631757826518334, "grad_norm": 0.15403002500534058, "learning_rate": 9.798603229684306e-06, "loss": 0.0145, "step": 43010 }, { "epoch": 0.36326022249899725, "grad_norm": 0.6516137719154358, "learning_rate": 9.798396146182139e-06, "loss": 0.0137, "step": 43020 }, { "epoch": 0.36334466234616114, "grad_norm": 0.4684986472129822, "learning_rate": 9.798188958459789e-06, "loss": 0.021, "step": 43030 }, { "epoch": 0.363429102193325, "grad_norm": 0.36333927512168884, "learning_rate": 9.797981666521756e-06, "loss": 0.0163, "step": 43040 }, { "epoch": 0.3635135420404889, "grad_norm": 0.552803635597229, "learning_rate": 9.797774270372548e-06, "loss": 0.0128, "step": 43050 }, { "epoch": 0.3635979818876528, "grad_norm": 0.2920187711715698, "learning_rate": 9.797566770016664e-06, "loss": 0.0274, "step": 43060 }, { "epoch": 0.3636824217348167, "grad_norm": 0.3128267228603363, "learning_rate": 9.797359165458613e-06, "loss": 0.0186, "step": 43070 }, { "epoch": 0.3637668615819805, "grad_norm": 0.4593546688556671, "learning_rate": 9.797151456702905e-06, "loss": 0.0125, "step": 43080 }, { "epoch": 0.3638513014291444, "grad_norm": 0.4009689688682556, "learning_rate": 9.79694364375405e-06, "loss": 0.0096, "step": 43090 }, { "epoch": 0.3639357412763083, "grad_norm": 0.26045364141464233, "learning_rate": 9.796735726616561e-06, "loss": 0.0287, "step": 43100 }, { "epoch": 0.3640201811234722, "grad_norm": 0.30679282546043396, "learning_rate": 9.796527705294956e-06, "loss": 0.0299, "step": 43110 }, { "epoch": 0.36410462097063606, "grad_norm": 0.43707478046417236, "learning_rate": 9.796319579793751e-06, "loss": 0.0151, "step": 43120 }, { "epoch": 0.3641890608177999, "grad_norm": 0.3572850823402405, "learning_rate": 9.796111350117468e-06, "loss": 0.0174, "step": 43130 }, { "epoch": 0.3642735006649638, "grad_norm": 0.31134331226348877, "learning_rate": 9.795903016270631e-06, "loss": 0.0128, "step": 43140 }, { "epoch": 0.36435794051212766, "grad_norm": 0.6417958736419678, "learning_rate": 9.795694578257762e-06, "loss": 0.0152, "step": 43150 }, { "epoch": 0.36444238035929155, "grad_norm": 0.6505857110023499, "learning_rate": 9.795486036083388e-06, "loss": 0.0107, "step": 43160 }, { "epoch": 0.36452682020645544, "grad_norm": 0.22135408222675323, "learning_rate": 9.795277389752043e-06, "loss": 0.0235, "step": 43170 }, { "epoch": 0.3646112600536193, "grad_norm": 0.26489463448524475, "learning_rate": 9.795068639268252e-06, "loss": 0.0151, "step": 43180 }, { "epoch": 0.36469569990078315, "grad_norm": 0.4612411558628082, "learning_rate": 9.794859784636556e-06, "loss": 0.0157, "step": 43190 }, { "epoch": 0.36478013974794704, "grad_norm": 0.28124508261680603, "learning_rate": 9.794650825861487e-06, "loss": 0.0156, "step": 43200 }, { "epoch": 0.3648645795951109, "grad_norm": 0.361773818731308, "learning_rate": 9.794441762947585e-06, "loss": 0.0122, "step": 43210 }, { "epoch": 0.3649490194422748, "grad_norm": 0.2377055585384369, "learning_rate": 9.794232595899389e-06, "loss": 0.015, "step": 43220 }, { "epoch": 0.3650334592894387, "grad_norm": 0.4867222011089325, "learning_rate": 9.794023324721445e-06, "loss": 0.0212, "step": 43230 }, { "epoch": 0.3651178991366026, "grad_norm": 0.5061119794845581, "learning_rate": 9.793813949418295e-06, "loss": 0.0161, "step": 43240 }, { "epoch": 0.3652023389837664, "grad_norm": 0.2796095013618469, "learning_rate": 9.793604469994491e-06, "loss": 0.0185, "step": 43250 }, { "epoch": 0.3652867788309303, "grad_norm": 0.3083512783050537, "learning_rate": 9.79339488645458e-06, "loss": 0.0179, "step": 43260 }, { "epoch": 0.3653712186780942, "grad_norm": 0.4331669807434082, "learning_rate": 9.79318519880311e-06, "loss": 0.0132, "step": 43270 }, { "epoch": 0.3654556585252581, "grad_norm": 0.345880389213562, "learning_rate": 9.792975407044645e-06, "loss": 0.0119, "step": 43280 }, { "epoch": 0.36554009837242196, "grad_norm": 0.43351173400878906, "learning_rate": 9.792765511183733e-06, "loss": 0.0086, "step": 43290 }, { "epoch": 0.36562453821958585, "grad_norm": 0.6644691228866577, "learning_rate": 9.792555511224937e-06, "loss": 0.0169, "step": 43300 }, { "epoch": 0.3657089780667497, "grad_norm": 0.17856647074222565, "learning_rate": 9.792345407172818e-06, "loss": 0.0154, "step": 43310 }, { "epoch": 0.36579341791391357, "grad_norm": 0.21265067160129547, "learning_rate": 9.792135199031937e-06, "loss": 0.0106, "step": 43320 }, { "epoch": 0.36587785776107745, "grad_norm": 1.2209393978118896, "learning_rate": 9.791924886806866e-06, "loss": 0.0204, "step": 43330 }, { "epoch": 0.36596229760824134, "grad_norm": 0.5349112749099731, "learning_rate": 9.791714470502165e-06, "loss": 0.0209, "step": 43340 }, { "epoch": 0.3660467374554052, "grad_norm": 0.3052466809749603, "learning_rate": 9.791503950122408e-06, "loss": 0.0147, "step": 43350 }, { "epoch": 0.36613117730256906, "grad_norm": 0.34693342447280884, "learning_rate": 9.791293325672167e-06, "loss": 0.0286, "step": 43360 }, { "epoch": 0.36621561714973294, "grad_norm": 0.22715309262275696, "learning_rate": 9.791082597156016e-06, "loss": 0.0134, "step": 43370 }, { "epoch": 0.36630005699689683, "grad_norm": 0.3893168866634369, "learning_rate": 9.790871764578534e-06, "loss": 0.0102, "step": 43380 }, { "epoch": 0.3663844968440607, "grad_norm": 0.21317583322525024, "learning_rate": 9.790660827944299e-06, "loss": 0.0149, "step": 43390 }, { "epoch": 0.3664689366912246, "grad_norm": 0.06389522552490234, "learning_rate": 9.790449787257892e-06, "loss": 0.0145, "step": 43400 }, { "epoch": 0.3665533765383885, "grad_norm": 0.148281991481781, "learning_rate": 9.790238642523897e-06, "loss": 0.03, "step": 43410 }, { "epoch": 0.3666378163855523, "grad_norm": 0.5122378468513489, "learning_rate": 9.790027393746901e-06, "loss": 0.0161, "step": 43420 }, { "epoch": 0.3667222562327162, "grad_norm": 0.8475763201713562, "learning_rate": 9.789816040931491e-06, "loss": 0.0252, "step": 43430 }, { "epoch": 0.3668066960798801, "grad_norm": 0.6106105446815491, "learning_rate": 9.789604584082258e-06, "loss": 0.0132, "step": 43440 }, { "epoch": 0.366891135927044, "grad_norm": 0.4254794716835022, "learning_rate": 9.789393023203796e-06, "loss": 0.0142, "step": 43450 }, { "epoch": 0.36697557577420786, "grad_norm": 0.90081387758255, "learning_rate": 9.789181358300699e-06, "loss": 0.0153, "step": 43460 }, { "epoch": 0.36706001562137175, "grad_norm": 0.41032302379608154, "learning_rate": 9.788969589377562e-06, "loss": 0.0097, "step": 43470 }, { "epoch": 0.3671444554685356, "grad_norm": 0.38781654834747314, "learning_rate": 9.78875771643899e-06, "loss": 0.0133, "step": 43480 }, { "epoch": 0.36722889531569947, "grad_norm": 0.9421370625495911, "learning_rate": 9.78854573948958e-06, "loss": 0.0185, "step": 43490 }, { "epoch": 0.36731333516286335, "grad_norm": 0.6399651169776917, "learning_rate": 9.788333658533938e-06, "loss": 0.0166, "step": 43500 }, { "epoch": 0.36739777501002724, "grad_norm": 0.7791334390640259, "learning_rate": 9.78812147357667e-06, "loss": 0.0202, "step": 43510 }, { "epoch": 0.3674822148571911, "grad_norm": 0.44508877396583557, "learning_rate": 9.787909184622386e-06, "loss": 0.0136, "step": 43520 }, { "epoch": 0.367566654704355, "grad_norm": 0.5230858325958252, "learning_rate": 9.787696791675694e-06, "loss": 0.0237, "step": 43530 }, { "epoch": 0.36765109455151884, "grad_norm": 0.7618764042854309, "learning_rate": 9.78748429474121e-06, "loss": 0.0151, "step": 43540 }, { "epoch": 0.36773553439868273, "grad_norm": 0.4701038897037506, "learning_rate": 9.787271693823546e-06, "loss": 0.0192, "step": 43550 }, { "epoch": 0.3678199742458466, "grad_norm": 0.7179448008537292, "learning_rate": 9.787058988927324e-06, "loss": 0.0123, "step": 43560 }, { "epoch": 0.3679044140930105, "grad_norm": 0.14124371111392975, "learning_rate": 9.786846180057161e-06, "loss": 0.0115, "step": 43570 }, { "epoch": 0.3679888539401744, "grad_norm": 0.4076708257198334, "learning_rate": 9.78663326721768e-06, "loss": 0.0112, "step": 43580 }, { "epoch": 0.3680732937873382, "grad_norm": 0.4609985649585724, "learning_rate": 9.786420250413505e-06, "loss": 0.0263, "step": 43590 }, { "epoch": 0.3681577336345021, "grad_norm": 0.503278911113739, "learning_rate": 9.786207129649264e-06, "loss": 0.0153, "step": 43600 }, { "epoch": 0.368242173481666, "grad_norm": 0.44873932003974915, "learning_rate": 9.785993904929584e-06, "loss": 0.0173, "step": 43610 }, { "epoch": 0.3683266133288299, "grad_norm": 0.165707528591156, "learning_rate": 9.785780576259097e-06, "loss": 0.0154, "step": 43620 }, { "epoch": 0.36841105317599376, "grad_norm": 0.42990952730178833, "learning_rate": 9.785567143642437e-06, "loss": 0.0096, "step": 43630 }, { "epoch": 0.36849549302315765, "grad_norm": 0.901501476764679, "learning_rate": 9.785353607084239e-06, "loss": 0.0173, "step": 43640 }, { "epoch": 0.3685799328703215, "grad_norm": 0.39896610379219055, "learning_rate": 9.785139966589141e-06, "loss": 0.0222, "step": 43650 }, { "epoch": 0.36866437271748537, "grad_norm": 0.32276245951652527, "learning_rate": 9.784926222161785e-06, "loss": 0.0228, "step": 43660 }, { "epoch": 0.36874881256464925, "grad_norm": 2.0319535732269287, "learning_rate": 9.784712373806811e-06, "loss": 0.0162, "step": 43670 }, { "epoch": 0.36883325241181314, "grad_norm": 0.35065773129463196, "learning_rate": 9.784498421528864e-06, "loss": 0.0213, "step": 43680 }, { "epoch": 0.368917692258977, "grad_norm": 0.3364042639732361, "learning_rate": 9.784284365332593e-06, "loss": 0.0103, "step": 43690 }, { "epoch": 0.3690021321061409, "grad_norm": 0.3753051459789276, "learning_rate": 9.784070205222644e-06, "loss": 0.0133, "step": 43700 }, { "epoch": 0.36908657195330474, "grad_norm": 0.24546387791633606, "learning_rate": 9.783855941203673e-06, "loss": 0.0155, "step": 43710 }, { "epoch": 0.36917101180046863, "grad_norm": 0.4379958510398865, "learning_rate": 9.78364157328033e-06, "loss": 0.0151, "step": 43720 }, { "epoch": 0.3692554516476325, "grad_norm": 0.3772495985031128, "learning_rate": 9.783427101457272e-06, "loss": 0.0183, "step": 43730 }, { "epoch": 0.3693398914947964, "grad_norm": 0.4862182140350342, "learning_rate": 9.783212525739158e-06, "loss": 0.0165, "step": 43740 }, { "epoch": 0.3694243313419603, "grad_norm": 0.4658811092376709, "learning_rate": 9.782997846130648e-06, "loss": 0.0244, "step": 43750 }, { "epoch": 0.3695087711891242, "grad_norm": 0.4492747187614441, "learning_rate": 9.782783062636405e-06, "loss": 0.0165, "step": 43760 }, { "epoch": 0.369593211036288, "grad_norm": 0.41962742805480957, "learning_rate": 9.782568175261094e-06, "loss": 0.0202, "step": 43770 }, { "epoch": 0.3696776508834519, "grad_norm": 0.5684574246406555, "learning_rate": 9.782353184009383e-06, "loss": 0.0114, "step": 43780 }, { "epoch": 0.3697620907306158, "grad_norm": 0.2550623416900635, "learning_rate": 9.78213808888594e-06, "loss": 0.0183, "step": 43790 }, { "epoch": 0.36984653057777966, "grad_norm": 0.9817987084388733, "learning_rate": 9.781922889895437e-06, "loss": 0.0211, "step": 43800 }, { "epoch": 0.36993097042494355, "grad_norm": 0.35145139694213867, "learning_rate": 9.781707587042551e-06, "loss": 0.0155, "step": 43810 }, { "epoch": 0.3700154102721074, "grad_norm": 0.6328026056289673, "learning_rate": 9.781492180331953e-06, "loss": 0.0107, "step": 43820 }, { "epoch": 0.37009985011927127, "grad_norm": 0.30482688546180725, "learning_rate": 9.781276669768326e-06, "loss": 0.0166, "step": 43830 }, { "epoch": 0.37018428996643515, "grad_norm": 0.2779114842414856, "learning_rate": 9.781061055356351e-06, "loss": 0.0222, "step": 43840 }, { "epoch": 0.37026872981359904, "grad_norm": 0.6733545064926147, "learning_rate": 9.780845337100708e-06, "loss": 0.0181, "step": 43850 }, { "epoch": 0.3703531696607629, "grad_norm": 0.4060290455818176, "learning_rate": 9.780629515006083e-06, "loss": 0.015, "step": 43860 }, { "epoch": 0.3704376095079268, "grad_norm": 0.699118435382843, "learning_rate": 9.780413589077167e-06, "loss": 0.013, "step": 43870 }, { "epoch": 0.37052204935509064, "grad_norm": 0.498485803604126, "learning_rate": 9.780197559318647e-06, "loss": 0.0167, "step": 43880 }, { "epoch": 0.37060648920225453, "grad_norm": 0.382079541683197, "learning_rate": 9.779981425735215e-06, "loss": 0.0291, "step": 43890 }, { "epoch": 0.3706909290494184, "grad_norm": 0.6595408320426941, "learning_rate": 9.779765188331565e-06, "loss": 0.0149, "step": 43900 }, { "epoch": 0.3707753688965823, "grad_norm": 0.24471929669380188, "learning_rate": 9.779548847112395e-06, "loss": 0.0227, "step": 43910 }, { "epoch": 0.3708598087437462, "grad_norm": 0.5397467017173767, "learning_rate": 9.779332402082404e-06, "loss": 0.0178, "step": 43920 }, { "epoch": 0.3709442485909101, "grad_norm": 0.020930344238877296, "learning_rate": 9.779115853246293e-06, "loss": 0.0112, "step": 43930 }, { "epoch": 0.3710286884380739, "grad_norm": 0.07919229567050934, "learning_rate": 9.778899200608765e-06, "loss": 0.0124, "step": 43940 }, { "epoch": 0.3711131282852378, "grad_norm": 0.3827727437019348, "learning_rate": 9.778682444174526e-06, "loss": 0.0309, "step": 43950 }, { "epoch": 0.3711975681324017, "grad_norm": 0.3788534998893738, "learning_rate": 9.778465583948282e-06, "loss": 0.0189, "step": 43960 }, { "epoch": 0.37128200797956556, "grad_norm": 0.46959343552589417, "learning_rate": 9.778248619934744e-06, "loss": 0.0156, "step": 43970 }, { "epoch": 0.37136644782672945, "grad_norm": 0.7466889023780823, "learning_rate": 9.778031552138627e-06, "loss": 0.0209, "step": 43980 }, { "epoch": 0.37145088767389334, "grad_norm": 0.2997828722000122, "learning_rate": 9.777814380564644e-06, "loss": 0.0163, "step": 43990 }, { "epoch": 0.37153532752105717, "grad_norm": 0.4219473898410797, "learning_rate": 9.77759710521751e-06, "loss": 0.0137, "step": 44000 }, { "epoch": 0.37161976736822105, "grad_norm": 0.3080304265022278, "learning_rate": 9.777379726101948e-06, "loss": 0.0372, "step": 44010 }, { "epoch": 0.37170420721538494, "grad_norm": 0.2832379639148712, "learning_rate": 9.777162243222676e-06, "loss": 0.0137, "step": 44020 }, { "epoch": 0.3717886470625488, "grad_norm": 0.4844450056552887, "learning_rate": 9.77694465658442e-06, "loss": 0.0087, "step": 44030 }, { "epoch": 0.3718730869097127, "grad_norm": 0.18800851702690125, "learning_rate": 9.776726966191904e-06, "loss": 0.017, "step": 44040 }, { "epoch": 0.37195752675687654, "grad_norm": 0.14021754264831543, "learning_rate": 9.776509172049857e-06, "loss": 0.0157, "step": 44050 }, { "epoch": 0.37204196660404043, "grad_norm": 0.29790061712265015, "learning_rate": 9.77629127416301e-06, "loss": 0.0119, "step": 44060 }, { "epoch": 0.3721264064512043, "grad_norm": 0.5635319948196411, "learning_rate": 9.776073272536096e-06, "loss": 0.016, "step": 44070 }, { "epoch": 0.3722108462983682, "grad_norm": 0.19569316506385803, "learning_rate": 9.775855167173849e-06, "loss": 0.0157, "step": 44080 }, { "epoch": 0.3722952861455321, "grad_norm": 0.47004249691963196, "learning_rate": 9.775636958081005e-06, "loss": 0.0136, "step": 44090 }, { "epoch": 0.372379725992696, "grad_norm": 0.3565555810928345, "learning_rate": 9.775418645262305e-06, "loss": 0.0144, "step": 44100 }, { "epoch": 0.3724641658398598, "grad_norm": 0.26448526978492737, "learning_rate": 9.775200228722492e-06, "loss": 0.0147, "step": 44110 }, { "epoch": 0.3725486056870237, "grad_norm": 0.40719398856163025, "learning_rate": 9.774981708466309e-06, "loss": 0.0207, "step": 44120 }, { "epoch": 0.3726330455341876, "grad_norm": 0.47715988755226135, "learning_rate": 9.7747630844985e-06, "loss": 0.0292, "step": 44130 }, { "epoch": 0.37271748538135147, "grad_norm": 1.1073893308639526, "learning_rate": 9.774544356823816e-06, "loss": 0.0218, "step": 44140 }, { "epoch": 0.37280192522851535, "grad_norm": 0.8387842774391174, "learning_rate": 9.774325525447006e-06, "loss": 0.017, "step": 44150 }, { "epoch": 0.37288636507567924, "grad_norm": 0.32057392597198486, "learning_rate": 9.774106590372826e-06, "loss": 0.0098, "step": 44160 }, { "epoch": 0.37297080492284307, "grad_norm": 0.384845495223999, "learning_rate": 9.773887551606027e-06, "loss": 0.0133, "step": 44170 }, { "epoch": 0.37305524477000696, "grad_norm": 0.6483229398727417, "learning_rate": 9.773668409151371e-06, "loss": 0.0256, "step": 44180 }, { "epoch": 0.37313968461717084, "grad_norm": 0.7918877005577087, "learning_rate": 9.773449163013613e-06, "loss": 0.0229, "step": 44190 }, { "epoch": 0.37322412446433473, "grad_norm": 0.5772350430488586, "learning_rate": 9.77322981319752e-06, "loss": 0.0155, "step": 44200 }, { "epoch": 0.3733085643114986, "grad_norm": 1.1668193340301514, "learning_rate": 9.77301035970785e-06, "loss": 0.0136, "step": 44210 }, { "epoch": 0.3733930041586625, "grad_norm": 0.33436286449432373, "learning_rate": 9.772790802549377e-06, "loss": 0.0111, "step": 44220 }, { "epoch": 0.37347744400582633, "grad_norm": 0.28514203429222107, "learning_rate": 9.772571141726863e-06, "loss": 0.0219, "step": 44230 }, { "epoch": 0.3735618838529902, "grad_norm": 0.134624645113945, "learning_rate": 9.772351377245082e-06, "loss": 0.0185, "step": 44240 }, { "epoch": 0.3736463237001541, "grad_norm": 0.20287233591079712, "learning_rate": 9.772131509108808e-06, "loss": 0.0232, "step": 44250 }, { "epoch": 0.373730763547318, "grad_norm": 0.3253430128097534, "learning_rate": 9.771911537322816e-06, "loss": 0.0194, "step": 44260 }, { "epoch": 0.3738152033944819, "grad_norm": 0.3799642324447632, "learning_rate": 9.771691461891883e-06, "loss": 0.023, "step": 44270 }, { "epoch": 0.3738996432416457, "grad_norm": 0.058069877326488495, "learning_rate": 9.771471282820788e-06, "loss": 0.0219, "step": 44280 }, { "epoch": 0.3739840830888096, "grad_norm": 0.4037723243236542, "learning_rate": 9.771251000114316e-06, "loss": 0.0138, "step": 44290 }, { "epoch": 0.3740685229359735, "grad_norm": 0.5474432706832886, "learning_rate": 9.771030613777249e-06, "loss": 0.0181, "step": 44300 }, { "epoch": 0.37415296278313737, "grad_norm": 0.4944508373737335, "learning_rate": 9.770810123814376e-06, "loss": 0.0191, "step": 44310 }, { "epoch": 0.37423740263030125, "grad_norm": 0.577677309513092, "learning_rate": 9.770589530230483e-06, "loss": 0.0147, "step": 44320 }, { "epoch": 0.37432184247746514, "grad_norm": 0.816230833530426, "learning_rate": 9.770368833030364e-06, "loss": 0.0163, "step": 44330 }, { "epoch": 0.37440628232462897, "grad_norm": 0.48560723662376404, "learning_rate": 9.770148032218812e-06, "loss": 0.0259, "step": 44340 }, { "epoch": 0.37449072217179286, "grad_norm": 0.2982025742530823, "learning_rate": 9.76992712780062e-06, "loss": 0.0108, "step": 44350 }, { "epoch": 0.37457516201895674, "grad_norm": 0.6105253100395203, "learning_rate": 9.76970611978059e-06, "loss": 0.015, "step": 44360 }, { "epoch": 0.37465960186612063, "grad_norm": 0.6947150230407715, "learning_rate": 9.76948500816352e-06, "loss": 0.0167, "step": 44370 }, { "epoch": 0.3747440417132845, "grad_norm": 2.8153834342956543, "learning_rate": 9.769263792954214e-06, "loss": 0.0259, "step": 44380 }, { "epoch": 0.3748284815604484, "grad_norm": 0.20599235594272614, "learning_rate": 9.769042474157476e-06, "loss": 0.0079, "step": 44390 }, { "epoch": 0.37491292140761223, "grad_norm": 0.37483876943588257, "learning_rate": 9.76882105177811e-06, "loss": 0.0176, "step": 44400 }, { "epoch": 0.3749973612547761, "grad_norm": 1.2703492641448975, "learning_rate": 9.76859952582093e-06, "loss": 0.0177, "step": 44410 }, { "epoch": 0.37508180110194, "grad_norm": 0.8953637480735779, "learning_rate": 9.768377896290746e-06, "loss": 0.0133, "step": 44420 }, { "epoch": 0.3751662409491039, "grad_norm": 0.8479762077331543, "learning_rate": 9.76815616319237e-06, "loss": 0.0175, "step": 44430 }, { "epoch": 0.3752506807962678, "grad_norm": 0.7222302556037903, "learning_rate": 9.76793432653062e-06, "loss": 0.0146, "step": 44440 }, { "epoch": 0.3753351206434316, "grad_norm": 0.2663542926311493, "learning_rate": 9.767712386310312e-06, "loss": 0.0225, "step": 44450 }, { "epoch": 0.3754195604905955, "grad_norm": 0.17376817762851715, "learning_rate": 9.767490342536268e-06, "loss": 0.0113, "step": 44460 }, { "epoch": 0.3755040003377594, "grad_norm": 0.9229792356491089, "learning_rate": 9.767268195213313e-06, "loss": 0.0184, "step": 44470 }, { "epoch": 0.37558844018492327, "grad_norm": 0.3628811538219452, "learning_rate": 9.767045944346267e-06, "loss": 0.0133, "step": 44480 }, { "epoch": 0.37567288003208715, "grad_norm": 0.6279811859130859, "learning_rate": 9.766823589939963e-06, "loss": 0.0186, "step": 44490 }, { "epoch": 0.37575731987925104, "grad_norm": 0.08442921936511993, "learning_rate": 9.766601131999224e-06, "loss": 0.0167, "step": 44500 }, { "epoch": 0.37584175972641487, "grad_norm": 0.09064722806215286, "learning_rate": 9.766378570528888e-06, "loss": 0.0149, "step": 44510 }, { "epoch": 0.37592619957357876, "grad_norm": 0.3543815612792969, "learning_rate": 9.766155905533784e-06, "loss": 0.0221, "step": 44520 }, { "epoch": 0.37601063942074264, "grad_norm": 0.4165608882904053, "learning_rate": 9.765933137018753e-06, "loss": 0.0174, "step": 44530 }, { "epoch": 0.37609507926790653, "grad_norm": 0.42749276757240295, "learning_rate": 9.76571026498863e-06, "loss": 0.0171, "step": 44540 }, { "epoch": 0.3761795191150704, "grad_norm": 0.44973188638687134, "learning_rate": 9.765487289448256e-06, "loss": 0.0139, "step": 44550 }, { "epoch": 0.3762639589622343, "grad_norm": 0.2999230623245239, "learning_rate": 9.765264210402474e-06, "loss": 0.0181, "step": 44560 }, { "epoch": 0.37634839880939813, "grad_norm": 0.22739721834659576, "learning_rate": 9.76504102785613e-06, "loss": 0.0153, "step": 44570 }, { "epoch": 0.376432838656562, "grad_norm": 0.7822932600975037, "learning_rate": 9.764817741814072e-06, "loss": 0.0199, "step": 44580 }, { "epoch": 0.3765172785037259, "grad_norm": 0.41300278902053833, "learning_rate": 9.76459435228115e-06, "loss": 0.0143, "step": 44590 }, { "epoch": 0.3766017183508898, "grad_norm": 0.47501957416534424, "learning_rate": 9.764370859262213e-06, "loss": 0.0154, "step": 44600 }, { "epoch": 0.3766861581980537, "grad_norm": 0.43642565608024597, "learning_rate": 9.764147262762118e-06, "loss": 0.014, "step": 44610 }, { "epoch": 0.37677059804521756, "grad_norm": 0.5739748477935791, "learning_rate": 9.763923562785722e-06, "loss": 0.0182, "step": 44620 }, { "epoch": 0.3768550378923814, "grad_norm": 0.2437511682510376, "learning_rate": 9.763699759337881e-06, "loss": 0.0206, "step": 44630 }, { "epoch": 0.3769394777395453, "grad_norm": 0.7624385356903076, "learning_rate": 9.763475852423459e-06, "loss": 0.0257, "step": 44640 }, { "epoch": 0.37702391758670917, "grad_norm": 0.41639789938926697, "learning_rate": 9.763251842047316e-06, "loss": 0.03, "step": 44650 }, { "epoch": 0.37710835743387305, "grad_norm": 0.44987818598747253, "learning_rate": 9.763027728214318e-06, "loss": 0.012, "step": 44660 }, { "epoch": 0.37719279728103694, "grad_norm": 0.542930006980896, "learning_rate": 9.762803510929336e-06, "loss": 0.0172, "step": 44670 }, { "epoch": 0.37727723712820077, "grad_norm": 0.192082479596138, "learning_rate": 9.762579190197236e-06, "loss": 0.0218, "step": 44680 }, { "epoch": 0.37736167697536466, "grad_norm": 0.37746939063072205, "learning_rate": 9.762354766022895e-06, "loss": 0.0173, "step": 44690 }, { "epoch": 0.37744611682252854, "grad_norm": 0.8175413608551025, "learning_rate": 9.76213023841118e-06, "loss": 0.0237, "step": 44700 }, { "epoch": 0.37753055666969243, "grad_norm": 0.4121266305446625, "learning_rate": 9.761905607366975e-06, "loss": 0.0138, "step": 44710 }, { "epoch": 0.3776149965168563, "grad_norm": 0.46921494603157043, "learning_rate": 9.761680872895156e-06, "loss": 0.0241, "step": 44720 }, { "epoch": 0.3776994363640202, "grad_norm": 0.35194534063339233, "learning_rate": 9.761456035000603e-06, "loss": 0.0113, "step": 44730 }, { "epoch": 0.37778387621118403, "grad_norm": 0.5733862519264221, "learning_rate": 9.761231093688202e-06, "loss": 0.0232, "step": 44740 }, { "epoch": 0.3778683160583479, "grad_norm": 0.3275872468948364, "learning_rate": 9.761006048962836e-06, "loss": 0.0194, "step": 44750 }, { "epoch": 0.3779527559055118, "grad_norm": 0.27684739232063293, "learning_rate": 9.760780900829396e-06, "loss": 0.0088, "step": 44760 }, { "epoch": 0.3780371957526757, "grad_norm": 0.3191450536251068, "learning_rate": 9.760555649292768e-06, "loss": 0.0126, "step": 44770 }, { "epoch": 0.3781216355998396, "grad_norm": 0.4965613782405853, "learning_rate": 9.760330294357848e-06, "loss": 0.0172, "step": 44780 }, { "epoch": 0.37820607544700346, "grad_norm": 0.5027417540550232, "learning_rate": 9.76010483602953e-06, "loss": 0.0186, "step": 44790 }, { "epoch": 0.3782905152941673, "grad_norm": 0.12829487025737762, "learning_rate": 9.75987927431271e-06, "loss": 0.0124, "step": 44800 }, { "epoch": 0.3783749551413312, "grad_norm": 0.41781941056251526, "learning_rate": 9.759653609212288e-06, "loss": 0.0196, "step": 44810 }, { "epoch": 0.37845939498849507, "grad_norm": 0.4362892210483551, "learning_rate": 9.759427840733164e-06, "loss": 0.01, "step": 44820 }, { "epoch": 0.37854383483565895, "grad_norm": 0.7737006545066833, "learning_rate": 9.759201968880243e-06, "loss": 0.0163, "step": 44830 }, { "epoch": 0.37862827468282284, "grad_norm": 1.2596925497055054, "learning_rate": 9.75897599365843e-06, "loss": 0.0225, "step": 44840 }, { "epoch": 0.3787127145299867, "grad_norm": 0.4666786789894104, "learning_rate": 9.758749915072633e-06, "loss": 0.0167, "step": 44850 }, { "epoch": 0.37879715437715056, "grad_norm": 0.9702615737915039, "learning_rate": 9.758523733127764e-06, "loss": 0.0124, "step": 44860 }, { "epoch": 0.37888159422431444, "grad_norm": 0.5615870952606201, "learning_rate": 9.758297447828736e-06, "loss": 0.0142, "step": 44870 }, { "epoch": 0.37896603407147833, "grad_norm": 0.8141301870346069, "learning_rate": 9.758071059180462e-06, "loss": 0.0203, "step": 44880 }, { "epoch": 0.3790504739186422, "grad_norm": 0.05215015634894371, "learning_rate": 9.757844567187858e-06, "loss": 0.0206, "step": 44890 }, { "epoch": 0.3791349137658061, "grad_norm": 0.2253851294517517, "learning_rate": 9.757617971855847e-06, "loss": 0.0236, "step": 44900 }, { "epoch": 0.37921935361296993, "grad_norm": 0.48844388127326965, "learning_rate": 9.757391273189347e-06, "loss": 0.017, "step": 44910 }, { "epoch": 0.3793037934601338, "grad_norm": 0.4208482801914215, "learning_rate": 9.757164471193284e-06, "loss": 0.0136, "step": 44920 }, { "epoch": 0.3793882333072977, "grad_norm": 0.43265849351882935, "learning_rate": 9.756937565872582e-06, "loss": 0.0139, "step": 44930 }, { "epoch": 0.3794726731544616, "grad_norm": 0.33794546127319336, "learning_rate": 9.756710557232173e-06, "loss": 0.0115, "step": 44940 }, { "epoch": 0.3795571130016255, "grad_norm": 0.09111502021551132, "learning_rate": 9.756483445276985e-06, "loss": 0.0108, "step": 44950 }, { "epoch": 0.37964155284878937, "grad_norm": 0.37971821427345276, "learning_rate": 9.75625623001195e-06, "loss": 0.0116, "step": 44960 }, { "epoch": 0.3797259926959532, "grad_norm": 0.639278769493103, "learning_rate": 9.756028911442006e-06, "loss": 0.0115, "step": 44970 }, { "epoch": 0.3798104325431171, "grad_norm": 0.27120697498321533, "learning_rate": 9.755801489572087e-06, "loss": 0.0145, "step": 44980 }, { "epoch": 0.37989487239028097, "grad_norm": 0.17441703379154205, "learning_rate": 9.755573964407136e-06, "loss": 0.011, "step": 44990 }, { "epoch": 0.37997931223744486, "grad_norm": 0.5897381901741028, "learning_rate": 9.755346335952092e-06, "loss": 0.0189, "step": 45000 }, { "epoch": 0.38006375208460874, "grad_norm": 0.38673684000968933, "learning_rate": 9.7551186042119e-06, "loss": 0.0231, "step": 45010 }, { "epoch": 0.3801481919317726, "grad_norm": 0.9292475581169128, "learning_rate": 9.754890769191507e-06, "loss": 0.0126, "step": 45020 }, { "epoch": 0.38023263177893646, "grad_norm": 0.38020578026771545, "learning_rate": 9.754662830895861e-06, "loss": 0.0131, "step": 45030 }, { "epoch": 0.38031707162610034, "grad_norm": 0.42976999282836914, "learning_rate": 9.754434789329913e-06, "loss": 0.0116, "step": 45040 }, { "epoch": 0.38040151147326423, "grad_norm": 0.379158079624176, "learning_rate": 9.754206644498613e-06, "loss": 0.0175, "step": 45050 }, { "epoch": 0.3804859513204281, "grad_norm": 0.44459980726242065, "learning_rate": 9.75397839640692e-06, "loss": 0.0111, "step": 45060 }, { "epoch": 0.380570391167592, "grad_norm": 0.02639123797416687, "learning_rate": 9.75375004505979e-06, "loss": 0.0235, "step": 45070 }, { "epoch": 0.3806548310147559, "grad_norm": 0.7348262071609497, "learning_rate": 9.753521590462185e-06, "loss": 0.0216, "step": 45080 }, { "epoch": 0.3807392708619197, "grad_norm": 0.7544496059417725, "learning_rate": 9.753293032619064e-06, "loss": 0.0147, "step": 45090 }, { "epoch": 0.3808237107090836, "grad_norm": 0.7265428304672241, "learning_rate": 9.753064371535393e-06, "loss": 0.019, "step": 45100 }, { "epoch": 0.3809081505562475, "grad_norm": 0.34174296259880066, "learning_rate": 9.752835607216138e-06, "loss": 0.0097, "step": 45110 }, { "epoch": 0.3809925904034114, "grad_norm": 0.2050381451845169, "learning_rate": 9.752606739666266e-06, "loss": 0.0111, "step": 45120 }, { "epoch": 0.38107703025057527, "grad_norm": 1.4369289875030518, "learning_rate": 9.752377768890752e-06, "loss": 0.0209, "step": 45130 }, { "epoch": 0.3811614700977391, "grad_norm": 0.3222334384918213, "learning_rate": 9.752148694894565e-06, "loss": 0.0095, "step": 45140 }, { "epoch": 0.381245909944903, "grad_norm": 0.44081687927246094, "learning_rate": 9.751919517682682e-06, "loss": 0.0225, "step": 45150 }, { "epoch": 0.38133034979206687, "grad_norm": 0.2748172879219055, "learning_rate": 9.751690237260083e-06, "loss": 0.0129, "step": 45160 }, { "epoch": 0.38141478963923076, "grad_norm": 0.2347346842288971, "learning_rate": 9.751460853631742e-06, "loss": 0.0143, "step": 45170 }, { "epoch": 0.38149922948639464, "grad_norm": 0.5315325260162354, "learning_rate": 9.751231366802646e-06, "loss": 0.0147, "step": 45180 }, { "epoch": 0.38158366933355853, "grad_norm": 0.2837122976779938, "learning_rate": 9.751001776777781e-06, "loss": 0.0134, "step": 45190 }, { "epoch": 0.38166810918072236, "grad_norm": 0.6267984509468079, "learning_rate": 9.75077208356213e-06, "loss": 0.0207, "step": 45200 }, { "epoch": 0.38175254902788625, "grad_norm": 0.4444758892059326, "learning_rate": 9.750542287160682e-06, "loss": 0.0126, "step": 45210 }, { "epoch": 0.38183698887505013, "grad_norm": 0.5359405279159546, "learning_rate": 9.75031238757843e-06, "loss": 0.0299, "step": 45220 }, { "epoch": 0.381921428722214, "grad_norm": 0.043821390718221664, "learning_rate": 9.750082384820364e-06, "loss": 0.0068, "step": 45230 }, { "epoch": 0.3820058685693779, "grad_norm": 0.519212543964386, "learning_rate": 9.749852278891485e-06, "loss": 0.011, "step": 45240 }, { "epoch": 0.3820903084165418, "grad_norm": 0.4769182503223419, "learning_rate": 9.749622069796785e-06, "loss": 0.0141, "step": 45250 }, { "epoch": 0.3821747482637056, "grad_norm": 1.5789105892181396, "learning_rate": 9.749391757541269e-06, "loss": 0.0177, "step": 45260 }, { "epoch": 0.3822591881108695, "grad_norm": 0.3641236126422882, "learning_rate": 9.749161342129936e-06, "loss": 0.0136, "step": 45270 }, { "epoch": 0.3823436279580334, "grad_norm": 0.7254853248596191, "learning_rate": 9.748930823567793e-06, "loss": 0.0146, "step": 45280 }, { "epoch": 0.3824280678051973, "grad_norm": 0.34468546509742737, "learning_rate": 9.748700201859844e-06, "loss": 0.0105, "step": 45290 }, { "epoch": 0.38251250765236117, "grad_norm": 0.5433927774429321, "learning_rate": 9.7484694770111e-06, "loss": 0.0114, "step": 45300 }, { "epoch": 0.38259694749952505, "grad_norm": 0.4465872049331665, "learning_rate": 9.748238649026573e-06, "loss": 0.0171, "step": 45310 }, { "epoch": 0.3826813873466889, "grad_norm": 0.7297940254211426, "learning_rate": 9.748007717911277e-06, "loss": 0.0155, "step": 45320 }, { "epoch": 0.38276582719385277, "grad_norm": 0.663547694683075, "learning_rate": 9.747776683670225e-06, "loss": 0.0156, "step": 45330 }, { "epoch": 0.38285026704101666, "grad_norm": 0.3512054979801178, "learning_rate": 9.747545546308435e-06, "loss": 0.0222, "step": 45340 }, { "epoch": 0.38293470688818054, "grad_norm": 0.2705206871032715, "learning_rate": 9.74731430583093e-06, "loss": 0.0227, "step": 45350 }, { "epoch": 0.38301914673534443, "grad_norm": 0.1963406652212143, "learning_rate": 9.747082962242731e-06, "loss": 0.0185, "step": 45360 }, { "epoch": 0.38310358658250826, "grad_norm": 0.17231905460357666, "learning_rate": 9.746851515548862e-06, "loss": 0.0207, "step": 45370 }, { "epoch": 0.38318802642967215, "grad_norm": 0.2073379009962082, "learning_rate": 9.746619965754352e-06, "loss": 0.0152, "step": 45380 }, { "epoch": 0.38327246627683603, "grad_norm": 1.3885411024093628, "learning_rate": 9.746388312864227e-06, "loss": 0.0179, "step": 45390 }, { "epoch": 0.3833569061239999, "grad_norm": 0.11438124626874924, "learning_rate": 9.746156556883522e-06, "loss": 0.0084, "step": 45400 }, { "epoch": 0.3834413459711638, "grad_norm": 0.7126258015632629, "learning_rate": 9.74592469781727e-06, "loss": 0.0135, "step": 45410 }, { "epoch": 0.3835257858183277, "grad_norm": 0.5312482118606567, "learning_rate": 9.745692735670504e-06, "loss": 0.0146, "step": 45420 }, { "epoch": 0.3836102256654915, "grad_norm": 0.3998948037624359, "learning_rate": 9.745460670448266e-06, "loss": 0.0098, "step": 45430 }, { "epoch": 0.3836946655126554, "grad_norm": 0.6730522513389587, "learning_rate": 9.745228502155595e-06, "loss": 0.0176, "step": 45440 }, { "epoch": 0.3837791053598193, "grad_norm": 0.5110579133033752, "learning_rate": 9.744996230797531e-06, "loss": 0.0205, "step": 45450 }, { "epoch": 0.3838635452069832, "grad_norm": 0.40918663144111633, "learning_rate": 9.744763856379123e-06, "loss": 0.0167, "step": 45460 }, { "epoch": 0.38394798505414707, "grad_norm": 0.5368292927742004, "learning_rate": 9.744531378905414e-06, "loss": 0.0165, "step": 45470 }, { "epoch": 0.38403242490131095, "grad_norm": 0.3864164352416992, "learning_rate": 9.744298798381459e-06, "loss": 0.0132, "step": 45480 }, { "epoch": 0.3841168647484748, "grad_norm": 0.22484946250915527, "learning_rate": 9.744066114812305e-06, "loss": 0.0181, "step": 45490 }, { "epoch": 0.38420130459563867, "grad_norm": 0.42982250452041626, "learning_rate": 9.743833328203007e-06, "loss": 0.0139, "step": 45500 }, { "epoch": 0.38428574444280256, "grad_norm": 0.39132437109947205, "learning_rate": 9.743600438558621e-06, "loss": 0.0159, "step": 45510 }, { "epoch": 0.38437018428996644, "grad_norm": 0.19160620868206024, "learning_rate": 9.743367445884207e-06, "loss": 0.0202, "step": 45520 }, { "epoch": 0.38445462413713033, "grad_norm": 0.4568413197994232, "learning_rate": 9.743134350184821e-06, "loss": 0.027, "step": 45530 }, { "epoch": 0.3845390639842942, "grad_norm": 0.45358556509017944, "learning_rate": 9.742901151465531e-06, "loss": 0.0164, "step": 45540 }, { "epoch": 0.38462350383145805, "grad_norm": 0.29946720600128174, "learning_rate": 9.7426678497314e-06, "loss": 0.0093, "step": 45550 }, { "epoch": 0.38470794367862193, "grad_norm": 0.3774689733982086, "learning_rate": 9.742434444987495e-06, "loss": 0.0147, "step": 45560 }, { "epoch": 0.3847923835257858, "grad_norm": 0.2245595008134842, "learning_rate": 9.742200937238886e-06, "loss": 0.0096, "step": 45570 }, { "epoch": 0.3848768233729497, "grad_norm": 0.17677733302116394, "learning_rate": 9.741967326490644e-06, "loss": 0.0185, "step": 45580 }, { "epoch": 0.3849612632201136, "grad_norm": 0.5318061709403992, "learning_rate": 9.741733612747843e-06, "loss": 0.0142, "step": 45590 }, { "epoch": 0.3850457030672774, "grad_norm": 0.2946944832801819, "learning_rate": 9.74149979601556e-06, "loss": 0.0211, "step": 45600 }, { "epoch": 0.3851301429144413, "grad_norm": 0.33344584703445435, "learning_rate": 9.741265876298873e-06, "loss": 0.0137, "step": 45610 }, { "epoch": 0.3852145827616052, "grad_norm": 0.5422059297561646, "learning_rate": 9.741031853602862e-06, "loss": 0.0182, "step": 45620 }, { "epoch": 0.3852990226087691, "grad_norm": 0.3686557710170746, "learning_rate": 9.740797727932612e-06, "loss": 0.0102, "step": 45630 }, { "epoch": 0.38538346245593297, "grad_norm": 0.12562060356140137, "learning_rate": 9.740563499293205e-06, "loss": 0.0156, "step": 45640 }, { "epoch": 0.38546790230309685, "grad_norm": 0.3921075463294983, "learning_rate": 9.740329167689733e-06, "loss": 0.0137, "step": 45650 }, { "epoch": 0.3855523421502607, "grad_norm": 0.23140688240528107, "learning_rate": 9.74009473312728e-06, "loss": 0.0177, "step": 45660 }, { "epoch": 0.38563678199742457, "grad_norm": 0.5416081547737122, "learning_rate": 9.739860195610943e-06, "loss": 0.023, "step": 45670 }, { "epoch": 0.38572122184458846, "grad_norm": 0.7904573082923889, "learning_rate": 9.739625555145813e-06, "loss": 0.016, "step": 45680 }, { "epoch": 0.38580566169175234, "grad_norm": 0.7024451494216919, "learning_rate": 9.739390811736986e-06, "loss": 0.0134, "step": 45690 }, { "epoch": 0.38589010153891623, "grad_norm": 0.59343421459198, "learning_rate": 9.739155965389564e-06, "loss": 0.0148, "step": 45700 }, { "epoch": 0.3859745413860801, "grad_norm": 0.3921768069267273, "learning_rate": 9.738921016108644e-06, "loss": 0.0171, "step": 45710 }, { "epoch": 0.38605898123324395, "grad_norm": 0.6989474892616272, "learning_rate": 9.738685963899331e-06, "loss": 0.0192, "step": 45720 }, { "epoch": 0.38614342108040783, "grad_norm": 0.46220457553863525, "learning_rate": 9.73845080876673e-06, "loss": 0.0102, "step": 45730 }, { "epoch": 0.3862278609275717, "grad_norm": 0.7357529401779175, "learning_rate": 9.738215550715949e-06, "loss": 0.0181, "step": 45740 }, { "epoch": 0.3863123007747356, "grad_norm": 0.5152562260627747, "learning_rate": 9.737980189752096e-06, "loss": 0.0169, "step": 45750 }, { "epoch": 0.3863967406218995, "grad_norm": 0.7476515173912048, "learning_rate": 9.737744725880285e-06, "loss": 0.0111, "step": 45760 }, { "epoch": 0.3864811804690633, "grad_norm": 0.5402404069900513, "learning_rate": 9.73750915910563e-06, "loss": 0.0173, "step": 45770 }, { "epoch": 0.3865656203162272, "grad_norm": 0.17971748113632202, "learning_rate": 9.737273489433244e-06, "loss": 0.0182, "step": 45780 }, { "epoch": 0.3866500601633911, "grad_norm": 0.24519284069538116, "learning_rate": 9.737037716868252e-06, "loss": 0.0163, "step": 45790 }, { "epoch": 0.386734500010555, "grad_norm": 0.4917545020580292, "learning_rate": 9.73680184141577e-06, "loss": 0.0206, "step": 45800 }, { "epoch": 0.38681893985771887, "grad_norm": 0.5086447596549988, "learning_rate": 9.736565863080922e-06, "loss": 0.0186, "step": 45810 }, { "epoch": 0.38690337970488275, "grad_norm": 0.3885221481323242, "learning_rate": 9.736329781868835e-06, "loss": 0.0169, "step": 45820 }, { "epoch": 0.3869878195520466, "grad_norm": 0.5249952673912048, "learning_rate": 9.736093597784635e-06, "loss": 0.0092, "step": 45830 }, { "epoch": 0.38707225939921047, "grad_norm": 1.028703212738037, "learning_rate": 9.735857310833455e-06, "loss": 0.0235, "step": 45840 }, { "epoch": 0.38715669924637436, "grad_norm": 1.150355339050293, "learning_rate": 9.735620921020422e-06, "loss": 0.0146, "step": 45850 }, { "epoch": 0.38724113909353824, "grad_norm": 0.18653544783592224, "learning_rate": 9.735384428350673e-06, "loss": 0.0268, "step": 45860 }, { "epoch": 0.38732557894070213, "grad_norm": 0.11807519942522049, "learning_rate": 9.735147832829346e-06, "loss": 0.0125, "step": 45870 }, { "epoch": 0.387410018787866, "grad_norm": 0.039248377084732056, "learning_rate": 9.734911134461578e-06, "loss": 0.0107, "step": 45880 }, { "epoch": 0.38749445863502985, "grad_norm": 0.44433602690696716, "learning_rate": 9.73467433325251e-06, "loss": 0.017, "step": 45890 }, { "epoch": 0.38757889848219373, "grad_norm": 0.18947887420654297, "learning_rate": 9.734437429207286e-06, "loss": 0.0112, "step": 45900 }, { "epoch": 0.3876633383293576, "grad_norm": 0.5994924902915955, "learning_rate": 9.73420042233105e-06, "loss": 0.0114, "step": 45910 }, { "epoch": 0.3877477781765215, "grad_norm": 0.41253095865249634, "learning_rate": 9.733963312628952e-06, "loss": 0.0159, "step": 45920 }, { "epoch": 0.3878322180236854, "grad_norm": 1.067940592765808, "learning_rate": 9.733726100106142e-06, "loss": 0.0243, "step": 45930 }, { "epoch": 0.3879166578708493, "grad_norm": 0.1813625991344452, "learning_rate": 9.73348878476777e-06, "loss": 0.0115, "step": 45940 }, { "epoch": 0.3880010977180131, "grad_norm": 0.27215903997421265, "learning_rate": 9.733251366618992e-06, "loss": 0.0202, "step": 45950 }, { "epoch": 0.388085537565177, "grad_norm": 0.6979047656059265, "learning_rate": 9.733013845664964e-06, "loss": 0.0293, "step": 45960 }, { "epoch": 0.3881699774123409, "grad_norm": 0.4214212894439697, "learning_rate": 9.732776221910845e-06, "loss": 0.0153, "step": 45970 }, { "epoch": 0.38825441725950477, "grad_norm": 1.5605049133300781, "learning_rate": 9.732538495361796e-06, "loss": 0.0251, "step": 45980 }, { "epoch": 0.38833885710666866, "grad_norm": 0.3128381371498108, "learning_rate": 9.73230066602298e-06, "loss": 0.0178, "step": 45990 }, { "epoch": 0.3884232969538325, "grad_norm": 0.5733112096786499, "learning_rate": 9.732062733899565e-06, "loss": 0.0121, "step": 46000 }, { "epoch": 0.3885077368009964, "grad_norm": 0.6775920987129211, "learning_rate": 9.731824698996717e-06, "loss": 0.0147, "step": 46010 }, { "epoch": 0.38859217664816026, "grad_norm": 0.4331096112728119, "learning_rate": 9.731586561319607e-06, "loss": 0.0091, "step": 46020 }, { "epoch": 0.38867661649532415, "grad_norm": 0.45123711228370667, "learning_rate": 9.731348320873403e-06, "loss": 0.0119, "step": 46030 }, { "epoch": 0.38876105634248803, "grad_norm": 0.3565613627433777, "learning_rate": 9.731109977663287e-06, "loss": 0.0185, "step": 46040 }, { "epoch": 0.3888454961896519, "grad_norm": 0.203005850315094, "learning_rate": 9.730871531694429e-06, "loss": 0.0146, "step": 46050 }, { "epoch": 0.38892993603681575, "grad_norm": 0.12521743774414062, "learning_rate": 9.730632982972012e-06, "loss": 0.0225, "step": 46060 }, { "epoch": 0.38901437588397964, "grad_norm": 0.5001010298728943, "learning_rate": 9.730394331501216e-06, "loss": 0.0181, "step": 46070 }, { "epoch": 0.3890988157311435, "grad_norm": 0.35299572348594666, "learning_rate": 9.730155577287224e-06, "loss": 0.015, "step": 46080 }, { "epoch": 0.3891832555783074, "grad_norm": 0.496377557516098, "learning_rate": 9.729916720335221e-06, "loss": 0.0222, "step": 46090 }, { "epoch": 0.3892676954254713, "grad_norm": 0.8308051228523254, "learning_rate": 9.7296777606504e-06, "loss": 0.0159, "step": 46100 }, { "epoch": 0.3893521352726352, "grad_norm": 0.7171558737754822, "learning_rate": 9.729438698237943e-06, "loss": 0.0161, "step": 46110 }, { "epoch": 0.389436575119799, "grad_norm": 0.6511593461036682, "learning_rate": 9.729199533103049e-06, "loss": 0.0139, "step": 46120 }, { "epoch": 0.3895210149669629, "grad_norm": 0.543409526348114, "learning_rate": 9.728960265250909e-06, "loss": 0.0109, "step": 46130 }, { "epoch": 0.3896054548141268, "grad_norm": 0.32504814863204956, "learning_rate": 9.728720894686721e-06, "loss": 0.0209, "step": 46140 }, { "epoch": 0.38968989466129067, "grad_norm": 0.3427049219608307, "learning_rate": 9.728481421415686e-06, "loss": 0.0164, "step": 46150 }, { "epoch": 0.38977433450845456, "grad_norm": 0.2751303017139435, "learning_rate": 9.728241845443002e-06, "loss": 0.0191, "step": 46160 }, { "epoch": 0.38985877435561844, "grad_norm": 0.40777599811553955, "learning_rate": 9.728002166773875e-06, "loss": 0.0225, "step": 46170 }, { "epoch": 0.3899432142027823, "grad_norm": 0.5857659578323364, "learning_rate": 9.727762385413508e-06, "loss": 0.0139, "step": 46180 }, { "epoch": 0.39002765404994616, "grad_norm": 0.44252657890319824, "learning_rate": 9.727522501367112e-06, "loss": 0.0153, "step": 46190 }, { "epoch": 0.39011209389711005, "grad_norm": 0.19522172212600708, "learning_rate": 9.727282514639897e-06, "loss": 0.0124, "step": 46200 }, { "epoch": 0.39019653374427393, "grad_norm": 0.15115156769752502, "learning_rate": 9.727042425237074e-06, "loss": 0.0082, "step": 46210 }, { "epoch": 0.3902809735914378, "grad_norm": 0.19970104098320007, "learning_rate": 9.726802233163857e-06, "loss": 0.0201, "step": 46220 }, { "epoch": 0.39036541343860165, "grad_norm": 0.24232889711856842, "learning_rate": 9.726561938425465e-06, "loss": 0.0193, "step": 46230 }, { "epoch": 0.39044985328576554, "grad_norm": 0.2743108868598938, "learning_rate": 9.726321541027116e-06, "loss": 0.0193, "step": 46240 }, { "epoch": 0.3905342931329294, "grad_norm": 0.44849005341529846, "learning_rate": 9.726081040974031e-06, "loss": 0.0173, "step": 46250 }, { "epoch": 0.3906187329800933, "grad_norm": 0.5798232555389404, "learning_rate": 9.725840438271434e-06, "loss": 0.0148, "step": 46260 }, { "epoch": 0.3907031728272572, "grad_norm": 0.21672770380973816, "learning_rate": 9.725599732924553e-06, "loss": 0.0113, "step": 46270 }, { "epoch": 0.3907876126744211, "grad_norm": 0.31899115443229675, "learning_rate": 9.725358924938613e-06, "loss": 0.014, "step": 46280 }, { "epoch": 0.3908720525215849, "grad_norm": 0.1768081784248352, "learning_rate": 9.725118014318847e-06, "loss": 0.0203, "step": 46290 }, { "epoch": 0.3909564923687488, "grad_norm": 0.4472808837890625, "learning_rate": 9.724877001070485e-06, "loss": 0.022, "step": 46300 }, { "epoch": 0.3910409322159127, "grad_norm": 1.5409700870513916, "learning_rate": 9.72463588519876e-06, "loss": 0.0176, "step": 46310 }, { "epoch": 0.39112537206307657, "grad_norm": 0.5812289714813232, "learning_rate": 9.724394666708916e-06, "loss": 0.0139, "step": 46320 }, { "epoch": 0.39120981191024046, "grad_norm": 0.5095283389091492, "learning_rate": 9.724153345606186e-06, "loss": 0.0107, "step": 46330 }, { "epoch": 0.39129425175740434, "grad_norm": 0.2866933345794678, "learning_rate": 9.723911921895813e-06, "loss": 0.0174, "step": 46340 }, { "epoch": 0.3913786916045682, "grad_norm": 0.5448814034461975, "learning_rate": 9.723670395583042e-06, "loss": 0.0138, "step": 46350 }, { "epoch": 0.39146313145173206, "grad_norm": 0.18554022908210754, "learning_rate": 9.723428766673118e-06, "loss": 0.0106, "step": 46360 }, { "epoch": 0.39154757129889595, "grad_norm": 0.518938422203064, "learning_rate": 9.723187035171288e-06, "loss": 0.0203, "step": 46370 }, { "epoch": 0.39163201114605983, "grad_norm": 0.49174386262893677, "learning_rate": 9.722945201082804e-06, "loss": 0.0148, "step": 46380 }, { "epoch": 0.3917164509932237, "grad_norm": 0.38542842864990234, "learning_rate": 9.722703264412918e-06, "loss": 0.0231, "step": 46390 }, { "epoch": 0.3918008908403876, "grad_norm": 0.08330957591533661, "learning_rate": 9.722461225166886e-06, "loss": 0.0102, "step": 46400 }, { "epoch": 0.39188533068755144, "grad_norm": 0.5189454555511475, "learning_rate": 9.722219083349962e-06, "loss": 0.026, "step": 46410 }, { "epoch": 0.3919697705347153, "grad_norm": 0.7506471872329712, "learning_rate": 9.721976838967408e-06, "loss": 0.0157, "step": 46420 }, { "epoch": 0.3920542103818792, "grad_norm": 0.39601269364356995, "learning_rate": 9.721734492024487e-06, "loss": 0.0268, "step": 46430 }, { "epoch": 0.3921386502290431, "grad_norm": 0.056317657232284546, "learning_rate": 9.721492042526456e-06, "loss": 0.0112, "step": 46440 }, { "epoch": 0.392223090076207, "grad_norm": 0.48551005125045776, "learning_rate": 9.721249490478589e-06, "loss": 0.0152, "step": 46450 }, { "epoch": 0.3923075299233708, "grad_norm": 0.45087188482284546, "learning_rate": 9.721006835886148e-06, "loss": 0.0162, "step": 46460 }, { "epoch": 0.3923919697705347, "grad_norm": 0.349991112947464, "learning_rate": 9.720764078754408e-06, "loss": 0.0095, "step": 46470 }, { "epoch": 0.3924764096176986, "grad_norm": 0.3569599688053131, "learning_rate": 9.720521219088638e-06, "loss": 0.0092, "step": 46480 }, { "epoch": 0.39256084946486247, "grad_norm": 0.44419658184051514, "learning_rate": 9.720278256894116e-06, "loss": 0.014, "step": 46490 }, { "epoch": 0.39264528931202636, "grad_norm": 0.3551136255264282, "learning_rate": 9.720035192176117e-06, "loss": 0.01, "step": 46500 }, { "epoch": 0.39272972915919024, "grad_norm": 0.6511370539665222, "learning_rate": 9.71979202493992e-06, "loss": 0.0146, "step": 46510 }, { "epoch": 0.3928141690063541, "grad_norm": 0.2623238265514374, "learning_rate": 9.719548755190808e-06, "loss": 0.0131, "step": 46520 }, { "epoch": 0.39289860885351796, "grad_norm": 0.2807020843029022, "learning_rate": 9.719305382934065e-06, "loss": 0.0123, "step": 46530 }, { "epoch": 0.39298304870068185, "grad_norm": 0.16075915098190308, "learning_rate": 9.719061908174975e-06, "loss": 0.0127, "step": 46540 }, { "epoch": 0.39306748854784573, "grad_norm": 0.6553332209587097, "learning_rate": 9.718818330918827e-06, "loss": 0.0079, "step": 46550 }, { "epoch": 0.3931519283950096, "grad_norm": 0.5259984135627747, "learning_rate": 9.718574651170913e-06, "loss": 0.0169, "step": 46560 }, { "epoch": 0.3932363682421735, "grad_norm": 0.23021934926509857, "learning_rate": 9.718330868936525e-06, "loss": 0.0133, "step": 46570 }, { "epoch": 0.39332080808933734, "grad_norm": 0.4687240719795227, "learning_rate": 9.718086984220956e-06, "loss": 0.0106, "step": 46580 }, { "epoch": 0.3934052479365012, "grad_norm": 0.5864912867546082, "learning_rate": 9.717842997029504e-06, "loss": 0.0166, "step": 46590 }, { "epoch": 0.3934896877836651, "grad_norm": 0.300861656665802, "learning_rate": 9.717598907367471e-06, "loss": 0.0201, "step": 46600 }, { "epoch": 0.393574127630829, "grad_norm": 0.48648419976234436, "learning_rate": 9.717354715240156e-06, "loss": 0.0142, "step": 46610 }, { "epoch": 0.3936585674779929, "grad_norm": 0.9830610752105713, "learning_rate": 9.717110420652861e-06, "loss": 0.0144, "step": 46620 }, { "epoch": 0.39374300732515677, "grad_norm": 0.26788169145584106, "learning_rate": 9.716866023610896e-06, "loss": 0.017, "step": 46630 }, { "epoch": 0.3938274471723206, "grad_norm": 0.3530467450618744, "learning_rate": 9.716621524119566e-06, "loss": 0.0201, "step": 46640 }, { "epoch": 0.3939118870194845, "grad_norm": 0.8952140212059021, "learning_rate": 9.716376922184185e-06, "loss": 0.0183, "step": 46650 }, { "epoch": 0.39399632686664837, "grad_norm": 0.4198870062828064, "learning_rate": 9.716132217810063e-06, "loss": 0.0145, "step": 46660 }, { "epoch": 0.39408076671381226, "grad_norm": 0.3883994221687317, "learning_rate": 9.715887411002516e-06, "loss": 0.0157, "step": 46670 }, { "epoch": 0.39416520656097614, "grad_norm": 0.5421052575111389, "learning_rate": 9.715642501766862e-06, "loss": 0.0186, "step": 46680 }, { "epoch": 0.39424964640814, "grad_norm": 0.31801638007164, "learning_rate": 9.715397490108417e-06, "loss": 0.0126, "step": 46690 }, { "epoch": 0.39433408625530386, "grad_norm": 0.3374192714691162, "learning_rate": 9.715152376032506e-06, "loss": 0.0098, "step": 46700 }, { "epoch": 0.39441852610246775, "grad_norm": 0.5477150082588196, "learning_rate": 9.714907159544451e-06, "loss": 0.0137, "step": 46710 }, { "epoch": 0.39450296594963163, "grad_norm": 0.6028771996498108, "learning_rate": 9.714661840649579e-06, "loss": 0.0233, "step": 46720 }, { "epoch": 0.3945874057967955, "grad_norm": 0.5312628746032715, "learning_rate": 9.714416419353217e-06, "loss": 0.0165, "step": 46730 }, { "epoch": 0.3946718456439594, "grad_norm": 0.3041234314441681, "learning_rate": 9.714170895660698e-06, "loss": 0.0224, "step": 46740 }, { "epoch": 0.39475628549112324, "grad_norm": 0.32636910676956177, "learning_rate": 9.713925269577352e-06, "loss": 0.0174, "step": 46750 }, { "epoch": 0.3948407253382871, "grad_norm": 0.2992417514324188, "learning_rate": 9.713679541108517e-06, "loss": 0.0094, "step": 46760 }, { "epoch": 0.394925165185451, "grad_norm": 0.36400362849235535, "learning_rate": 9.713433710259526e-06, "loss": 0.0091, "step": 46770 }, { "epoch": 0.3950096050326149, "grad_norm": 0.1644100546836853, "learning_rate": 9.713187777035722e-06, "loss": 0.0152, "step": 46780 }, { "epoch": 0.3950940448797788, "grad_norm": 0.3387826085090637, "learning_rate": 9.712941741442444e-06, "loss": 0.0111, "step": 46790 }, { "epoch": 0.39517848472694267, "grad_norm": 0.27865278720855713, "learning_rate": 9.712695603485038e-06, "loss": 0.0189, "step": 46800 }, { "epoch": 0.3952629245741065, "grad_norm": 0.20920813083648682, "learning_rate": 9.71244936316885e-06, "loss": 0.0089, "step": 46810 }, { "epoch": 0.3953473644212704, "grad_norm": 0.012896629050374031, "learning_rate": 9.712203020499225e-06, "loss": 0.0186, "step": 46820 }, { "epoch": 0.3954318042684343, "grad_norm": 1.3487612009048462, "learning_rate": 9.711956575481519e-06, "loss": 0.0206, "step": 46830 }, { "epoch": 0.39551624411559816, "grad_norm": 0.37735337018966675, "learning_rate": 9.71171002812108e-06, "loss": 0.0163, "step": 46840 }, { "epoch": 0.39560068396276205, "grad_norm": 0.3349364995956421, "learning_rate": 9.711463378423264e-06, "loss": 0.016, "step": 46850 }, { "epoch": 0.39568512380992593, "grad_norm": 0.2781599164009094, "learning_rate": 9.711216626393431e-06, "loss": 0.0133, "step": 46860 }, { "epoch": 0.39576956365708976, "grad_norm": 0.4347391426563263, "learning_rate": 9.710969772036937e-06, "loss": 0.0152, "step": 46870 }, { "epoch": 0.39585400350425365, "grad_norm": 0.517026960849762, "learning_rate": 9.710722815359147e-06, "loss": 0.0137, "step": 46880 }, { "epoch": 0.39593844335141753, "grad_norm": 0.46333470940589905, "learning_rate": 9.71047575636542e-06, "loss": 0.014, "step": 46890 }, { "epoch": 0.3960228831985814, "grad_norm": 0.295530766248703, "learning_rate": 9.710228595061126e-06, "loss": 0.0198, "step": 46900 }, { "epoch": 0.3961073230457453, "grad_norm": 0.19765865802764893, "learning_rate": 9.709981331451632e-06, "loss": 0.0136, "step": 46910 }, { "epoch": 0.39619176289290914, "grad_norm": 0.28228187561035156, "learning_rate": 9.70973396554231e-06, "loss": 0.0227, "step": 46920 }, { "epoch": 0.396276202740073, "grad_norm": 0.7363610863685608, "learning_rate": 9.70948649733853e-06, "loss": 0.021, "step": 46930 }, { "epoch": 0.3963606425872369, "grad_norm": 0.2589600682258606, "learning_rate": 9.709238926845668e-06, "loss": 0.0102, "step": 46940 }, { "epoch": 0.3964450824344008, "grad_norm": 0.28737860918045044, "learning_rate": 9.708991254069101e-06, "loss": 0.012, "step": 46950 }, { "epoch": 0.3965295222815647, "grad_norm": 0.1329750120639801, "learning_rate": 9.70874347901421e-06, "loss": 0.0263, "step": 46960 }, { "epoch": 0.39661396212872857, "grad_norm": 0.34813228249549866, "learning_rate": 9.708495601686376e-06, "loss": 0.0087, "step": 46970 }, { "epoch": 0.3966984019758924, "grad_norm": 0.20310227572917938, "learning_rate": 9.708247622090981e-06, "loss": 0.0236, "step": 46980 }, { "epoch": 0.3967828418230563, "grad_norm": 0.5022609829902649, "learning_rate": 9.707999540233414e-06, "loss": 0.0122, "step": 46990 }, { "epoch": 0.3968672816702202, "grad_norm": 0.8500890731811523, "learning_rate": 9.707751356119061e-06, "loss": 0.0149, "step": 47000 }, { "epoch": 0.39695172151738406, "grad_norm": 0.23796281218528748, "learning_rate": 9.707503069753312e-06, "loss": 0.0098, "step": 47010 }, { "epoch": 0.39703616136454795, "grad_norm": 0.2011803537607193, "learning_rate": 9.707254681141562e-06, "loss": 0.0121, "step": 47020 }, { "epoch": 0.39712060121171183, "grad_norm": 0.7124568223953247, "learning_rate": 9.707006190289206e-06, "loss": 0.0168, "step": 47030 }, { "epoch": 0.39720504105887566, "grad_norm": 0.19267593324184418, "learning_rate": 9.706757597201639e-06, "loss": 0.0138, "step": 47040 }, { "epoch": 0.39728948090603955, "grad_norm": 0.045269954949617386, "learning_rate": 9.706508901884261e-06, "loss": 0.016, "step": 47050 }, { "epoch": 0.39737392075320344, "grad_norm": 0.562377393245697, "learning_rate": 9.706260104342476e-06, "loss": 0.0205, "step": 47060 }, { "epoch": 0.3974583606003673, "grad_norm": 0.5738065838813782, "learning_rate": 9.706011204581684e-06, "loss": 0.0092, "step": 47070 }, { "epoch": 0.3975428004475312, "grad_norm": 0.5259701609611511, "learning_rate": 9.705762202607295e-06, "loss": 0.0134, "step": 47080 }, { "epoch": 0.39762724029469504, "grad_norm": 0.2756150960922241, "learning_rate": 9.705513098424715e-06, "loss": 0.0216, "step": 47090 }, { "epoch": 0.3977116801418589, "grad_norm": 0.8270524740219116, "learning_rate": 9.705263892039352e-06, "loss": 0.0149, "step": 47100 }, { "epoch": 0.3977961199890228, "grad_norm": 0.48747286200523376, "learning_rate": 9.705014583456624e-06, "loss": 0.011, "step": 47110 }, { "epoch": 0.3978805598361867, "grad_norm": 0.7355923652648926, "learning_rate": 9.704765172681942e-06, "loss": 0.0191, "step": 47120 }, { "epoch": 0.3979649996833506, "grad_norm": 0.4727146029472351, "learning_rate": 9.704515659720726e-06, "loss": 0.0242, "step": 47130 }, { "epoch": 0.39804943953051447, "grad_norm": 0.4772973954677582, "learning_rate": 9.704266044578394e-06, "loss": 0.0111, "step": 47140 }, { "epoch": 0.3981338793776783, "grad_norm": 0.5440207719802856, "learning_rate": 9.704016327260365e-06, "loss": 0.0176, "step": 47150 }, { "epoch": 0.3982183192248422, "grad_norm": 0.11000476032495499, "learning_rate": 9.703766507772068e-06, "loss": 0.0142, "step": 47160 }, { "epoch": 0.3983027590720061, "grad_norm": 0.4971379339694977, "learning_rate": 9.703516586118925e-06, "loss": 0.0113, "step": 47170 }, { "epoch": 0.39838719891916996, "grad_norm": 0.204253152012825, "learning_rate": 9.703266562306365e-06, "loss": 0.0223, "step": 47180 }, { "epoch": 0.39847163876633385, "grad_norm": 0.558405339717865, "learning_rate": 9.703016436339821e-06, "loss": 0.008, "step": 47190 }, { "epoch": 0.39855607861349773, "grad_norm": 0.37178412079811096, "learning_rate": 9.702766208224724e-06, "loss": 0.0204, "step": 47200 }, { "epoch": 0.39864051846066156, "grad_norm": 0.5568600296974182, "learning_rate": 9.702515877966506e-06, "loss": 0.0151, "step": 47210 }, { "epoch": 0.39872495830782545, "grad_norm": 0.46691805124282837, "learning_rate": 9.702265445570609e-06, "loss": 0.0111, "step": 47220 }, { "epoch": 0.39880939815498934, "grad_norm": 0.6919215321540833, "learning_rate": 9.70201491104247e-06, "loss": 0.0162, "step": 47230 }, { "epoch": 0.3988938380021532, "grad_norm": 0.14789652824401855, "learning_rate": 9.701764274387528e-06, "loss": 0.0124, "step": 47240 }, { "epoch": 0.3989782778493171, "grad_norm": 0.7843741774559021, "learning_rate": 9.701513535611232e-06, "loss": 0.0219, "step": 47250 }, { "epoch": 0.399062717696481, "grad_norm": 0.10284429043531418, "learning_rate": 9.701262694719024e-06, "loss": 0.0051, "step": 47260 }, { "epoch": 0.3991471575436448, "grad_norm": 0.43254533410072327, "learning_rate": 9.701011751716356e-06, "loss": 0.0103, "step": 47270 }, { "epoch": 0.3992315973908087, "grad_norm": 1.1419074535369873, "learning_rate": 9.700760706608674e-06, "loss": 0.0189, "step": 47280 }, { "epoch": 0.3993160372379726, "grad_norm": 0.35038837790489197, "learning_rate": 9.700509559401434e-06, "loss": 0.0134, "step": 47290 }, { "epoch": 0.3994004770851365, "grad_norm": 0.2558107376098633, "learning_rate": 9.700258310100088e-06, "loss": 0.019, "step": 47300 }, { "epoch": 0.39948491693230037, "grad_norm": 0.0991167202591896, "learning_rate": 9.700006958710096e-06, "loss": 0.0155, "step": 47310 }, { "epoch": 0.3995693567794642, "grad_norm": 0.17658212780952454, "learning_rate": 9.699755505236915e-06, "loss": 0.0142, "step": 47320 }, { "epoch": 0.3996537966266281, "grad_norm": 0.4899980127811432, "learning_rate": 9.699503949686008e-06, "loss": 0.0162, "step": 47330 }, { "epoch": 0.399738236473792, "grad_norm": 0.13735485076904297, "learning_rate": 9.699252292062838e-06, "loss": 0.0099, "step": 47340 }, { "epoch": 0.39982267632095586, "grad_norm": 0.3112533390522003, "learning_rate": 9.69900053237287e-06, "loss": 0.0133, "step": 47350 }, { "epoch": 0.39990711616811975, "grad_norm": 0.8688474893569946, "learning_rate": 9.698748670621575e-06, "loss": 0.0258, "step": 47360 }, { "epoch": 0.39999155601528363, "grad_norm": 0.46054720878601074, "learning_rate": 9.698496706814423e-06, "loss": 0.0155, "step": 47370 }, { "epoch": 0.40007599586244746, "grad_norm": 0.2981504201889038, "learning_rate": 9.698244640956884e-06, "loss": 0.0182, "step": 47380 }, { "epoch": 0.40016043570961135, "grad_norm": 0.6089526414871216, "learning_rate": 9.697992473054434e-06, "loss": 0.0195, "step": 47390 }, { "epoch": 0.40024487555677524, "grad_norm": 0.5202596187591553, "learning_rate": 9.69774020311255e-06, "loss": 0.0111, "step": 47400 }, { "epoch": 0.4003293154039391, "grad_norm": 0.9868739247322083, "learning_rate": 9.697487831136711e-06, "loss": 0.0204, "step": 47410 }, { "epoch": 0.400413755251103, "grad_norm": 0.29540106654167175, "learning_rate": 9.6972353571324e-06, "loss": 0.0128, "step": 47420 }, { "epoch": 0.4004981950982669, "grad_norm": 0.17122246325016022, "learning_rate": 9.6969827811051e-06, "loss": 0.0254, "step": 47430 }, { "epoch": 0.4005826349454307, "grad_norm": 0.9685118794441223, "learning_rate": 9.696730103060295e-06, "loss": 0.0093, "step": 47440 }, { "epoch": 0.4006670747925946, "grad_norm": 0.2316712886095047, "learning_rate": 9.696477323003475e-06, "loss": 0.0246, "step": 47450 }, { "epoch": 0.4007515146397585, "grad_norm": 0.15562953054904938, "learning_rate": 9.69622444094013e-06, "loss": 0.0166, "step": 47460 }, { "epoch": 0.4008359544869224, "grad_norm": 0.28525620698928833, "learning_rate": 9.695971456875754e-06, "loss": 0.0094, "step": 47470 }, { "epoch": 0.40092039433408627, "grad_norm": 0.15594129264354706, "learning_rate": 9.695718370815839e-06, "loss": 0.0093, "step": 47480 }, { "epoch": 0.40100483418125016, "grad_norm": 0.3121804893016815, "learning_rate": 9.695465182765881e-06, "loss": 0.0141, "step": 47490 }, { "epoch": 0.401089274028414, "grad_norm": 0.6107414960861206, "learning_rate": 9.695211892731386e-06, "loss": 0.0196, "step": 47500 }, { "epoch": 0.4011737138755779, "grad_norm": 1.078366756439209, "learning_rate": 9.694958500717848e-06, "loss": 0.0245, "step": 47510 }, { "epoch": 0.40125815372274176, "grad_norm": 0.046285904943943024, "learning_rate": 9.694705006730774e-06, "loss": 0.0156, "step": 47520 }, { "epoch": 0.40134259356990565, "grad_norm": 0.2926976680755615, "learning_rate": 9.69445141077567e-06, "loss": 0.0216, "step": 47530 }, { "epoch": 0.40142703341706953, "grad_norm": 0.7232769131660461, "learning_rate": 9.69419771285804e-06, "loss": 0.0221, "step": 47540 }, { "epoch": 0.40151147326423337, "grad_norm": 0.5711764097213745, "learning_rate": 9.693943912983402e-06, "loss": 0.0148, "step": 47550 }, { "epoch": 0.40159591311139725, "grad_norm": 0.15081126987934113, "learning_rate": 9.693690011157261e-06, "loss": 0.0062, "step": 47560 }, { "epoch": 0.40168035295856114, "grad_norm": 0.5461418032646179, "learning_rate": 9.693436007385137e-06, "loss": 0.0139, "step": 47570 }, { "epoch": 0.401764792805725, "grad_norm": 0.4800623953342438, "learning_rate": 9.693181901672544e-06, "loss": 0.0203, "step": 47580 }, { "epoch": 0.4018492326528889, "grad_norm": 0.20971909165382385, "learning_rate": 9.692927694025001e-06, "loss": 0.0255, "step": 47590 }, { "epoch": 0.4019336725000528, "grad_norm": 0.576064944267273, "learning_rate": 9.69267338444803e-06, "loss": 0.0196, "step": 47600 }, { "epoch": 0.4020181123472166, "grad_norm": 2.71317195892334, "learning_rate": 9.692418972947156e-06, "loss": 0.0319, "step": 47610 }, { "epoch": 0.4021025521943805, "grad_norm": 0.4429440498352051, "learning_rate": 9.692164459527903e-06, "loss": 0.022, "step": 47620 }, { "epoch": 0.4021869920415444, "grad_norm": 0.4474315643310547, "learning_rate": 9.6919098441958e-06, "loss": 0.0152, "step": 47630 }, { "epoch": 0.4022714318887083, "grad_norm": 0.21336710453033447, "learning_rate": 9.691655126956376e-06, "loss": 0.0214, "step": 47640 }, { "epoch": 0.4023558717358722, "grad_norm": 0.6792665719985962, "learning_rate": 9.691400307815165e-06, "loss": 0.0197, "step": 47650 }, { "epoch": 0.40244031158303606, "grad_norm": 0.9871473908424377, "learning_rate": 9.691145386777698e-06, "loss": 0.0184, "step": 47660 }, { "epoch": 0.4025247514301999, "grad_norm": 0.34262266755104065, "learning_rate": 9.690890363849516e-06, "loss": 0.0162, "step": 47670 }, { "epoch": 0.4026091912773638, "grad_norm": 0.6011762022972107, "learning_rate": 9.690635239036156e-06, "loss": 0.0239, "step": 47680 }, { "epoch": 0.40269363112452766, "grad_norm": 0.16715377569198608, "learning_rate": 9.69038001234316e-06, "loss": 0.0208, "step": 47690 }, { "epoch": 0.40277807097169155, "grad_norm": 0.6212661266326904, "learning_rate": 9.690124683776074e-06, "loss": 0.0117, "step": 47700 }, { "epoch": 0.40286251081885543, "grad_norm": 0.5358394980430603, "learning_rate": 9.689869253340438e-06, "loss": 0.0133, "step": 47710 }, { "epoch": 0.4029469506660193, "grad_norm": 0.6028103232383728, "learning_rate": 9.689613721041804e-06, "loss": 0.0169, "step": 47720 }, { "epoch": 0.40303139051318315, "grad_norm": 0.17808286845684052, "learning_rate": 9.68935808688572e-06, "loss": 0.0136, "step": 47730 }, { "epoch": 0.40311583036034704, "grad_norm": 0.1244327574968338, "learning_rate": 9.68910235087774e-06, "loss": 0.0102, "step": 47740 }, { "epoch": 0.4032002702075109, "grad_norm": 0.5371721386909485, "learning_rate": 9.688846513023418e-06, "loss": 0.0146, "step": 47750 }, { "epoch": 0.4032847100546748, "grad_norm": 0.19749261438846588, "learning_rate": 9.688590573328311e-06, "loss": 0.0144, "step": 47760 }, { "epoch": 0.4033691499018387, "grad_norm": 0.6403519511222839, "learning_rate": 9.688334531797975e-06, "loss": 0.0188, "step": 47770 }, { "epoch": 0.40345358974900253, "grad_norm": 0.25966876745224, "learning_rate": 9.688078388437977e-06, "loss": 0.0169, "step": 47780 }, { "epoch": 0.4035380295961664, "grad_norm": 0.2833140194416046, "learning_rate": 9.687822143253875e-06, "loss": 0.0126, "step": 47790 }, { "epoch": 0.4036224694433303, "grad_norm": 0.4067140221595764, "learning_rate": 9.687565796251238e-06, "loss": 0.0134, "step": 47800 }, { "epoch": 0.4037069092904942, "grad_norm": 1.1209853887557983, "learning_rate": 9.687309347435631e-06, "loss": 0.0168, "step": 47810 }, { "epoch": 0.4037913491376581, "grad_norm": 0.45964038372039795, "learning_rate": 9.687052796812628e-06, "loss": 0.0166, "step": 47820 }, { "epoch": 0.40387578898482196, "grad_norm": 0.6720736026763916, "learning_rate": 9.686796144387797e-06, "loss": 0.0124, "step": 47830 }, { "epoch": 0.4039602288319858, "grad_norm": 0.2155361920595169, "learning_rate": 9.686539390166714e-06, "loss": 0.0066, "step": 47840 }, { "epoch": 0.4040446686791497, "grad_norm": 0.2010473906993866, "learning_rate": 9.686282534154956e-06, "loss": 0.017, "step": 47850 }, { "epoch": 0.40412910852631356, "grad_norm": 0.19633011519908905, "learning_rate": 9.6860255763581e-06, "loss": 0.0119, "step": 47860 }, { "epoch": 0.40421354837347745, "grad_norm": 0.3708719313144684, "learning_rate": 9.685768516781732e-06, "loss": 0.0142, "step": 47870 }, { "epoch": 0.40429798822064134, "grad_norm": 0.2640582025051117, "learning_rate": 9.685511355431429e-06, "loss": 0.0142, "step": 47880 }, { "epoch": 0.4043824280678052, "grad_norm": 0.46270689368247986, "learning_rate": 9.685254092312782e-06, "loss": 0.0128, "step": 47890 }, { "epoch": 0.40446686791496905, "grad_norm": 0.42817333340644836, "learning_rate": 9.684996727431375e-06, "loss": 0.0148, "step": 47900 }, { "epoch": 0.40455130776213294, "grad_norm": 0.42043042182922363, "learning_rate": 9.684739260792799e-06, "loss": 0.014, "step": 47910 }, { "epoch": 0.4046357476092968, "grad_norm": 0.27757003903388977, "learning_rate": 9.684481692402648e-06, "loss": 0.0082, "step": 47920 }, { "epoch": 0.4047201874564607, "grad_norm": 0.2288074791431427, "learning_rate": 9.684224022266513e-06, "loss": 0.0147, "step": 47930 }, { "epoch": 0.4048046273036246, "grad_norm": 0.3263421952724457, "learning_rate": 9.683966250389991e-06, "loss": 0.0187, "step": 47940 }, { "epoch": 0.4048890671507885, "grad_norm": 0.45133188366889954, "learning_rate": 9.683708376778685e-06, "loss": 0.0158, "step": 47950 }, { "epoch": 0.4049735069979523, "grad_norm": 0.31585341691970825, "learning_rate": 9.68345040143819e-06, "loss": 0.0155, "step": 47960 }, { "epoch": 0.4050579468451162, "grad_norm": 0.25662392377853394, "learning_rate": 9.683192324374114e-06, "loss": 0.0147, "step": 47970 }, { "epoch": 0.4051423866922801, "grad_norm": 0.28588518500328064, "learning_rate": 9.682934145592058e-06, "loss": 0.0117, "step": 47980 }, { "epoch": 0.405226826539444, "grad_norm": 0.43401092290878296, "learning_rate": 9.682675865097634e-06, "loss": 0.0194, "step": 47990 }, { "epoch": 0.40531126638660786, "grad_norm": 0.2636083960533142, "learning_rate": 9.682417482896448e-06, "loss": 0.0189, "step": 48000 }, { "epoch": 0.4053957062337717, "grad_norm": 0.4072818160057068, "learning_rate": 9.682158998994116e-06, "loss": 0.0173, "step": 48010 }, { "epoch": 0.4054801460809356, "grad_norm": 0.0768107995390892, "learning_rate": 9.681900413396247e-06, "loss": 0.0156, "step": 48020 }, { "epoch": 0.40556458592809946, "grad_norm": 0.3282281756401062, "learning_rate": 9.681641726108463e-06, "loss": 0.0063, "step": 48030 }, { "epoch": 0.40564902577526335, "grad_norm": 0.2067234367132187, "learning_rate": 9.681382937136381e-06, "loss": 0.0122, "step": 48040 }, { "epoch": 0.40573346562242724, "grad_norm": 2.2733397483825684, "learning_rate": 9.681124046485619e-06, "loss": 0.0118, "step": 48050 }, { "epoch": 0.4058179054695911, "grad_norm": 0.3455767035484314, "learning_rate": 9.680865054161802e-06, "loss": 0.0208, "step": 48060 }, { "epoch": 0.40590234531675495, "grad_norm": 0.16464781761169434, "learning_rate": 9.680605960170554e-06, "loss": 0.0154, "step": 48070 }, { "epoch": 0.40598678516391884, "grad_norm": 0.08357029408216476, "learning_rate": 9.680346764517507e-06, "loss": 0.017, "step": 48080 }, { "epoch": 0.4060712250110827, "grad_norm": 0.7371446490287781, "learning_rate": 9.680087467208285e-06, "loss": 0.0191, "step": 48090 }, { "epoch": 0.4061556648582466, "grad_norm": 1.302742838859558, "learning_rate": 9.679828068248523e-06, "loss": 0.0152, "step": 48100 }, { "epoch": 0.4062401047054105, "grad_norm": 0.4368840157985687, "learning_rate": 9.679568567643855e-06, "loss": 0.0119, "step": 48110 }, { "epoch": 0.4063245445525744, "grad_norm": 0.21868032217025757, "learning_rate": 9.679308965399916e-06, "loss": 0.0197, "step": 48120 }, { "epoch": 0.4064089843997382, "grad_norm": 0.23047639429569244, "learning_rate": 9.679049261522345e-06, "loss": 0.0198, "step": 48130 }, { "epoch": 0.4064934242469021, "grad_norm": 0.2616678476333618, "learning_rate": 9.678789456016782e-06, "loss": 0.0127, "step": 48140 }, { "epoch": 0.406577864094066, "grad_norm": 0.43107807636260986, "learning_rate": 9.678529548888873e-06, "loss": 0.0114, "step": 48150 }, { "epoch": 0.4066623039412299, "grad_norm": 0.9772319793701172, "learning_rate": 9.678269540144258e-06, "loss": 0.0144, "step": 48160 }, { "epoch": 0.40674674378839376, "grad_norm": 0.7474779486656189, "learning_rate": 9.678009429788589e-06, "loss": 0.0169, "step": 48170 }, { "epoch": 0.40683118363555765, "grad_norm": 0.48974019289016724, "learning_rate": 9.677749217827514e-06, "loss": 0.0134, "step": 48180 }, { "epoch": 0.4069156234827215, "grad_norm": 0.5782164335250854, "learning_rate": 9.677488904266685e-06, "loss": 0.0098, "step": 48190 }, { "epoch": 0.40700006332988536, "grad_norm": 1.0389283895492554, "learning_rate": 9.677228489111755e-06, "loss": 0.0256, "step": 48200 }, { "epoch": 0.40708450317704925, "grad_norm": 0.2879721224308014, "learning_rate": 9.676967972368379e-06, "loss": 0.0107, "step": 48210 }, { "epoch": 0.40716894302421314, "grad_norm": 0.1812462955713272, "learning_rate": 9.676707354042219e-06, "loss": 0.0152, "step": 48220 }, { "epoch": 0.407253382871377, "grad_norm": 0.2798064053058624, "learning_rate": 9.676446634138933e-06, "loss": 0.0106, "step": 48230 }, { "epoch": 0.40733782271854085, "grad_norm": 0.24816837906837463, "learning_rate": 9.676185812664184e-06, "loss": 0.0186, "step": 48240 }, { "epoch": 0.40742226256570474, "grad_norm": 0.26550930738449097, "learning_rate": 9.675924889623638e-06, "loss": 0.0104, "step": 48250 }, { "epoch": 0.4075067024128686, "grad_norm": 0.9988551735877991, "learning_rate": 9.675663865022961e-06, "loss": 0.0126, "step": 48260 }, { "epoch": 0.4075911422600325, "grad_norm": 0.24505724012851715, "learning_rate": 9.675402738867823e-06, "loss": 0.0178, "step": 48270 }, { "epoch": 0.4076755821071964, "grad_norm": 0.42754703760147095, "learning_rate": 9.675141511163896e-06, "loss": 0.0142, "step": 48280 }, { "epoch": 0.4077600219543603, "grad_norm": 0.248442143201828, "learning_rate": 9.674880181916854e-06, "loss": 0.0172, "step": 48290 }, { "epoch": 0.4078444618015241, "grad_norm": 0.7723191380500793, "learning_rate": 9.674618751132371e-06, "loss": 0.0151, "step": 48300 }, { "epoch": 0.407928901648688, "grad_norm": 0.6094405055046082, "learning_rate": 9.674357218816128e-06, "loss": 0.0301, "step": 48310 }, { "epoch": 0.4080133414958519, "grad_norm": 0.7433838844299316, "learning_rate": 9.674095584973804e-06, "loss": 0.0211, "step": 48320 }, { "epoch": 0.4080977813430158, "grad_norm": 0.23966290056705475, "learning_rate": 9.67383384961108e-06, "loss": 0.0202, "step": 48330 }, { "epoch": 0.40818222119017966, "grad_norm": 0.3931000828742981, "learning_rate": 9.673572012733645e-06, "loss": 0.0251, "step": 48340 }, { "epoch": 0.40826666103734355, "grad_norm": 0.5743752121925354, "learning_rate": 9.673310074347182e-06, "loss": 0.014, "step": 48350 }, { "epoch": 0.4083511008845074, "grad_norm": 0.254774808883667, "learning_rate": 9.673048034457382e-06, "loss": 0.0148, "step": 48360 }, { "epoch": 0.40843554073167126, "grad_norm": 0.4796096384525299, "learning_rate": 9.672785893069937e-06, "loss": 0.0172, "step": 48370 }, { "epoch": 0.40851998057883515, "grad_norm": 0.9755672812461853, "learning_rate": 9.67252365019054e-06, "loss": 0.0247, "step": 48380 }, { "epoch": 0.40860442042599904, "grad_norm": 0.08124259859323502, "learning_rate": 9.672261305824887e-06, "loss": 0.0131, "step": 48390 }, { "epoch": 0.4086888602731629, "grad_norm": 0.22570735216140747, "learning_rate": 9.671998859978673e-06, "loss": 0.0219, "step": 48400 }, { "epoch": 0.40877330012032675, "grad_norm": 0.3791678249835968, "learning_rate": 9.671736312657604e-06, "loss": 0.0139, "step": 48410 }, { "epoch": 0.40885773996749064, "grad_norm": 0.6259231567382812, "learning_rate": 9.671473663867378e-06, "loss": 0.0184, "step": 48420 }, { "epoch": 0.4089421798146545, "grad_norm": 0.33596330881118774, "learning_rate": 9.671210913613702e-06, "loss": 0.0197, "step": 48430 }, { "epoch": 0.4090266196618184, "grad_norm": 0.03896318003535271, "learning_rate": 9.670948061902283e-06, "loss": 0.0161, "step": 48440 }, { "epoch": 0.4091110595089823, "grad_norm": 0.5222927927970886, "learning_rate": 9.670685108738828e-06, "loss": 0.0176, "step": 48450 }, { "epoch": 0.4091954993561462, "grad_norm": 0.5839948654174805, "learning_rate": 9.670422054129051e-06, "loss": 0.0191, "step": 48460 }, { "epoch": 0.40927993920331, "grad_norm": 0.0030608251690864563, "learning_rate": 9.670158898078664e-06, "loss": 0.0201, "step": 48470 }, { "epoch": 0.4093643790504739, "grad_norm": 0.24967776238918304, "learning_rate": 9.669895640593381e-06, "loss": 0.0165, "step": 48480 }, { "epoch": 0.4094488188976378, "grad_norm": 0.3876175582408905, "learning_rate": 9.669632281678922e-06, "loss": 0.0129, "step": 48490 }, { "epoch": 0.4095332587448017, "grad_norm": 0.5771124958992004, "learning_rate": 9.669368821341008e-06, "loss": 0.0198, "step": 48500 }, { "epoch": 0.40961769859196556, "grad_norm": 0.16366899013519287, "learning_rate": 9.66910525958536e-06, "loss": 0.0188, "step": 48510 }, { "epoch": 0.40970213843912945, "grad_norm": 0.7304313778877258, "learning_rate": 9.668841596417703e-06, "loss": 0.0121, "step": 48520 }, { "epoch": 0.4097865782862933, "grad_norm": 0.5236789584159851, "learning_rate": 9.66857783184376e-06, "loss": 0.0084, "step": 48530 }, { "epoch": 0.40987101813345717, "grad_norm": 0.6813965439796448, "learning_rate": 9.668313965869267e-06, "loss": 0.0076, "step": 48540 }, { "epoch": 0.40995545798062105, "grad_norm": 0.21248987317085266, "learning_rate": 9.668049998499948e-06, "loss": 0.0096, "step": 48550 }, { "epoch": 0.41003989782778494, "grad_norm": 0.626669704914093, "learning_rate": 9.667785929741542e-06, "loss": 0.0118, "step": 48560 }, { "epoch": 0.4101243376749488, "grad_norm": 0.2636741101741791, "learning_rate": 9.667521759599782e-06, "loss": 0.0146, "step": 48570 }, { "epoch": 0.4102087775221127, "grad_norm": 0.31324952840805054, "learning_rate": 9.667257488080405e-06, "loss": 0.0289, "step": 48580 }, { "epoch": 0.41029321736927654, "grad_norm": 0.5121110677719116, "learning_rate": 9.666993115189152e-06, "loss": 0.009, "step": 48590 }, { "epoch": 0.41037765721644043, "grad_norm": 4.557004928588867, "learning_rate": 9.666728640931764e-06, "loss": 0.0172, "step": 48600 }, { "epoch": 0.4104620970636043, "grad_norm": 0.2424023151397705, "learning_rate": 9.666464065313987e-06, "loss": 0.024, "step": 48610 }, { "epoch": 0.4105465369107682, "grad_norm": 0.20027749240398407, "learning_rate": 9.666199388341568e-06, "loss": 0.0069, "step": 48620 }, { "epoch": 0.4106309767579321, "grad_norm": 0.2154524028301239, "learning_rate": 9.665934610020253e-06, "loss": 0.0079, "step": 48630 }, { "epoch": 0.4107154166050959, "grad_norm": 0.8387544751167297, "learning_rate": 9.665669730355795e-06, "loss": 0.0149, "step": 48640 }, { "epoch": 0.4107998564522598, "grad_norm": 0.5625001192092896, "learning_rate": 9.665404749353946e-06, "loss": 0.0137, "step": 48650 }, { "epoch": 0.4108842962994237, "grad_norm": 0.12175406515598297, "learning_rate": 9.665139667020462e-06, "loss": 0.0146, "step": 48660 }, { "epoch": 0.4109687361465876, "grad_norm": 0.5196977853775024, "learning_rate": 9.664874483361101e-06, "loss": 0.011, "step": 48670 }, { "epoch": 0.41105317599375146, "grad_norm": 0.20544028282165527, "learning_rate": 9.664609198381623e-06, "loss": 0.0133, "step": 48680 }, { "epoch": 0.41113761584091535, "grad_norm": 0.3106830418109894, "learning_rate": 9.664343812087786e-06, "loss": 0.0103, "step": 48690 }, { "epoch": 0.4112220556880792, "grad_norm": 0.6906141042709351, "learning_rate": 9.66407832448536e-06, "loss": 0.0161, "step": 48700 }, { "epoch": 0.41130649553524307, "grad_norm": 0.569938063621521, "learning_rate": 9.663812735580106e-06, "loss": 0.0101, "step": 48710 }, { "epoch": 0.41139093538240695, "grad_norm": 0.6244493722915649, "learning_rate": 9.663547045377798e-06, "loss": 0.0185, "step": 48720 }, { "epoch": 0.41147537522957084, "grad_norm": 0.48153072595596313, "learning_rate": 9.663281253884203e-06, "loss": 0.0103, "step": 48730 }, { "epoch": 0.4115598150767347, "grad_norm": 0.1838473677635193, "learning_rate": 9.663015361105094e-06, "loss": 0.0082, "step": 48740 }, { "epoch": 0.4116442549238986, "grad_norm": 0.7485368251800537, "learning_rate": 9.662749367046246e-06, "loss": 0.0128, "step": 48750 }, { "epoch": 0.41172869477106244, "grad_norm": 0.2506016492843628, "learning_rate": 9.662483271713439e-06, "loss": 0.0147, "step": 48760 }, { "epoch": 0.41181313461822633, "grad_norm": 0.37708598375320435, "learning_rate": 9.66221707511245e-06, "loss": 0.0154, "step": 48770 }, { "epoch": 0.4118975744653902, "grad_norm": 0.7556203007698059, "learning_rate": 9.661950777249062e-06, "loss": 0.0084, "step": 48780 }, { "epoch": 0.4119820143125541, "grad_norm": 0.6749032735824585, "learning_rate": 9.661684378129059e-06, "loss": 0.0117, "step": 48790 }, { "epoch": 0.412066454159718, "grad_norm": 0.24189315736293793, "learning_rate": 9.661417877758226e-06, "loss": 0.0212, "step": 48800 }, { "epoch": 0.4121508940068819, "grad_norm": 0.26867184042930603, "learning_rate": 9.66115127614235e-06, "loss": 0.0151, "step": 48810 }, { "epoch": 0.4122353338540457, "grad_norm": 0.3546833395957947, "learning_rate": 9.660884573287225e-06, "loss": 0.0181, "step": 48820 }, { "epoch": 0.4123197737012096, "grad_norm": 0.7381319403648376, "learning_rate": 9.660617769198644e-06, "loss": 0.0181, "step": 48830 }, { "epoch": 0.4124042135483735, "grad_norm": 0.4928869605064392, "learning_rate": 9.660350863882397e-06, "loss": 0.0121, "step": 48840 }, { "epoch": 0.41248865339553736, "grad_norm": 0.14909620583057404, "learning_rate": 9.660083857344287e-06, "loss": 0.0102, "step": 48850 }, { "epoch": 0.41257309324270125, "grad_norm": 0.20260946452617645, "learning_rate": 9.659816749590108e-06, "loss": 0.0092, "step": 48860 }, { "epoch": 0.4126575330898651, "grad_norm": 0.43222126364707947, "learning_rate": 9.659549540625665e-06, "loss": 0.0219, "step": 48870 }, { "epoch": 0.41274197293702897, "grad_norm": 0.4029684066772461, "learning_rate": 9.659282230456762e-06, "loss": 0.0135, "step": 48880 }, { "epoch": 0.41282641278419285, "grad_norm": 0.36444932222366333, "learning_rate": 9.659014819089203e-06, "loss": 0.0134, "step": 48890 }, { "epoch": 0.41291085263135674, "grad_norm": 0.6907669305801392, "learning_rate": 9.658747306528796e-06, "loss": 0.0161, "step": 48900 }, { "epoch": 0.4129952924785206, "grad_norm": 0.6233653426170349, "learning_rate": 9.658479692781353e-06, "loss": 0.014, "step": 48910 }, { "epoch": 0.4130797323256845, "grad_norm": 0.3255934417247772, "learning_rate": 9.658211977852685e-06, "loss": 0.0112, "step": 48920 }, { "epoch": 0.41316417217284834, "grad_norm": 0.5160901546478271, "learning_rate": 9.657944161748608e-06, "loss": 0.0213, "step": 48930 }, { "epoch": 0.41324861202001223, "grad_norm": 0.7305413484573364, "learning_rate": 9.657676244474939e-06, "loss": 0.0303, "step": 48940 }, { "epoch": 0.4133330518671761, "grad_norm": 0.4747922420501709, "learning_rate": 9.657408226037495e-06, "loss": 0.0224, "step": 48950 }, { "epoch": 0.41341749171434, "grad_norm": 0.30797550082206726, "learning_rate": 9.6571401064421e-06, "loss": 0.0154, "step": 48960 }, { "epoch": 0.4135019315615039, "grad_norm": 0.3518759310245514, "learning_rate": 9.656871885694575e-06, "loss": 0.016, "step": 48970 }, { "epoch": 0.4135863714086678, "grad_norm": 0.5800378918647766, "learning_rate": 9.656603563800747e-06, "loss": 0.0129, "step": 48980 }, { "epoch": 0.4136708112558316, "grad_norm": 0.32903584837913513, "learning_rate": 9.656335140766444e-06, "loss": 0.0228, "step": 48990 }, { "epoch": 0.4137552511029955, "grad_norm": 0.4447082281112671, "learning_rate": 9.656066616597498e-06, "loss": 0.0214, "step": 49000 }, { "epoch": 0.4138396909501594, "grad_norm": 0.32882389426231384, "learning_rate": 9.655797991299736e-06, "loss": 0.0199, "step": 49010 }, { "epoch": 0.41392413079732326, "grad_norm": 0.3467368483543396, "learning_rate": 9.655529264878997e-06, "loss": 0.0128, "step": 49020 }, { "epoch": 0.41400857064448715, "grad_norm": 0.4664914906024933, "learning_rate": 9.655260437341116e-06, "loss": 0.0119, "step": 49030 }, { "epoch": 0.41409301049165104, "grad_norm": 0.39653611183166504, "learning_rate": 9.654991508691934e-06, "loss": 0.0076, "step": 49040 }, { "epoch": 0.41417745033881487, "grad_norm": 0.5673829317092896, "learning_rate": 9.654722478937288e-06, "loss": 0.0116, "step": 49050 }, { "epoch": 0.41426189018597875, "grad_norm": 0.6199439764022827, "learning_rate": 9.654453348083026e-06, "loss": 0.0167, "step": 49060 }, { "epoch": 0.41434633003314264, "grad_norm": 0.1781410127878189, "learning_rate": 9.65418411613499e-06, "loss": 0.0164, "step": 49070 }, { "epoch": 0.4144307698803065, "grad_norm": 0.17803522944450378, "learning_rate": 9.653914783099028e-06, "loss": 0.0215, "step": 49080 }, { "epoch": 0.4145152097274704, "grad_norm": 1.138421654701233, "learning_rate": 9.653645348980991e-06, "loss": 0.0232, "step": 49090 }, { "epoch": 0.41459964957463424, "grad_norm": 0.5188801884651184, "learning_rate": 9.653375813786731e-06, "loss": 0.0154, "step": 49100 }, { "epoch": 0.41468408942179813, "grad_norm": 0.6687106490135193, "learning_rate": 9.6531061775221e-06, "loss": 0.0162, "step": 49110 }, { "epoch": 0.414768529268962, "grad_norm": 0.09769334644079208, "learning_rate": 9.652836440192959e-06, "loss": 0.015, "step": 49120 }, { "epoch": 0.4148529691161259, "grad_norm": 0.5679866671562195, "learning_rate": 9.652566601805162e-06, "loss": 0.0235, "step": 49130 }, { "epoch": 0.4149374089632898, "grad_norm": 0.34171032905578613, "learning_rate": 9.652296662364574e-06, "loss": 0.016, "step": 49140 }, { "epoch": 0.4150218488104537, "grad_norm": 0.36852553486824036, "learning_rate": 9.652026621877054e-06, "loss": 0.0175, "step": 49150 }, { "epoch": 0.4151062886576175, "grad_norm": 0.8603254556655884, "learning_rate": 9.65175648034847e-06, "loss": 0.0187, "step": 49160 }, { "epoch": 0.4151907285047814, "grad_norm": 0.4634445011615753, "learning_rate": 9.651486237784689e-06, "loss": 0.0224, "step": 49170 }, { "epoch": 0.4152751683519453, "grad_norm": 0.20572789013385773, "learning_rate": 9.651215894191577e-06, "loss": 0.0113, "step": 49180 }, { "epoch": 0.41535960819910916, "grad_norm": 0.4881800413131714, "learning_rate": 9.650945449575013e-06, "loss": 0.0113, "step": 49190 }, { "epoch": 0.41544404804627305, "grad_norm": 0.22409360110759735, "learning_rate": 9.650674903940864e-06, "loss": 0.0213, "step": 49200 }, { "epoch": 0.41552848789343694, "grad_norm": 0.24558991193771362, "learning_rate": 9.65040425729501e-06, "loss": 0.0143, "step": 49210 }, { "epoch": 0.41561292774060077, "grad_norm": 0.6913562417030334, "learning_rate": 9.650133509643329e-06, "loss": 0.0129, "step": 49220 }, { "epoch": 0.41569736758776465, "grad_norm": 0.4870592951774597, "learning_rate": 9.6498626609917e-06, "loss": 0.0124, "step": 49230 }, { "epoch": 0.41578180743492854, "grad_norm": 0.4078560471534729, "learning_rate": 9.649591711346008e-06, "loss": 0.0089, "step": 49240 }, { "epoch": 0.4158662472820924, "grad_norm": 0.8315919041633606, "learning_rate": 9.649320660712135e-06, "loss": 0.0345, "step": 49250 }, { "epoch": 0.4159506871292563, "grad_norm": 0.7591879367828369, "learning_rate": 9.649049509095974e-06, "loss": 0.0149, "step": 49260 }, { "epoch": 0.4160351269764202, "grad_norm": 0.4995865225791931, "learning_rate": 9.648778256503406e-06, "loss": 0.0137, "step": 49270 }, { "epoch": 0.41611956682358403, "grad_norm": 0.9026956558227539, "learning_rate": 9.64850690294033e-06, "loss": 0.0102, "step": 49280 }, { "epoch": 0.4162040066707479, "grad_norm": 0.5760571360588074, "learning_rate": 9.648235448412635e-06, "loss": 0.0164, "step": 49290 }, { "epoch": 0.4162884465179118, "grad_norm": 0.7256211042404175, "learning_rate": 9.647963892926217e-06, "loss": 0.0111, "step": 49300 }, { "epoch": 0.4163728863650757, "grad_norm": 0.7250210642814636, "learning_rate": 9.647692236486978e-06, "loss": 0.0096, "step": 49310 }, { "epoch": 0.4164573262122396, "grad_norm": 0.4339219033718109, "learning_rate": 9.647420479100817e-06, "loss": 0.0139, "step": 49320 }, { "epoch": 0.4165417660594034, "grad_norm": 0.5241221189498901, "learning_rate": 9.647148620773633e-06, "loss": 0.0111, "step": 49330 }, { "epoch": 0.4166262059065673, "grad_norm": 0.9571835994720459, "learning_rate": 9.646876661511336e-06, "loss": 0.021, "step": 49340 }, { "epoch": 0.4167106457537312, "grad_norm": 0.3834715783596039, "learning_rate": 9.646604601319827e-06, "loss": 0.0133, "step": 49350 }, { "epoch": 0.41679508560089507, "grad_norm": 0.3294227123260498, "learning_rate": 9.64633244020502e-06, "loss": 0.0114, "step": 49360 }, { "epoch": 0.41687952544805895, "grad_norm": 0.3239247798919678, "learning_rate": 9.646060178172824e-06, "loss": 0.0144, "step": 49370 }, { "epoch": 0.41696396529522284, "grad_norm": 0.17865824699401855, "learning_rate": 9.645787815229153e-06, "loss": 0.0256, "step": 49380 }, { "epoch": 0.41704840514238667, "grad_norm": 0.4947957992553711, "learning_rate": 9.645515351379924e-06, "loss": 0.0186, "step": 49390 }, { "epoch": 0.41713284498955056, "grad_norm": 0.44431763887405396, "learning_rate": 9.645242786631052e-06, "loss": 0.0251, "step": 49400 }, { "epoch": 0.41721728483671444, "grad_norm": 0.3509969115257263, "learning_rate": 9.644970120988458e-06, "loss": 0.0101, "step": 49410 }, { "epoch": 0.41730172468387833, "grad_norm": 0.29507261514663696, "learning_rate": 9.644697354458065e-06, "loss": 0.0129, "step": 49420 }, { "epoch": 0.4173861645310422, "grad_norm": 0.3206820487976074, "learning_rate": 9.644424487045799e-06, "loss": 0.0184, "step": 49430 }, { "epoch": 0.4174706043782061, "grad_norm": 0.46002018451690674, "learning_rate": 9.644151518757584e-06, "loss": 0.014, "step": 49440 }, { "epoch": 0.41755504422536993, "grad_norm": 1.1491034030914307, "learning_rate": 9.64387844959935e-06, "loss": 0.0143, "step": 49450 }, { "epoch": 0.4176394840725338, "grad_norm": 0.6920192241668701, "learning_rate": 9.643605279577026e-06, "loss": 0.023, "step": 49460 }, { "epoch": 0.4177239239196977, "grad_norm": 0.5031805038452148, "learning_rate": 9.643332008696549e-06, "loss": 0.0118, "step": 49470 }, { "epoch": 0.4178083637668616, "grad_norm": 1.2134677171707153, "learning_rate": 9.643058636963851e-06, "loss": 0.0196, "step": 49480 }, { "epoch": 0.4178928036140255, "grad_norm": 0.6582565903663635, "learning_rate": 9.64278516438487e-06, "loss": 0.0243, "step": 49490 }, { "epoch": 0.41797724346118936, "grad_norm": 0.3790087103843689, "learning_rate": 9.64251159096555e-06, "loss": 0.018, "step": 49500 }, { "epoch": 0.4180616833083532, "grad_norm": 0.49286577105522156, "learning_rate": 9.642237916711828e-06, "loss": 0.013, "step": 49510 }, { "epoch": 0.4181461231555171, "grad_norm": 0.6413414478302002, "learning_rate": 9.64196414162965e-06, "loss": 0.0158, "step": 49520 }, { "epoch": 0.41823056300268097, "grad_norm": 0.3634885251522064, "learning_rate": 9.64169026572496e-06, "loss": 0.012, "step": 49530 }, { "epoch": 0.41831500284984485, "grad_norm": 0.6857268810272217, "learning_rate": 9.64141628900371e-06, "loss": 0.0178, "step": 49540 }, { "epoch": 0.41839944269700874, "grad_norm": 0.5870158672332764, "learning_rate": 9.64114221147185e-06, "loss": 0.0228, "step": 49550 }, { "epoch": 0.41848388254417257, "grad_norm": 0.3201458156108856, "learning_rate": 9.640868033135331e-06, "loss": 0.0161, "step": 49560 }, { "epoch": 0.41856832239133646, "grad_norm": 0.1485864520072937, "learning_rate": 9.64059375400011e-06, "loss": 0.0108, "step": 49570 }, { "epoch": 0.41865276223850034, "grad_norm": 0.4188668727874756, "learning_rate": 9.640319374072144e-06, "loss": 0.0123, "step": 49580 }, { "epoch": 0.41873720208566423, "grad_norm": 0.06968483328819275, "learning_rate": 9.64004489335739e-06, "loss": 0.0098, "step": 49590 }, { "epoch": 0.4188216419328281, "grad_norm": 0.38014528155326843, "learning_rate": 9.639770311861814e-06, "loss": 0.0152, "step": 49600 }, { "epoch": 0.418906081779992, "grad_norm": 0.1375202238559723, "learning_rate": 9.639495629591376e-06, "loss": 0.0119, "step": 49610 }, { "epoch": 0.41899052162715583, "grad_norm": 0.324419230222702, "learning_rate": 9.639220846552045e-06, "loss": 0.015, "step": 49620 }, { "epoch": 0.4190749614743197, "grad_norm": 0.36222654581069946, "learning_rate": 9.638945962749789e-06, "loss": 0.0147, "step": 49630 }, { "epoch": 0.4191594013214836, "grad_norm": 0.11074647307395935, "learning_rate": 9.638670978190574e-06, "loss": 0.0168, "step": 49640 }, { "epoch": 0.4192438411686475, "grad_norm": 0.28212350606918335, "learning_rate": 9.63839589288038e-06, "loss": 0.0258, "step": 49650 }, { "epoch": 0.4193282810158114, "grad_norm": 0.6676247715950012, "learning_rate": 9.638120706825174e-06, "loss": 0.0193, "step": 49660 }, { "epoch": 0.41941272086297526, "grad_norm": 0.2056153565645218, "learning_rate": 9.637845420030937e-06, "loss": 0.0073, "step": 49670 }, { "epoch": 0.4194971607101391, "grad_norm": 0.24737484753131866, "learning_rate": 9.63757003250365e-06, "loss": 0.0195, "step": 49680 }, { "epoch": 0.419581600557303, "grad_norm": 0.4221281111240387, "learning_rate": 9.63729454424929e-06, "loss": 0.0311, "step": 49690 }, { "epoch": 0.41966604040446687, "grad_norm": 0.6381593942642212, "learning_rate": 9.637018955273845e-06, "loss": 0.0193, "step": 49700 }, { "epoch": 0.41975048025163075, "grad_norm": 0.11414030939340591, "learning_rate": 9.636743265583298e-06, "loss": 0.014, "step": 49710 }, { "epoch": 0.41983492009879464, "grad_norm": 0.4332643151283264, "learning_rate": 9.636467475183635e-06, "loss": 0.0171, "step": 49720 }, { "epoch": 0.41991935994595847, "grad_norm": 0.7704703211784363, "learning_rate": 9.63619158408085e-06, "loss": 0.0111, "step": 49730 }, { "epoch": 0.42000379979312236, "grad_norm": 0.43901702761650085, "learning_rate": 9.635915592280936e-06, "loss": 0.0125, "step": 49740 }, { "epoch": 0.42008823964028624, "grad_norm": 0.48388126492500305, "learning_rate": 9.635639499789883e-06, "loss": 0.0115, "step": 49750 }, { "epoch": 0.42017267948745013, "grad_norm": 0.899091362953186, "learning_rate": 9.63536330661369e-06, "loss": 0.0203, "step": 49760 }, { "epoch": 0.420257119334614, "grad_norm": 0.8853805661201477, "learning_rate": 9.635087012758356e-06, "loss": 0.0141, "step": 49770 }, { "epoch": 0.4203415591817779, "grad_norm": 0.570271372795105, "learning_rate": 9.634810618229882e-06, "loss": 0.0088, "step": 49780 }, { "epoch": 0.42042599902894173, "grad_norm": 0.005441333167254925, "learning_rate": 9.634534123034272e-06, "loss": 0.0214, "step": 49790 }, { "epoch": 0.4205104388761056, "grad_norm": 0.3037099242210388, "learning_rate": 9.63425752717753e-06, "loss": 0.0133, "step": 49800 }, { "epoch": 0.4205948787232695, "grad_norm": 1.0758544206619263, "learning_rate": 9.633980830665663e-06, "loss": 0.0219, "step": 49810 }, { "epoch": 0.4206793185704334, "grad_norm": 0.9874342679977417, "learning_rate": 9.633704033504685e-06, "loss": 0.0164, "step": 49820 }, { "epoch": 0.4207637584175973, "grad_norm": 0.3683077096939087, "learning_rate": 9.633427135700601e-06, "loss": 0.0076, "step": 49830 }, { "epoch": 0.42084819826476116, "grad_norm": 0.32124975323677063, "learning_rate": 9.633150137259432e-06, "loss": 0.0207, "step": 49840 }, { "epoch": 0.420932638111925, "grad_norm": 0.2561127543449402, "learning_rate": 9.632873038187191e-06, "loss": 0.0124, "step": 49850 }, { "epoch": 0.4210170779590889, "grad_norm": 0.6561243534088135, "learning_rate": 9.632595838489896e-06, "loss": 0.0269, "step": 49860 }, { "epoch": 0.42110151780625277, "grad_norm": 0.3204054534435272, "learning_rate": 9.632318538173569e-06, "loss": 0.0213, "step": 49870 }, { "epoch": 0.42118595765341665, "grad_norm": 0.47587713599205017, "learning_rate": 9.632041137244234e-06, "loss": 0.013, "step": 49880 }, { "epoch": 0.42127039750058054, "grad_norm": 0.32612550258636475, "learning_rate": 9.631763635707914e-06, "loss": 0.0102, "step": 49890 }, { "epoch": 0.4213548373477444, "grad_norm": 0.28820958733558655, "learning_rate": 9.631486033570636e-06, "loss": 0.0306, "step": 49900 }, { "epoch": 0.42143927719490826, "grad_norm": 0.2784460484981537, "learning_rate": 9.63120833083843e-06, "loss": 0.0113, "step": 49910 }, { "epoch": 0.42152371704207214, "grad_norm": 0.7501488327980042, "learning_rate": 9.630930527517329e-06, "loss": 0.0252, "step": 49920 }, { "epoch": 0.42160815688923603, "grad_norm": 0.6999574899673462, "learning_rate": 9.630652623613365e-06, "loss": 0.0151, "step": 49930 }, { "epoch": 0.4216925967363999, "grad_norm": 0.37976911664009094, "learning_rate": 9.630374619132577e-06, "loss": 0.0106, "step": 49940 }, { "epoch": 0.4217770365835638, "grad_norm": 0.3701522648334503, "learning_rate": 9.630096514081e-06, "loss": 0.0155, "step": 49950 }, { "epoch": 0.42186147643072763, "grad_norm": 0.22902823984622955, "learning_rate": 9.629818308464676e-06, "loss": 0.0099, "step": 49960 }, { "epoch": 0.4219459162778915, "grad_norm": 0.24206404387950897, "learning_rate": 9.629540002289646e-06, "loss": 0.0128, "step": 49970 }, { "epoch": 0.4220303561250554, "grad_norm": 0.06445787847042084, "learning_rate": 9.629261595561956e-06, "loss": 0.0162, "step": 49980 }, { "epoch": 0.4221147959722193, "grad_norm": 0.9906672239303589, "learning_rate": 9.628983088287654e-06, "loss": 0.0158, "step": 49990 }, { "epoch": 0.4221992358193832, "grad_norm": 0.35611897706985474, "learning_rate": 9.628704480472786e-06, "loss": 0.0174, "step": 50000 }, { "epoch": 0.42228367566654706, "grad_norm": 0.09200850129127502, "learning_rate": 9.628425772123406e-06, "loss": 0.0124, "step": 50010 }, { "epoch": 0.4223681155137109, "grad_norm": 0.33546993136405945, "learning_rate": 9.628146963245569e-06, "loss": 0.0144, "step": 50020 }, { "epoch": 0.4224525553608748, "grad_norm": 0.01251484639942646, "learning_rate": 9.627868053845324e-06, "loss": 0.0209, "step": 50030 }, { "epoch": 0.42253699520803867, "grad_norm": 0.6290921568870544, "learning_rate": 9.627589043928735e-06, "loss": 0.0134, "step": 50040 }, { "epoch": 0.42262143505520255, "grad_norm": 0.40213465690612793, "learning_rate": 9.627309933501863e-06, "loss": 0.0132, "step": 50050 }, { "epoch": 0.42270587490236644, "grad_norm": 0.30036675930023193, "learning_rate": 9.627030722570764e-06, "loss": 0.0137, "step": 50060 }, { "epoch": 0.4227903147495303, "grad_norm": 0.9375391006469727, "learning_rate": 9.626751411141507e-06, "loss": 0.0158, "step": 50070 }, { "epoch": 0.42287475459669416, "grad_norm": 0.2587231397628784, "learning_rate": 9.626471999220157e-06, "loss": 0.0205, "step": 50080 }, { "epoch": 0.42295919444385804, "grad_norm": 0.23564748466014862, "learning_rate": 9.626192486812783e-06, "loss": 0.0132, "step": 50090 }, { "epoch": 0.42304363429102193, "grad_norm": 0.681734561920166, "learning_rate": 9.625912873925459e-06, "loss": 0.0217, "step": 50100 }, { "epoch": 0.4231280741381858, "grad_norm": 0.36617177724838257, "learning_rate": 9.625633160564253e-06, "loss": 0.015, "step": 50110 }, { "epoch": 0.4232125139853497, "grad_norm": 0.1846238076686859, "learning_rate": 9.625353346735242e-06, "loss": 0.0175, "step": 50120 }, { "epoch": 0.4232969538325136, "grad_norm": 0.2844659090042114, "learning_rate": 9.625073432444506e-06, "loss": 0.0112, "step": 50130 }, { "epoch": 0.4233813936796774, "grad_norm": 0.40098926424980164, "learning_rate": 9.624793417698122e-06, "loss": 0.0108, "step": 50140 }, { "epoch": 0.4234658335268413, "grad_norm": 1.0497212409973145, "learning_rate": 9.624513302502172e-06, "loss": 0.0225, "step": 50150 }, { "epoch": 0.4235502733740052, "grad_norm": 0.4506777226924896, "learning_rate": 9.624233086862741e-06, "loss": 0.0186, "step": 50160 }, { "epoch": 0.4236347132211691, "grad_norm": 0.24843771755695343, "learning_rate": 9.623952770785916e-06, "loss": 0.0156, "step": 50170 }, { "epoch": 0.42371915306833297, "grad_norm": 0.2602195143699646, "learning_rate": 9.623672354277784e-06, "loss": 0.0168, "step": 50180 }, { "epoch": 0.4238035929154968, "grad_norm": 1.053064227104187, "learning_rate": 9.623391837344434e-06, "loss": 0.013, "step": 50190 }, { "epoch": 0.4238880327626607, "grad_norm": 0.11522403359413147, "learning_rate": 9.623111219991962e-06, "loss": 0.0164, "step": 50200 }, { "epoch": 0.42397247260982457, "grad_norm": 0.2888794541358948, "learning_rate": 9.622830502226462e-06, "loss": 0.017, "step": 50210 }, { "epoch": 0.42405691245698846, "grad_norm": 0.6708943247795105, "learning_rate": 9.62254968405403e-06, "loss": 0.0162, "step": 50220 }, { "epoch": 0.42414135230415234, "grad_norm": 0.18981723487377167, "learning_rate": 9.622268765480768e-06, "loss": 0.0151, "step": 50230 }, { "epoch": 0.42422579215131623, "grad_norm": 0.41480889916419983, "learning_rate": 9.621987746512774e-06, "loss": 0.0115, "step": 50240 }, { "epoch": 0.42431023199848006, "grad_norm": 0.4192391335964203, "learning_rate": 9.621706627156155e-06, "loss": 0.0257, "step": 50250 }, { "epoch": 0.42439467184564394, "grad_norm": 0.15194682776927948, "learning_rate": 9.621425407417014e-06, "loss": 0.0187, "step": 50260 }, { "epoch": 0.42447911169280783, "grad_norm": 0.360551118850708, "learning_rate": 9.621144087301459e-06, "loss": 0.0155, "step": 50270 }, { "epoch": 0.4245635515399717, "grad_norm": 0.568776547908783, "learning_rate": 9.620862666815602e-06, "loss": 0.0222, "step": 50280 }, { "epoch": 0.4246479913871356, "grad_norm": 0.6784580945968628, "learning_rate": 9.620581145965554e-06, "loss": 0.0106, "step": 50290 }, { "epoch": 0.4247324312342995, "grad_norm": 0.14060594141483307, "learning_rate": 9.620299524757433e-06, "loss": 0.0117, "step": 50300 }, { "epoch": 0.4248168710814633, "grad_norm": 0.4239858090877533, "learning_rate": 9.620017803197352e-06, "loss": 0.0125, "step": 50310 }, { "epoch": 0.4249013109286272, "grad_norm": 0.28024759888648987, "learning_rate": 9.61973598129143e-06, "loss": 0.016, "step": 50320 }, { "epoch": 0.4249857507757911, "grad_norm": 0.5748295783996582, "learning_rate": 9.61945405904579e-06, "loss": 0.0158, "step": 50330 }, { "epoch": 0.425070190622955, "grad_norm": 0.3780383765697479, "learning_rate": 9.619172036466555e-06, "loss": 0.0119, "step": 50340 }, { "epoch": 0.42515463047011887, "grad_norm": 0.362848162651062, "learning_rate": 9.618889913559849e-06, "loss": 0.0159, "step": 50350 }, { "epoch": 0.42523907031728275, "grad_norm": 0.5288821458816528, "learning_rate": 9.6186076903318e-06, "loss": 0.0154, "step": 50360 }, { "epoch": 0.4253235101644466, "grad_norm": 0.4487074017524719, "learning_rate": 9.618325366788538e-06, "loss": 0.024, "step": 50370 }, { "epoch": 0.42540795001161047, "grad_norm": 0.15514834225177765, "learning_rate": 9.6180429429362e-06, "loss": 0.0144, "step": 50380 }, { "epoch": 0.42549238985877436, "grad_norm": 0.316938579082489, "learning_rate": 9.61776041878091e-06, "loss": 0.023, "step": 50390 }, { "epoch": 0.42557682970593824, "grad_norm": 0.2517208158969879, "learning_rate": 9.617477794328813e-06, "loss": 0.0143, "step": 50400 }, { "epoch": 0.42566126955310213, "grad_norm": 1.3129210472106934, "learning_rate": 9.617195069586043e-06, "loss": 0.0102, "step": 50410 }, { "epoch": 0.42574570940026596, "grad_norm": 0.34724488854408264, "learning_rate": 9.616912244558743e-06, "loss": 0.0158, "step": 50420 }, { "epoch": 0.42583014924742985, "grad_norm": 0.540511429309845, "learning_rate": 9.616629319253055e-06, "loss": 0.0232, "step": 50430 }, { "epoch": 0.42591458909459373, "grad_norm": 0.29794326424598694, "learning_rate": 9.616346293675126e-06, "loss": 0.0176, "step": 50440 }, { "epoch": 0.4259990289417576, "grad_norm": 1.1414740085601807, "learning_rate": 9.616063167831098e-06, "loss": 0.0119, "step": 50450 }, { "epoch": 0.4260834687889215, "grad_norm": 0.6392119526863098, "learning_rate": 9.615779941727126e-06, "loss": 0.0164, "step": 50460 }, { "epoch": 0.4261679086360854, "grad_norm": 0.7975460290908813, "learning_rate": 9.61549661536936e-06, "loss": 0.0205, "step": 50470 }, { "epoch": 0.4262523484832492, "grad_norm": 0.360995352268219, "learning_rate": 9.615213188763952e-06, "loss": 0.0222, "step": 50480 }, { "epoch": 0.4263367883304131, "grad_norm": 0.4079287350177765, "learning_rate": 9.614929661917059e-06, "loss": 0.0149, "step": 50490 }, { "epoch": 0.426421228177577, "grad_norm": 0.6804162859916687, "learning_rate": 9.61464603483484e-06, "loss": 0.0155, "step": 50500 }, { "epoch": 0.4265056680247409, "grad_norm": 0.45403122901916504, "learning_rate": 9.614362307523457e-06, "loss": 0.015, "step": 50510 }, { "epoch": 0.42659010787190477, "grad_norm": 0.3203665018081665, "learning_rate": 9.614078479989068e-06, "loss": 0.0155, "step": 50520 }, { "epoch": 0.42667454771906865, "grad_norm": 0.6923982501029968, "learning_rate": 9.613794552237839e-06, "loss": 0.0199, "step": 50530 }, { "epoch": 0.4267589875662325, "grad_norm": 0.20006616413593292, "learning_rate": 9.61351052427594e-06, "loss": 0.0202, "step": 50540 }, { "epoch": 0.42684342741339637, "grad_norm": 0.4129423499107361, "learning_rate": 9.613226396109536e-06, "loss": 0.0164, "step": 50550 }, { "epoch": 0.42692786726056026, "grad_norm": 0.43358659744262695, "learning_rate": 9.612942167744801e-06, "loss": 0.0136, "step": 50560 }, { "epoch": 0.42701230710772414, "grad_norm": 0.23883230984210968, "learning_rate": 9.612657839187907e-06, "loss": 0.0154, "step": 50570 }, { "epoch": 0.42709674695488803, "grad_norm": 0.3877975344657898, "learning_rate": 9.61237341044503e-06, "loss": 0.0146, "step": 50580 }, { "epoch": 0.4271811868020519, "grad_norm": 0.6354855895042419, "learning_rate": 9.612088881522348e-06, "loss": 0.0112, "step": 50590 }, { "epoch": 0.42726562664921575, "grad_norm": 0.10338442027568817, "learning_rate": 9.61180425242604e-06, "loss": 0.0122, "step": 50600 }, { "epoch": 0.42735006649637963, "grad_norm": 0.2543497681617737, "learning_rate": 9.61151952316229e-06, "loss": 0.0149, "step": 50610 }, { "epoch": 0.4274345063435435, "grad_norm": 0.3852337896823883, "learning_rate": 9.61123469373728e-06, "loss": 0.0156, "step": 50620 }, { "epoch": 0.4275189461907074, "grad_norm": 0.2575954794883728, "learning_rate": 9.610949764157198e-06, "loss": 0.0132, "step": 50630 }, { "epoch": 0.4276033860378713, "grad_norm": 0.001632589497603476, "learning_rate": 9.61066473442823e-06, "loss": 0.0097, "step": 50640 }, { "epoch": 0.4276878258850351, "grad_norm": 0.5587042570114136, "learning_rate": 9.610379604556572e-06, "loss": 0.0136, "step": 50650 }, { "epoch": 0.427772265732199, "grad_norm": 0.48108989000320435, "learning_rate": 9.610094374548411e-06, "loss": 0.009, "step": 50660 }, { "epoch": 0.4278567055793629, "grad_norm": 0.46626460552215576, "learning_rate": 9.609809044409946e-06, "loss": 0.0151, "step": 50670 }, { "epoch": 0.4279411454265268, "grad_norm": 0.33044350147247314, "learning_rate": 9.609523614147374e-06, "loss": 0.0172, "step": 50680 }, { "epoch": 0.42802558527369067, "grad_norm": 0.28572458028793335, "learning_rate": 9.609238083766891e-06, "loss": 0.0156, "step": 50690 }, { "epoch": 0.42811002512085455, "grad_norm": 0.8637473583221436, "learning_rate": 9.608952453274706e-06, "loss": 0.0208, "step": 50700 }, { "epoch": 0.4281944649680184, "grad_norm": 0.47575750946998596, "learning_rate": 9.608666722677013e-06, "loss": 0.0152, "step": 50710 }, { "epoch": 0.42827890481518227, "grad_norm": 0.17289459705352783, "learning_rate": 9.608380891980027e-06, "loss": 0.0077, "step": 50720 }, { "epoch": 0.42836334466234616, "grad_norm": 0.41965651512145996, "learning_rate": 9.60809496118995e-06, "loss": 0.0207, "step": 50730 }, { "epoch": 0.42844778450951004, "grad_norm": 0.7261382937431335, "learning_rate": 9.607808930312996e-06, "loss": 0.0128, "step": 50740 }, { "epoch": 0.42853222435667393, "grad_norm": 1.387182354927063, "learning_rate": 9.607522799355376e-06, "loss": 0.0204, "step": 50750 }, { "epoch": 0.4286166642038378, "grad_norm": 0.6371561288833618, "learning_rate": 9.607236568323304e-06, "loss": 0.0116, "step": 50760 }, { "epoch": 0.42870110405100165, "grad_norm": 1.2622617483139038, "learning_rate": 9.606950237222998e-06, "loss": 0.0127, "step": 50770 }, { "epoch": 0.42878554389816553, "grad_norm": 0.06654315441846848, "learning_rate": 9.606663806060679e-06, "loss": 0.0137, "step": 50780 }, { "epoch": 0.4288699837453294, "grad_norm": 0.5168298482894897, "learning_rate": 9.606377274842564e-06, "loss": 0.0196, "step": 50790 }, { "epoch": 0.4289544235924933, "grad_norm": 0.512147843837738, "learning_rate": 9.60609064357488e-06, "loss": 0.0132, "step": 50800 }, { "epoch": 0.4290388634396572, "grad_norm": 0.36135056614875793, "learning_rate": 9.60580391226385e-06, "loss": 0.0252, "step": 50810 }, { "epoch": 0.4291233032868211, "grad_norm": 0.25236982107162476, "learning_rate": 9.605517080915703e-06, "loss": 0.0094, "step": 50820 }, { "epoch": 0.4292077431339849, "grad_norm": 0.21493683755397797, "learning_rate": 9.605230149536668e-06, "loss": 0.0184, "step": 50830 }, { "epoch": 0.4292921829811488, "grad_norm": 0.24987278878688812, "learning_rate": 9.60494311813298e-06, "loss": 0.0131, "step": 50840 }, { "epoch": 0.4293766228283127, "grad_norm": 0.300427109003067, "learning_rate": 9.604655986710869e-06, "loss": 0.0117, "step": 50850 }, { "epoch": 0.42946106267547657, "grad_norm": 0.19046440720558167, "learning_rate": 9.604368755276575e-06, "loss": 0.0107, "step": 50860 }, { "epoch": 0.42954550252264045, "grad_norm": 0.0343993604183197, "learning_rate": 9.604081423836335e-06, "loss": 0.0153, "step": 50870 }, { "epoch": 0.4296299423698043, "grad_norm": 0.5927232503890991, "learning_rate": 9.603793992396388e-06, "loss": 0.0147, "step": 50880 }, { "epoch": 0.42971438221696817, "grad_norm": 0.09482678025960922, "learning_rate": 9.60350646096298e-06, "loss": 0.0218, "step": 50890 }, { "epoch": 0.42979882206413206, "grad_norm": 0.3625955283641815, "learning_rate": 9.603218829542358e-06, "loss": 0.0126, "step": 50900 }, { "epoch": 0.42988326191129594, "grad_norm": 0.515688955783844, "learning_rate": 9.602931098140763e-06, "loss": 0.0141, "step": 50910 }, { "epoch": 0.42996770175845983, "grad_norm": 0.5788493752479553, "learning_rate": 9.602643266764449e-06, "loss": 0.0266, "step": 50920 }, { "epoch": 0.4300521416056237, "grad_norm": 0.18565157055854797, "learning_rate": 9.602355335419667e-06, "loss": 0.0168, "step": 50930 }, { "epoch": 0.43013658145278755, "grad_norm": 0.14617913961410522, "learning_rate": 9.602067304112667e-06, "loss": 0.0163, "step": 50940 }, { "epoch": 0.43022102129995143, "grad_norm": 0.49870729446411133, "learning_rate": 9.601779172849712e-06, "loss": 0.0175, "step": 50950 }, { "epoch": 0.4303054611471153, "grad_norm": 0.3052530586719513, "learning_rate": 9.601490941637055e-06, "loss": 0.0098, "step": 50960 }, { "epoch": 0.4303899009942792, "grad_norm": 0.18532732129096985, "learning_rate": 9.601202610480956e-06, "loss": 0.0077, "step": 50970 }, { "epoch": 0.4304743408414431, "grad_norm": 0.5762552618980408, "learning_rate": 9.600914179387682e-06, "loss": 0.0144, "step": 50980 }, { "epoch": 0.430558780688607, "grad_norm": 0.5196888446807861, "learning_rate": 9.600625648363493e-06, "loss": 0.0147, "step": 50990 }, { "epoch": 0.4306432205357708, "grad_norm": 0.06996883451938629, "learning_rate": 9.600337017414658e-06, "loss": 0.0091, "step": 51000 }, { "epoch": 0.4307276603829347, "grad_norm": 1.6380095481872559, "learning_rate": 9.600048286547445e-06, "loss": 0.0109, "step": 51010 }, { "epoch": 0.4308121002300986, "grad_norm": 0.4885128140449524, "learning_rate": 9.599759455768128e-06, "loss": 0.0142, "step": 51020 }, { "epoch": 0.43089654007726247, "grad_norm": 0.12953761219978333, "learning_rate": 9.599470525082976e-06, "loss": 0.0121, "step": 51030 }, { "epoch": 0.43098097992442636, "grad_norm": 0.7475237846374512, "learning_rate": 9.599181494498267e-06, "loss": 0.0161, "step": 51040 }, { "epoch": 0.4310654197715902, "grad_norm": 0.3102700114250183, "learning_rate": 9.59889236402028e-06, "loss": 0.0165, "step": 51050 }, { "epoch": 0.43114985961875407, "grad_norm": 0.18190430104732513, "learning_rate": 9.59860313365529e-06, "loss": 0.0089, "step": 51060 }, { "epoch": 0.43123429946591796, "grad_norm": 0.4558594524860382, "learning_rate": 9.598313803409586e-06, "loss": 0.0109, "step": 51070 }, { "epoch": 0.43131873931308184, "grad_norm": 0.4429517090320587, "learning_rate": 9.598024373289447e-06, "loss": 0.0177, "step": 51080 }, { "epoch": 0.43140317916024573, "grad_norm": 1.3067940473556519, "learning_rate": 9.597734843301162e-06, "loss": 0.0257, "step": 51090 }, { "epoch": 0.4314876190074096, "grad_norm": 0.29184773564338684, "learning_rate": 9.597445213451017e-06, "loss": 0.0183, "step": 51100 }, { "epoch": 0.43157205885457345, "grad_norm": 0.3133932054042816, "learning_rate": 9.597155483745304e-06, "loss": 0.0164, "step": 51110 }, { "epoch": 0.43165649870173733, "grad_norm": 0.5048472285270691, "learning_rate": 9.596865654190315e-06, "loss": 0.0164, "step": 51120 }, { "epoch": 0.4317409385489012, "grad_norm": 0.3592281937599182, "learning_rate": 9.596575724792348e-06, "loss": 0.0184, "step": 51130 }, { "epoch": 0.4318253783960651, "grad_norm": 0.438401460647583, "learning_rate": 9.596285695557697e-06, "loss": 0.0125, "step": 51140 }, { "epoch": 0.431909818243229, "grad_norm": 0.11934380978345871, "learning_rate": 9.595995566492663e-06, "loss": 0.0106, "step": 51150 }, { "epoch": 0.4319942580903929, "grad_norm": 1.0979132652282715, "learning_rate": 9.595705337603547e-06, "loss": 0.0249, "step": 51160 }, { "epoch": 0.4320786979375567, "grad_norm": 0.3904224932193756, "learning_rate": 9.595415008896654e-06, "loss": 0.0138, "step": 51170 }, { "epoch": 0.4321631377847206, "grad_norm": 0.4013804495334625, "learning_rate": 9.595124580378286e-06, "loss": 0.0224, "step": 51180 }, { "epoch": 0.4322475776318845, "grad_norm": 1.5549544095993042, "learning_rate": 9.594834052054755e-06, "loss": 0.0189, "step": 51190 }, { "epoch": 0.43233201747904837, "grad_norm": 0.6787439584732056, "learning_rate": 9.594543423932372e-06, "loss": 0.0185, "step": 51200 }, { "epoch": 0.43241645732621226, "grad_norm": 0.9971423149108887, "learning_rate": 9.594252696017445e-06, "loss": 0.0164, "step": 51210 }, { "epoch": 0.43250089717337614, "grad_norm": 0.8658219575881958, "learning_rate": 9.593961868316292e-06, "loss": 0.0155, "step": 51220 }, { "epoch": 0.43258533702054, "grad_norm": 0.4226808547973633, "learning_rate": 9.593670940835226e-06, "loss": 0.0188, "step": 51230 }, { "epoch": 0.43266977686770386, "grad_norm": 0.4436056613922119, "learning_rate": 9.593379913580571e-06, "loss": 0.0173, "step": 51240 }, { "epoch": 0.43275421671486775, "grad_norm": 0.8491525053977966, "learning_rate": 9.593088786558646e-06, "loss": 0.0111, "step": 51250 }, { "epoch": 0.43283865656203163, "grad_norm": 1.0429069995880127, "learning_rate": 9.592797559775773e-06, "loss": 0.0206, "step": 51260 }, { "epoch": 0.4329230964091955, "grad_norm": 0.14580771327018738, "learning_rate": 9.592506233238279e-06, "loss": 0.007, "step": 51270 }, { "epoch": 0.43300753625635935, "grad_norm": 0.5887890458106995, "learning_rate": 9.59221480695249e-06, "loss": 0.017, "step": 51280 }, { "epoch": 0.43309197610352324, "grad_norm": 0.17185203731060028, "learning_rate": 9.591923280924735e-06, "loss": 0.0148, "step": 51290 }, { "epoch": 0.4331764159506871, "grad_norm": 0.6980109810829163, "learning_rate": 9.591631655161348e-06, "loss": 0.0163, "step": 51300 }, { "epoch": 0.433260855797851, "grad_norm": 0.45745736360549927, "learning_rate": 9.591339929668664e-06, "loss": 0.0227, "step": 51310 }, { "epoch": 0.4333452956450149, "grad_norm": 0.3359411358833313, "learning_rate": 9.591048104453016e-06, "loss": 0.0144, "step": 51320 }, { "epoch": 0.4334297354921788, "grad_norm": 0.626381516456604, "learning_rate": 9.590756179520745e-06, "loss": 0.0188, "step": 51330 }, { "epoch": 0.4335141753393426, "grad_norm": 0.19750158488750458, "learning_rate": 9.59046415487819e-06, "loss": 0.0135, "step": 51340 }, { "epoch": 0.4335986151865065, "grad_norm": 0.4410412013530731, "learning_rate": 9.590172030531694e-06, "loss": 0.0128, "step": 51350 }, { "epoch": 0.4336830550336704, "grad_norm": 0.6686597466468811, "learning_rate": 9.589879806487603e-06, "loss": 0.0163, "step": 51360 }, { "epoch": 0.43376749488083427, "grad_norm": 0.6181614398956299, "learning_rate": 9.589587482752262e-06, "loss": 0.0188, "step": 51370 }, { "epoch": 0.43385193472799816, "grad_norm": 0.21123600006103516, "learning_rate": 9.589295059332021e-06, "loss": 0.0178, "step": 51380 }, { "epoch": 0.43393637457516204, "grad_norm": 0.16850920021533966, "learning_rate": 9.589002536233233e-06, "loss": 0.0204, "step": 51390 }, { "epoch": 0.4340208144223259, "grad_norm": 0.35341447591781616, "learning_rate": 9.588709913462251e-06, "loss": 0.0154, "step": 51400 }, { "epoch": 0.43410525426948976, "grad_norm": 0.3133808970451355, "learning_rate": 9.588417191025428e-06, "loss": 0.0124, "step": 51410 }, { "epoch": 0.43418969411665365, "grad_norm": 0.3500630855560303, "learning_rate": 9.588124368929125e-06, "loss": 0.0134, "step": 51420 }, { "epoch": 0.43427413396381753, "grad_norm": 0.9349406957626343, "learning_rate": 9.5878314471797e-06, "loss": 0.0112, "step": 51430 }, { "epoch": 0.4343585738109814, "grad_norm": 0.2886989414691925, "learning_rate": 9.587538425783517e-06, "loss": 0.0124, "step": 51440 }, { "epoch": 0.4344430136581453, "grad_norm": 0.371674507856369, "learning_rate": 9.587245304746939e-06, "loss": 0.013, "step": 51450 }, { "epoch": 0.43452745350530914, "grad_norm": 1.173740267753601, "learning_rate": 9.586952084076333e-06, "loss": 0.0227, "step": 51460 }, { "epoch": 0.434611893352473, "grad_norm": 0.16525855660438538, "learning_rate": 9.586658763778068e-06, "loss": 0.0103, "step": 51470 }, { "epoch": 0.4346963331996369, "grad_norm": 0.5816681385040283, "learning_rate": 9.586365343858515e-06, "loss": 0.0249, "step": 51480 }, { "epoch": 0.4347807730468008, "grad_norm": 0.24647270143032074, "learning_rate": 9.586071824324045e-06, "loss": 0.0212, "step": 51490 }, { "epoch": 0.4348652128939647, "grad_norm": 0.30879607796669006, "learning_rate": 9.585778205181037e-06, "loss": 0.0142, "step": 51500 }, { "epoch": 0.4349496527411285, "grad_norm": 0.11110838502645493, "learning_rate": 9.585484486435864e-06, "loss": 0.008, "step": 51510 }, { "epoch": 0.4350340925882924, "grad_norm": 0.8156601190567017, "learning_rate": 9.58519066809491e-06, "loss": 0.0161, "step": 51520 }, { "epoch": 0.4351185324354563, "grad_norm": 0.1344907134771347, "learning_rate": 9.584896750164551e-06, "loss": 0.0119, "step": 51530 }, { "epoch": 0.43520297228262017, "grad_norm": 0.31005311012268066, "learning_rate": 9.584602732651177e-06, "loss": 0.0226, "step": 51540 }, { "epoch": 0.43528741212978406, "grad_norm": 0.3823949992656708, "learning_rate": 9.58430861556117e-06, "loss": 0.0165, "step": 51550 }, { "epoch": 0.43537185197694794, "grad_norm": 0.3609740436077118, "learning_rate": 9.584014398900919e-06, "loss": 0.0063, "step": 51560 }, { "epoch": 0.4354562918241118, "grad_norm": 0.2749216556549072, "learning_rate": 9.583720082676815e-06, "loss": 0.0133, "step": 51570 }, { "epoch": 0.43554073167127566, "grad_norm": 1.0838080644607544, "learning_rate": 9.58342566689525e-06, "loss": 0.015, "step": 51580 }, { "epoch": 0.43562517151843955, "grad_norm": 0.4603908061981201, "learning_rate": 9.583131151562619e-06, "loss": 0.0102, "step": 51590 }, { "epoch": 0.43570961136560343, "grad_norm": 0.28520578145980835, "learning_rate": 9.582836536685318e-06, "loss": 0.0331, "step": 51600 }, { "epoch": 0.4357940512127673, "grad_norm": 0.690269947052002, "learning_rate": 9.582541822269747e-06, "loss": 0.0204, "step": 51610 }, { "epoch": 0.4358784910599312, "grad_norm": 0.6217715740203857, "learning_rate": 9.582247008322307e-06, "loss": 0.0227, "step": 51620 }, { "epoch": 0.43596293090709504, "grad_norm": 0.3925186097621918, "learning_rate": 9.5819520948494e-06, "loss": 0.0108, "step": 51630 }, { "epoch": 0.4360473707542589, "grad_norm": 0.9215199947357178, "learning_rate": 9.581657081857433e-06, "loss": 0.0197, "step": 51640 }, { "epoch": 0.4361318106014228, "grad_norm": 0.9222204685211182, "learning_rate": 9.581361969352812e-06, "loss": 0.0179, "step": 51650 }, { "epoch": 0.4362162504485867, "grad_norm": 0.06893075257539749, "learning_rate": 9.581066757341949e-06, "loss": 0.0161, "step": 51660 }, { "epoch": 0.4363006902957506, "grad_norm": 0.7687744498252869, "learning_rate": 9.580771445831254e-06, "loss": 0.0223, "step": 51670 }, { "epoch": 0.43638513014291447, "grad_norm": 0.32757633924484253, "learning_rate": 9.580476034827144e-06, "loss": 0.0115, "step": 51680 }, { "epoch": 0.4364695699900783, "grad_norm": 0.26581671833992004, "learning_rate": 9.580180524336031e-06, "loss": 0.0088, "step": 51690 }, { "epoch": 0.4365540098372422, "grad_norm": 0.1892591118812561, "learning_rate": 9.579884914364337e-06, "loss": 0.0108, "step": 51700 }, { "epoch": 0.43663844968440607, "grad_norm": 0.006793709006160498, "learning_rate": 9.57958920491848e-06, "loss": 0.009, "step": 51710 }, { "epoch": 0.43672288953156996, "grad_norm": 0.4978507161140442, "learning_rate": 9.579293396004883e-06, "loss": 0.01, "step": 51720 }, { "epoch": 0.43680732937873384, "grad_norm": 0.8638880848884583, "learning_rate": 9.578997487629974e-06, "loss": 0.021, "step": 51730 }, { "epoch": 0.4368917692258977, "grad_norm": 0.5832643508911133, "learning_rate": 9.578701479800176e-06, "loss": 0.0149, "step": 51740 }, { "epoch": 0.43697620907306156, "grad_norm": 0.2519225776195526, "learning_rate": 9.578405372521922e-06, "loss": 0.0081, "step": 51750 }, { "epoch": 0.43706064892022545, "grad_norm": 0.09961698204278946, "learning_rate": 9.578109165801641e-06, "loss": 0.0107, "step": 51760 }, { "epoch": 0.43714508876738933, "grad_norm": 0.5224193930625916, "learning_rate": 9.577812859645767e-06, "loss": 0.0107, "step": 51770 }, { "epoch": 0.4372295286145532, "grad_norm": 0.6201652884483337, "learning_rate": 9.577516454060734e-06, "loss": 0.0118, "step": 51780 }, { "epoch": 0.4373139684617171, "grad_norm": 0.6402381062507629, "learning_rate": 9.577219949052984e-06, "loss": 0.0158, "step": 51790 }, { "epoch": 0.43739840830888094, "grad_norm": 0.753341794013977, "learning_rate": 9.576923344628954e-06, "loss": 0.0121, "step": 51800 }, { "epoch": 0.4374828481560448, "grad_norm": 0.5032881498336792, "learning_rate": 9.576626640795085e-06, "loss": 0.0105, "step": 51810 }, { "epoch": 0.4375672880032087, "grad_norm": 0.12315394729375839, "learning_rate": 9.576329837557826e-06, "loss": 0.0132, "step": 51820 }, { "epoch": 0.4376517278503726, "grad_norm": 0.24482257664203644, "learning_rate": 9.57603293492362e-06, "loss": 0.0153, "step": 51830 }, { "epoch": 0.4377361676975365, "grad_norm": 1.1607234477996826, "learning_rate": 9.575735932898915e-06, "loss": 0.0167, "step": 51840 }, { "epoch": 0.43782060754470037, "grad_norm": 0.09787261486053467, "learning_rate": 9.575438831490164e-06, "loss": 0.0169, "step": 51850 }, { "epoch": 0.4379050473918642, "grad_norm": 0.1272408664226532, "learning_rate": 9.575141630703819e-06, "loss": 0.0133, "step": 51860 }, { "epoch": 0.4379894872390281, "grad_norm": 0.6511638760566711, "learning_rate": 9.574844330546336e-06, "loss": 0.0243, "step": 51870 }, { "epoch": 0.43807392708619197, "grad_norm": 0.23347246646881104, "learning_rate": 9.57454693102417e-06, "loss": 0.0088, "step": 51880 }, { "epoch": 0.43815836693335586, "grad_norm": 0.17777468264102936, "learning_rate": 9.574249432143785e-06, "loss": 0.0135, "step": 51890 }, { "epoch": 0.43824280678051974, "grad_norm": 0.40205517411231995, "learning_rate": 9.573951833911639e-06, "loss": 0.0136, "step": 51900 }, { "epoch": 0.43832724662768363, "grad_norm": 0.13004598021507263, "learning_rate": 9.573654136334193e-06, "loss": 0.0111, "step": 51910 }, { "epoch": 0.43841168647484746, "grad_norm": 0.45198875665664673, "learning_rate": 9.57335633941792e-06, "loss": 0.0143, "step": 51920 }, { "epoch": 0.43849612632201135, "grad_norm": 0.47342923283576965, "learning_rate": 9.573058443169283e-06, "loss": 0.013, "step": 51930 }, { "epoch": 0.43858056616917523, "grad_norm": 0.626474142074585, "learning_rate": 9.572760447594755e-06, "loss": 0.0235, "step": 51940 }, { "epoch": 0.4386650060163391, "grad_norm": 0.5672516226768494, "learning_rate": 9.572462352700807e-06, "loss": 0.0137, "step": 51950 }, { "epoch": 0.438749445863503, "grad_norm": 0.6571836471557617, "learning_rate": 9.572164158493911e-06, "loss": 0.0154, "step": 51960 }, { "epoch": 0.43883388571066684, "grad_norm": 0.3570147156715393, "learning_rate": 9.57186586498055e-06, "loss": 0.0139, "step": 51970 }, { "epoch": 0.4389183255578307, "grad_norm": 0.6521725654602051, "learning_rate": 9.571567472167196e-06, "loss": 0.0169, "step": 51980 }, { "epoch": 0.4390027654049946, "grad_norm": 0.3731040060520172, "learning_rate": 9.571268980060336e-06, "loss": 0.0148, "step": 51990 }, { "epoch": 0.4390872052521585, "grad_norm": 0.6246458888053894, "learning_rate": 9.570970388666448e-06, "loss": 0.0186, "step": 52000 }, { "epoch": 0.4391716450993224, "grad_norm": 0.11484195291996002, "learning_rate": 9.570671697992022e-06, "loss": 0.0149, "step": 52010 }, { "epoch": 0.43925608494648627, "grad_norm": 0.3104894161224365, "learning_rate": 9.570372908043542e-06, "loss": 0.0077, "step": 52020 }, { "epoch": 0.4393405247936501, "grad_norm": 0.32415324449539185, "learning_rate": 9.5700740188275e-06, "loss": 0.0137, "step": 52030 }, { "epoch": 0.439424964640814, "grad_norm": 0.4863685369491577, "learning_rate": 9.569775030350386e-06, "loss": 0.0157, "step": 52040 }, { "epoch": 0.4395094044879779, "grad_norm": 0.4697245657444, "learning_rate": 9.569475942618696e-06, "loss": 0.0174, "step": 52050 }, { "epoch": 0.43959384433514176, "grad_norm": 0.5670286417007446, "learning_rate": 9.569176755638923e-06, "loss": 0.0188, "step": 52060 }, { "epoch": 0.43967828418230565, "grad_norm": 0.20428049564361572, "learning_rate": 9.568877469417568e-06, "loss": 0.0201, "step": 52070 }, { "epoch": 0.43976272402946953, "grad_norm": 0.267017126083374, "learning_rate": 9.568578083961131e-06, "loss": 0.0117, "step": 52080 }, { "epoch": 0.43984716387663336, "grad_norm": 0.5793972611427307, "learning_rate": 9.568278599276114e-06, "loss": 0.0147, "step": 52090 }, { "epoch": 0.43993160372379725, "grad_norm": 0.594512403011322, "learning_rate": 9.567979015369023e-06, "loss": 0.0261, "step": 52100 }, { "epoch": 0.44001604357096114, "grad_norm": 0.07950147986412048, "learning_rate": 9.567679332246362e-06, "loss": 0.0162, "step": 52110 }, { "epoch": 0.440100483418125, "grad_norm": 0.2366737574338913, "learning_rate": 9.567379549914642e-06, "loss": 0.0176, "step": 52120 }, { "epoch": 0.4401849232652889, "grad_norm": 0.5973590016365051, "learning_rate": 9.567079668380374e-06, "loss": 0.0145, "step": 52130 }, { "epoch": 0.4402693631124528, "grad_norm": 0.486648827791214, "learning_rate": 9.566779687650073e-06, "loss": 0.0122, "step": 52140 }, { "epoch": 0.4403538029596166, "grad_norm": 0.31175458431243896, "learning_rate": 9.566479607730253e-06, "loss": 0.0125, "step": 52150 }, { "epoch": 0.4404382428067805, "grad_norm": 0.13925164937973022, "learning_rate": 9.56617942862743e-06, "loss": 0.016, "step": 52160 }, { "epoch": 0.4405226826539444, "grad_norm": 0.7558507323265076, "learning_rate": 9.565879150348128e-06, "loss": 0.0128, "step": 52170 }, { "epoch": 0.4406071225011083, "grad_norm": 0.18041889369487762, "learning_rate": 9.565578772898865e-06, "loss": 0.0106, "step": 52180 }, { "epoch": 0.44069156234827217, "grad_norm": 0.28571441769599915, "learning_rate": 9.565278296286166e-06, "loss": 0.0147, "step": 52190 }, { "epoch": 0.440776002195436, "grad_norm": 0.3205094337463379, "learning_rate": 9.564977720516558e-06, "loss": 0.0168, "step": 52200 }, { "epoch": 0.4408604420425999, "grad_norm": 0.4474185109138489, "learning_rate": 9.56467704559657e-06, "loss": 0.0204, "step": 52210 }, { "epoch": 0.4409448818897638, "grad_norm": 0.4948882460594177, "learning_rate": 9.564376271532732e-06, "loss": 0.0199, "step": 52220 }, { "epoch": 0.44102932173692766, "grad_norm": 0.3615049719810486, "learning_rate": 9.564075398331577e-06, "loss": 0.0093, "step": 52230 }, { "epoch": 0.44111376158409155, "grad_norm": 0.5273950099945068, "learning_rate": 9.56377442599964e-06, "loss": 0.0106, "step": 52240 }, { "epoch": 0.44119820143125543, "grad_norm": 0.17779292166233063, "learning_rate": 9.563473354543455e-06, "loss": 0.011, "step": 52250 }, { "epoch": 0.44128264127841926, "grad_norm": 0.26461055874824524, "learning_rate": 9.563172183969566e-06, "loss": 0.0152, "step": 52260 }, { "epoch": 0.44136708112558315, "grad_norm": 0.30386731028556824, "learning_rate": 9.562870914284513e-06, "loss": 0.0124, "step": 52270 }, { "epoch": 0.44145152097274704, "grad_norm": 0.3268604874610901, "learning_rate": 9.562569545494838e-06, "loss": 0.0156, "step": 52280 }, { "epoch": 0.4415359608199109, "grad_norm": 0.4366362690925598, "learning_rate": 9.562268077607087e-06, "loss": 0.0118, "step": 52290 }, { "epoch": 0.4416204006670748, "grad_norm": 0.4807400703430176, "learning_rate": 9.561966510627808e-06, "loss": 0.0157, "step": 52300 }, { "epoch": 0.4417048405142387, "grad_norm": 0.35326918959617615, "learning_rate": 9.561664844563552e-06, "loss": 0.0123, "step": 52310 }, { "epoch": 0.4417892803614025, "grad_norm": 0.10650557279586792, "learning_rate": 9.561363079420869e-06, "loss": 0.0109, "step": 52320 }, { "epoch": 0.4418737202085664, "grad_norm": 0.25616663694381714, "learning_rate": 9.561061215206317e-06, "loss": 0.0106, "step": 52330 }, { "epoch": 0.4419581600557303, "grad_norm": 0.49107879400253296, "learning_rate": 9.560759251926448e-06, "loss": 0.0104, "step": 52340 }, { "epoch": 0.4420425999028942, "grad_norm": 0.25520849227905273, "learning_rate": 9.560457189587824e-06, "loss": 0.0234, "step": 52350 }, { "epoch": 0.44212703975005807, "grad_norm": 0.2968113422393799, "learning_rate": 9.560155028197004e-06, "loss": 0.0123, "step": 52360 }, { "epoch": 0.4422114795972219, "grad_norm": 0.41964006423950195, "learning_rate": 9.55985276776055e-06, "loss": 0.012, "step": 52370 }, { "epoch": 0.4422959194443858, "grad_norm": 0.4320264160633087, "learning_rate": 9.55955040828503e-06, "loss": 0.0182, "step": 52380 }, { "epoch": 0.4423803592915497, "grad_norm": 0.46236932277679443, "learning_rate": 9.559247949777009e-06, "loss": 0.011, "step": 52390 }, { "epoch": 0.44246479913871356, "grad_norm": 0.5101226568222046, "learning_rate": 9.558945392243056e-06, "loss": 0.0156, "step": 52400 }, { "epoch": 0.44254923898587745, "grad_norm": 0.8985642194747925, "learning_rate": 9.558642735689743e-06, "loss": 0.0225, "step": 52410 }, { "epoch": 0.44263367883304133, "grad_norm": 0.42039215564727783, "learning_rate": 9.558339980123646e-06, "loss": 0.0145, "step": 52420 }, { "epoch": 0.44271811868020516, "grad_norm": 0.08728785067796707, "learning_rate": 9.558037125551337e-06, "loss": 0.0133, "step": 52430 }, { "epoch": 0.44280255852736905, "grad_norm": 0.4652606248855591, "learning_rate": 9.557734171979396e-06, "loss": 0.0118, "step": 52440 }, { "epoch": 0.44288699837453294, "grad_norm": 0.8809570670127869, "learning_rate": 9.557431119414402e-06, "loss": 0.0125, "step": 52450 }, { "epoch": 0.4429714382216968, "grad_norm": 0.10261079668998718, "learning_rate": 9.55712796786294e-06, "loss": 0.0171, "step": 52460 }, { "epoch": 0.4430558780688607, "grad_norm": 0.36345937848091125, "learning_rate": 9.556824717331592e-06, "loss": 0.019, "step": 52470 }, { "epoch": 0.4431403179160246, "grad_norm": 0.19242483377456665, "learning_rate": 9.556521367826944e-06, "loss": 0.0114, "step": 52480 }, { "epoch": 0.4432247577631884, "grad_norm": 0.6459448337554932, "learning_rate": 9.556217919355585e-06, "loss": 0.0197, "step": 52490 }, { "epoch": 0.4433091976103523, "grad_norm": 0.4364336133003235, "learning_rate": 9.555914371924106e-06, "loss": 0.0127, "step": 52500 }, { "epoch": 0.4433936374575162, "grad_norm": 0.47939789295196533, "learning_rate": 9.555610725539103e-06, "loss": 0.0192, "step": 52510 }, { "epoch": 0.4434780773046801, "grad_norm": 0.4873110353946686, "learning_rate": 9.555306980207167e-06, "loss": 0.0145, "step": 52520 }, { "epoch": 0.44356251715184397, "grad_norm": 0.22543373703956604, "learning_rate": 9.555003135934897e-06, "loss": 0.0193, "step": 52530 }, { "epoch": 0.44364695699900786, "grad_norm": 0.7219192981719971, "learning_rate": 9.554699192728894e-06, "loss": 0.0124, "step": 52540 }, { "epoch": 0.4437313968461717, "grad_norm": 1.082777738571167, "learning_rate": 9.554395150595754e-06, "loss": 0.0107, "step": 52550 }, { "epoch": 0.4438158366933356, "grad_norm": 0.8114920854568481, "learning_rate": 9.554091009542088e-06, "loss": 0.0173, "step": 52560 }, { "epoch": 0.44390027654049946, "grad_norm": 0.45094963908195496, "learning_rate": 9.553786769574498e-06, "loss": 0.0102, "step": 52570 }, { "epoch": 0.44398471638766335, "grad_norm": 0.2645946145057678, "learning_rate": 9.553482430699593e-06, "loss": 0.0083, "step": 52580 }, { "epoch": 0.44406915623482723, "grad_norm": 0.30220574140548706, "learning_rate": 9.55317799292398e-06, "loss": 0.0127, "step": 52590 }, { "epoch": 0.44415359608199106, "grad_norm": 0.28254207968711853, "learning_rate": 9.552873456254276e-06, "loss": 0.0191, "step": 52600 }, { "epoch": 0.44423803592915495, "grad_norm": 0.8489605188369751, "learning_rate": 9.552568820697092e-06, "loss": 0.0268, "step": 52610 }, { "epoch": 0.44432247577631884, "grad_norm": 0.09339063614606857, "learning_rate": 9.552264086259048e-06, "loss": 0.0159, "step": 52620 }, { "epoch": 0.4444069156234827, "grad_norm": 0.31275925040245056, "learning_rate": 9.551959252946761e-06, "loss": 0.008, "step": 52630 }, { "epoch": 0.4444913554706466, "grad_norm": 0.62803715467453, "learning_rate": 9.55165432076685e-06, "loss": 0.0131, "step": 52640 }, { "epoch": 0.4445757953178105, "grad_norm": 0.25128376483917236, "learning_rate": 9.551349289725943e-06, "loss": 0.0095, "step": 52650 }, { "epoch": 0.4446602351649743, "grad_norm": 1.6297430992126465, "learning_rate": 9.551044159830658e-06, "loss": 0.0122, "step": 52660 }, { "epoch": 0.4447446750121382, "grad_norm": 0.29162728786468506, "learning_rate": 9.550738931087629e-06, "loss": 0.012, "step": 52670 }, { "epoch": 0.4448291148593021, "grad_norm": 0.4901353120803833, "learning_rate": 9.550433603503482e-06, "loss": 0.0161, "step": 52680 }, { "epoch": 0.444913554706466, "grad_norm": 0.3644351363182068, "learning_rate": 9.55012817708485e-06, "loss": 0.0108, "step": 52690 }, { "epoch": 0.44499799455362987, "grad_norm": 0.8469729423522949, "learning_rate": 9.549822651838367e-06, "loss": 0.0196, "step": 52700 }, { "epoch": 0.44508243440079376, "grad_norm": 0.6586781740188599, "learning_rate": 9.549517027770667e-06, "loss": 0.01, "step": 52710 }, { "epoch": 0.4451668742479576, "grad_norm": 0.4151666760444641, "learning_rate": 9.549211304888391e-06, "loss": 0.0153, "step": 52720 }, { "epoch": 0.4452513140951215, "grad_norm": 0.42379525303840637, "learning_rate": 9.548905483198176e-06, "loss": 0.0104, "step": 52730 }, { "epoch": 0.44533575394228536, "grad_norm": 0.7172935605049133, "learning_rate": 9.548599562706666e-06, "loss": 0.0077, "step": 52740 }, { "epoch": 0.44542019378944925, "grad_norm": 0.6597310304641724, "learning_rate": 9.548293543420508e-06, "loss": 0.0106, "step": 52750 }, { "epoch": 0.44550463363661313, "grad_norm": 0.5554659366607666, "learning_rate": 9.547987425346344e-06, "loss": 0.011, "step": 52760 }, { "epoch": 0.445589073483777, "grad_norm": 0.47120577096939087, "learning_rate": 9.547681208490827e-06, "loss": 0.0151, "step": 52770 }, { "epoch": 0.44567351333094085, "grad_norm": 0.394944429397583, "learning_rate": 9.547374892860604e-06, "loss": 0.0112, "step": 52780 }, { "epoch": 0.44575795317810474, "grad_norm": 0.1812431812286377, "learning_rate": 9.547068478462332e-06, "loss": 0.0205, "step": 52790 }, { "epoch": 0.4458423930252686, "grad_norm": 0.541191577911377, "learning_rate": 9.546761965302662e-06, "loss": 0.0182, "step": 52800 }, { "epoch": 0.4459268328724325, "grad_norm": 0.5206982493400574, "learning_rate": 9.546455353388255e-06, "loss": 0.0102, "step": 52810 }, { "epoch": 0.4460112727195964, "grad_norm": 0.19832757115364075, "learning_rate": 9.546148642725771e-06, "loss": 0.014, "step": 52820 }, { "epoch": 0.4460957125667602, "grad_norm": 0.398192822933197, "learning_rate": 9.545841833321869e-06, "loss": 0.0087, "step": 52830 }, { "epoch": 0.4461801524139241, "grad_norm": 0.2291734665632248, "learning_rate": 9.545534925183212e-06, "loss": 0.0169, "step": 52840 }, { "epoch": 0.446264592261088, "grad_norm": 0.5482807159423828, "learning_rate": 9.54522791831647e-06, "loss": 0.0177, "step": 52850 }, { "epoch": 0.4463490321082519, "grad_norm": 0.29115018248558044, "learning_rate": 9.544920812728308e-06, "loss": 0.0082, "step": 52860 }, { "epoch": 0.4464334719554158, "grad_norm": 0.28393444418907166, "learning_rate": 9.5446136084254e-06, "loss": 0.0183, "step": 52870 }, { "epoch": 0.44651791180257966, "grad_norm": 0.17520521581172943, "learning_rate": 9.544306305414413e-06, "loss": 0.0175, "step": 52880 }, { "epoch": 0.4466023516497435, "grad_norm": 0.7657309174537659, "learning_rate": 9.543998903702025e-06, "loss": 0.0158, "step": 52890 }, { "epoch": 0.4466867914969074, "grad_norm": 0.15692898631095886, "learning_rate": 9.543691403294912e-06, "loss": 0.0114, "step": 52900 }, { "epoch": 0.44677123134407126, "grad_norm": 0.493280827999115, "learning_rate": 9.543383804199752e-06, "loss": 0.0198, "step": 52910 }, { "epoch": 0.44685567119123515, "grad_norm": 0.41342419385910034, "learning_rate": 9.543076106423228e-06, "loss": 0.0121, "step": 52920 }, { "epoch": 0.44694011103839903, "grad_norm": 0.7744587659835815, "learning_rate": 9.54276830997202e-06, "loss": 0.0118, "step": 52930 }, { "epoch": 0.4470245508855629, "grad_norm": 0.34969383478164673, "learning_rate": 9.542460414852818e-06, "loss": 0.0143, "step": 52940 }, { "epoch": 0.44710899073272675, "grad_norm": 0.1941075474023819, "learning_rate": 9.542152421072306e-06, "loss": 0.0163, "step": 52950 }, { "epoch": 0.44719343057989064, "grad_norm": 0.2313636690378189, "learning_rate": 9.541844328637173e-06, "loss": 0.0229, "step": 52960 }, { "epoch": 0.4472778704270545, "grad_norm": 0.37799739837646484, "learning_rate": 9.541536137554112e-06, "loss": 0.012, "step": 52970 }, { "epoch": 0.4473623102742184, "grad_norm": 0.4689353406429291, "learning_rate": 9.541227847829817e-06, "loss": 0.0186, "step": 52980 }, { "epoch": 0.4474467501213823, "grad_norm": 0.28978413343429565, "learning_rate": 9.540919459470985e-06, "loss": 0.0108, "step": 52990 }, { "epoch": 0.4475311899685462, "grad_norm": 0.49074867367744446, "learning_rate": 9.540610972484309e-06, "loss": 0.0131, "step": 53000 }, { "epoch": 0.44761562981571, "grad_norm": 0.40925607085227966, "learning_rate": 9.540302386876498e-06, "loss": 0.0171, "step": 53010 }, { "epoch": 0.4477000696628739, "grad_norm": 0.24138887226581573, "learning_rate": 9.539993702654246e-06, "loss": 0.0158, "step": 53020 }, { "epoch": 0.4477845095100378, "grad_norm": 0.40264302492141724, "learning_rate": 9.539684919824264e-06, "loss": 0.0099, "step": 53030 }, { "epoch": 0.4478689493572017, "grad_norm": 0.38589558005332947, "learning_rate": 9.539376038393254e-06, "loss": 0.0203, "step": 53040 }, { "epoch": 0.44795338920436556, "grad_norm": 0.13314786553382874, "learning_rate": 9.539067058367927e-06, "loss": 0.012, "step": 53050 }, { "epoch": 0.4480378290515294, "grad_norm": 0.4489869177341461, "learning_rate": 9.538757979754994e-06, "loss": 0.0105, "step": 53060 }, { "epoch": 0.4481222688986933, "grad_norm": 0.08719722926616669, "learning_rate": 9.538448802561166e-06, "loss": 0.0107, "step": 53070 }, { "epoch": 0.44820670874585716, "grad_norm": 0.6386085748672485, "learning_rate": 9.538139526793162e-06, "loss": 0.0114, "step": 53080 }, { "epoch": 0.44829114859302105, "grad_norm": 0.5333084464073181, "learning_rate": 9.537830152457696e-06, "loss": 0.0148, "step": 53090 }, { "epoch": 0.44837558844018494, "grad_norm": 0.16185274720191956, "learning_rate": 9.537520679561489e-06, "loss": 0.0276, "step": 53100 }, { "epoch": 0.4484600282873488, "grad_norm": 0.13759444653987885, "learning_rate": 9.537211108111265e-06, "loss": 0.0156, "step": 53110 }, { "epoch": 0.44854446813451265, "grad_norm": 0.5207539200782776, "learning_rate": 9.536901438113744e-06, "loss": 0.0113, "step": 53120 }, { "epoch": 0.44862890798167654, "grad_norm": 0.638705313205719, "learning_rate": 9.536591669575653e-06, "loss": 0.0205, "step": 53130 }, { "epoch": 0.4487133478288404, "grad_norm": 0.10381348431110382, "learning_rate": 9.536281802503722e-06, "loss": 0.0118, "step": 53140 }, { "epoch": 0.4487977876760043, "grad_norm": 0.5962833762168884, "learning_rate": 9.535971836904678e-06, "loss": 0.0111, "step": 53150 }, { "epoch": 0.4488822275231682, "grad_norm": 0.28655174374580383, "learning_rate": 9.535661772785257e-06, "loss": 0.0161, "step": 53160 }, { "epoch": 0.4489666673703321, "grad_norm": 0.030457835644483566, "learning_rate": 9.53535161015219e-06, "loss": 0.0104, "step": 53170 }, { "epoch": 0.4490511072174959, "grad_norm": 0.1787354052066803, "learning_rate": 9.535041349012214e-06, "loss": 0.0179, "step": 53180 }, { "epoch": 0.4491355470646598, "grad_norm": 0.48331621289253235, "learning_rate": 9.534730989372073e-06, "loss": 0.0156, "step": 53190 }, { "epoch": 0.4492199869118237, "grad_norm": 0.27743032574653625, "learning_rate": 9.534420531238501e-06, "loss": 0.0145, "step": 53200 }, { "epoch": 0.4493044267589876, "grad_norm": 0.3233190178871155, "learning_rate": 9.534109974618248e-06, "loss": 0.0241, "step": 53210 }, { "epoch": 0.44938886660615146, "grad_norm": 0.6446808576583862, "learning_rate": 9.533799319518054e-06, "loss": 0.0139, "step": 53220 }, { "epoch": 0.44947330645331535, "grad_norm": 0.7719149589538574, "learning_rate": 9.533488565944666e-06, "loss": 0.0239, "step": 53230 }, { "epoch": 0.4495577463004792, "grad_norm": 0.7260241508483887, "learning_rate": 9.533177713904836e-06, "loss": 0.0103, "step": 53240 }, { "epoch": 0.44964218614764306, "grad_norm": 0.30655065178871155, "learning_rate": 9.532866763405315e-06, "loss": 0.0139, "step": 53250 }, { "epoch": 0.44972662599480695, "grad_norm": 0.6409379839897156, "learning_rate": 9.532555714452858e-06, "loss": 0.0145, "step": 53260 }, { "epoch": 0.44981106584197084, "grad_norm": 0.4243273437023163, "learning_rate": 9.532244567054218e-06, "loss": 0.0191, "step": 53270 }, { "epoch": 0.4498955056891347, "grad_norm": 0.8083030581474304, "learning_rate": 9.531933321216155e-06, "loss": 0.0152, "step": 53280 }, { "epoch": 0.44997994553629855, "grad_norm": 0.3520828187465668, "learning_rate": 9.53162197694543e-06, "loss": 0.0147, "step": 53290 }, { "epoch": 0.45006438538346244, "grad_norm": 0.10798392444849014, "learning_rate": 9.531310534248805e-06, "loss": 0.0172, "step": 53300 }, { "epoch": 0.4501488252306263, "grad_norm": 0.4338356554508209, "learning_rate": 9.530998993133043e-06, "loss": 0.0062, "step": 53310 }, { "epoch": 0.4502332650777902, "grad_norm": 0.44079214334487915, "learning_rate": 9.53068735360491e-06, "loss": 0.0153, "step": 53320 }, { "epoch": 0.4503177049249541, "grad_norm": 0.8767895698547363, "learning_rate": 9.530375615671177e-06, "loss": 0.0199, "step": 53330 }, { "epoch": 0.450402144772118, "grad_norm": 0.8063251376152039, "learning_rate": 9.530063779338614e-06, "loss": 0.0177, "step": 53340 }, { "epoch": 0.4504865846192818, "grad_norm": 0.26660218834877014, "learning_rate": 9.529751844613995e-06, "loss": 0.0152, "step": 53350 }, { "epoch": 0.4505710244664457, "grad_norm": 0.3995330035686493, "learning_rate": 9.529439811504093e-06, "loss": 0.0175, "step": 53360 }, { "epoch": 0.4506554643136096, "grad_norm": 0.22629328072071075, "learning_rate": 9.529127680015686e-06, "loss": 0.0096, "step": 53370 }, { "epoch": 0.4507399041607735, "grad_norm": 0.5139350891113281, "learning_rate": 9.528815450155555e-06, "loss": 0.0134, "step": 53380 }, { "epoch": 0.45082434400793736, "grad_norm": 0.330234169960022, "learning_rate": 9.528503121930481e-06, "loss": 0.0099, "step": 53390 }, { "epoch": 0.45090878385510125, "grad_norm": 0.7055093050003052, "learning_rate": 9.528190695347247e-06, "loss": 0.0173, "step": 53400 }, { "epoch": 0.4509932237022651, "grad_norm": 0.27585169672966003, "learning_rate": 9.527878170412636e-06, "loss": 0.0153, "step": 53410 }, { "epoch": 0.45107766354942896, "grad_norm": 0.1638176292181015, "learning_rate": 9.527565547133442e-06, "loss": 0.0156, "step": 53420 }, { "epoch": 0.45116210339659285, "grad_norm": 0.6440040469169617, "learning_rate": 9.52725282551645e-06, "loss": 0.0147, "step": 53430 }, { "epoch": 0.45124654324375674, "grad_norm": 0.3462508022785187, "learning_rate": 9.526940005568457e-06, "loss": 0.0272, "step": 53440 }, { "epoch": 0.4513309830909206, "grad_norm": 0.6742969155311584, "learning_rate": 9.526627087296254e-06, "loss": 0.0093, "step": 53450 }, { "epoch": 0.4514154229380845, "grad_norm": 0.19977833330631256, "learning_rate": 9.526314070706637e-06, "loss": 0.0116, "step": 53460 }, { "epoch": 0.45149986278524834, "grad_norm": 0.3265783190727234, "learning_rate": 9.526000955806408e-06, "loss": 0.0153, "step": 53470 }, { "epoch": 0.4515843026324122, "grad_norm": 0.2802448868751526, "learning_rate": 9.525687742602362e-06, "loss": 0.0083, "step": 53480 }, { "epoch": 0.4516687424795761, "grad_norm": 0.23834525048732758, "learning_rate": 9.525374431101309e-06, "loss": 0.0118, "step": 53490 }, { "epoch": 0.45175318232674, "grad_norm": 0.686564564704895, "learning_rate": 9.52506102131005e-06, "loss": 0.0245, "step": 53500 }, { "epoch": 0.4518376221739039, "grad_norm": 0.7165586352348328, "learning_rate": 9.524747513235393e-06, "loss": 0.0157, "step": 53510 }, { "epoch": 0.4519220620210677, "grad_norm": 0.3682840168476105, "learning_rate": 9.524433906884146e-06, "loss": 0.0102, "step": 53520 }, { "epoch": 0.4520065018682316, "grad_norm": 0.2606180012226105, "learning_rate": 9.524120202263123e-06, "loss": 0.0131, "step": 53530 }, { "epoch": 0.4520909417153955, "grad_norm": 0.138619527220726, "learning_rate": 9.523806399379136e-06, "loss": 0.0085, "step": 53540 }, { "epoch": 0.4521753815625594, "grad_norm": 0.9238854646682739, "learning_rate": 9.523492498239e-06, "loss": 0.0121, "step": 53550 }, { "epoch": 0.45225982140972326, "grad_norm": 0.30370649695396423, "learning_rate": 9.523178498849535e-06, "loss": 0.0163, "step": 53560 }, { "epoch": 0.45234426125688715, "grad_norm": 0.43681031465530396, "learning_rate": 9.52286440121756e-06, "loss": 0.0127, "step": 53570 }, { "epoch": 0.452428701104051, "grad_norm": 0.19831345975399017, "learning_rate": 9.522550205349896e-06, "loss": 0.0112, "step": 53580 }, { "epoch": 0.45251314095121487, "grad_norm": 0.3073713779449463, "learning_rate": 9.522235911253368e-06, "loss": 0.0156, "step": 53590 }, { "epoch": 0.45259758079837875, "grad_norm": 0.2662659287452698, "learning_rate": 9.521921518934805e-06, "loss": 0.0129, "step": 53600 }, { "epoch": 0.45268202064554264, "grad_norm": 0.21525758504867554, "learning_rate": 9.521607028401031e-06, "loss": 0.0136, "step": 53610 }, { "epoch": 0.4527664604927065, "grad_norm": 0.14708212018013, "learning_rate": 9.52129243965888e-06, "loss": 0.013, "step": 53620 }, { "epoch": 0.4528509003398704, "grad_norm": 0.34192076325416565, "learning_rate": 9.520977752715184e-06, "loss": 0.0178, "step": 53630 }, { "epoch": 0.45293534018703424, "grad_norm": 0.4752205014228821, "learning_rate": 9.520662967576777e-06, "loss": 0.0161, "step": 53640 }, { "epoch": 0.4530197800341981, "grad_norm": 0.24976715445518494, "learning_rate": 9.520348084250497e-06, "loss": 0.0139, "step": 53650 }, { "epoch": 0.453104219881362, "grad_norm": 0.43111228942871094, "learning_rate": 9.520033102743183e-06, "loss": 0.017, "step": 53660 }, { "epoch": 0.4531886597285259, "grad_norm": 0.6113178133964539, "learning_rate": 9.519718023061675e-06, "loss": 0.0171, "step": 53670 }, { "epoch": 0.4532730995756898, "grad_norm": 0.22509583830833435, "learning_rate": 9.519402845212819e-06, "loss": 0.015, "step": 53680 }, { "epoch": 0.4533575394228536, "grad_norm": 0.8082652688026428, "learning_rate": 9.519087569203458e-06, "loss": 0.0137, "step": 53690 }, { "epoch": 0.4534419792700175, "grad_norm": 0.3484548032283783, "learning_rate": 9.518772195040442e-06, "loss": 0.0217, "step": 53700 }, { "epoch": 0.4535264191171814, "grad_norm": 0.6154754757881165, "learning_rate": 9.51845672273062e-06, "loss": 0.0205, "step": 53710 }, { "epoch": 0.4536108589643453, "grad_norm": 0.22873333096504211, "learning_rate": 9.518141152280844e-06, "loss": 0.013, "step": 53720 }, { "epoch": 0.45369529881150916, "grad_norm": 0.8366191387176514, "learning_rate": 9.517825483697969e-06, "loss": 0.0116, "step": 53730 }, { "epoch": 0.45377973865867305, "grad_norm": 1.0322149991989136, "learning_rate": 9.51750971698885e-06, "loss": 0.0154, "step": 53740 }, { "epoch": 0.4538641785058369, "grad_norm": 0.16196395456790924, "learning_rate": 9.517193852160343e-06, "loss": 0.0137, "step": 53750 }, { "epoch": 0.45394861835300077, "grad_norm": 0.2751491665840149, "learning_rate": 9.516877889219314e-06, "loss": 0.0118, "step": 53760 }, { "epoch": 0.45403305820016465, "grad_norm": 0.3193785548210144, "learning_rate": 9.516561828172622e-06, "loss": 0.0112, "step": 53770 }, { "epoch": 0.45411749804732854, "grad_norm": 0.33148789405822754, "learning_rate": 9.51624566902713e-06, "loss": 0.0123, "step": 53780 }, { "epoch": 0.4542019378944924, "grad_norm": 0.48560139536857605, "learning_rate": 9.515929411789712e-06, "loss": 0.0117, "step": 53790 }, { "epoch": 0.4542863777416563, "grad_norm": 0.15263445675373077, "learning_rate": 9.51561305646723e-06, "loss": 0.0201, "step": 53800 }, { "epoch": 0.45437081758882014, "grad_norm": 0.5557089447975159, "learning_rate": 9.51529660306656e-06, "loss": 0.0135, "step": 53810 }, { "epoch": 0.45445525743598403, "grad_norm": 0.4524369537830353, "learning_rate": 9.51498005159457e-06, "loss": 0.0147, "step": 53820 }, { "epoch": 0.4545396972831479, "grad_norm": 0.887183427810669, "learning_rate": 9.51466340205814e-06, "loss": 0.0144, "step": 53830 }, { "epoch": 0.4546241371303118, "grad_norm": 0.4244095981121063, "learning_rate": 9.514346654464146e-06, "loss": 0.0109, "step": 53840 }, { "epoch": 0.4547085769774757, "grad_norm": 0.5331766605377197, "learning_rate": 9.514029808819468e-06, "loss": 0.0092, "step": 53850 }, { "epoch": 0.4547930168246396, "grad_norm": 0.4168091118335724, "learning_rate": 9.513712865130986e-06, "loss": 0.0092, "step": 53860 }, { "epoch": 0.4548774566718034, "grad_norm": 0.5290350317955017, "learning_rate": 9.513395823405587e-06, "loss": 0.0129, "step": 53870 }, { "epoch": 0.4549618965189673, "grad_norm": 0.7209897041320801, "learning_rate": 9.513078683650157e-06, "loss": 0.0116, "step": 53880 }, { "epoch": 0.4550463363661312, "grad_norm": 0.45345401763916016, "learning_rate": 9.51276144587158e-06, "loss": 0.0136, "step": 53890 }, { "epoch": 0.45513077621329506, "grad_norm": 0.5358315706253052, "learning_rate": 9.51244411007675e-06, "loss": 0.007, "step": 53900 }, { "epoch": 0.45521521606045895, "grad_norm": 0.9402153491973877, "learning_rate": 9.51212667627256e-06, "loss": 0.018, "step": 53910 }, { "epoch": 0.4552996559076228, "grad_norm": 0.1036718413233757, "learning_rate": 9.511809144465901e-06, "loss": 0.0083, "step": 53920 }, { "epoch": 0.45538409575478667, "grad_norm": 0.6783795952796936, "learning_rate": 9.511491514663674e-06, "loss": 0.0216, "step": 53930 }, { "epoch": 0.45546853560195055, "grad_norm": 0.6147048473358154, "learning_rate": 9.511173786872775e-06, "loss": 0.0211, "step": 53940 }, { "epoch": 0.45555297544911444, "grad_norm": 0.27312469482421875, "learning_rate": 9.510855961100105e-06, "loss": 0.0182, "step": 53950 }, { "epoch": 0.4556374152962783, "grad_norm": 0.3080972731113434, "learning_rate": 9.51053803735257e-06, "loss": 0.0229, "step": 53960 }, { "epoch": 0.4557218551434422, "grad_norm": 0.6095848083496094, "learning_rate": 9.51022001563707e-06, "loss": 0.0135, "step": 53970 }, { "epoch": 0.45580629499060604, "grad_norm": 0.2946303188800812, "learning_rate": 9.509901895960518e-06, "loss": 0.0109, "step": 53980 }, { "epoch": 0.45589073483776993, "grad_norm": 0.506598174571991, "learning_rate": 9.509583678329818e-06, "loss": 0.024, "step": 53990 }, { "epoch": 0.4559751746849338, "grad_norm": 1.4204059839248657, "learning_rate": 9.50926536275189e-06, "loss": 0.014, "step": 54000 }, { "epoch": 0.4560596145320977, "grad_norm": 0.22403383255004883, "learning_rate": 9.508946949233638e-06, "loss": 0.0147, "step": 54010 }, { "epoch": 0.4561440543792616, "grad_norm": 0.7081162929534912, "learning_rate": 9.508628437781983e-06, "loss": 0.0256, "step": 54020 }, { "epoch": 0.4562284942264255, "grad_norm": 0.4291483461856842, "learning_rate": 9.508309828403843e-06, "loss": 0.0163, "step": 54030 }, { "epoch": 0.4563129340735893, "grad_norm": 0.5737635493278503, "learning_rate": 9.507991121106136e-06, "loss": 0.0117, "step": 54040 }, { "epoch": 0.4563973739207532, "grad_norm": 1.0135695934295654, "learning_rate": 9.507672315895787e-06, "loss": 0.016, "step": 54050 }, { "epoch": 0.4564818137679171, "grad_norm": 0.6021358370780945, "learning_rate": 9.507353412779719e-06, "loss": 0.0126, "step": 54060 }, { "epoch": 0.45656625361508096, "grad_norm": 0.44088688492774963, "learning_rate": 9.507034411764857e-06, "loss": 0.0133, "step": 54070 }, { "epoch": 0.45665069346224485, "grad_norm": 0.49175193905830383, "learning_rate": 9.506715312858132e-06, "loss": 0.015, "step": 54080 }, { "epoch": 0.45673513330940874, "grad_norm": 0.2162940800189972, "learning_rate": 9.506396116066475e-06, "loss": 0.008, "step": 54090 }, { "epoch": 0.45681957315657257, "grad_norm": 0.37714412808418274, "learning_rate": 9.506076821396817e-06, "loss": 0.011, "step": 54100 }, { "epoch": 0.45690401300373645, "grad_norm": 0.29140371084213257, "learning_rate": 9.505757428856095e-06, "loss": 0.017, "step": 54110 }, { "epoch": 0.45698845285090034, "grad_norm": 0.32032787799835205, "learning_rate": 9.505437938451243e-06, "loss": 0.0157, "step": 54120 }, { "epoch": 0.4570728926980642, "grad_norm": 0.24156807363033295, "learning_rate": 9.505118350189203e-06, "loss": 0.0074, "step": 54130 }, { "epoch": 0.4571573325452281, "grad_norm": 0.41469019651412964, "learning_rate": 9.504798664076916e-06, "loss": 0.0155, "step": 54140 }, { "epoch": 0.45724177239239194, "grad_norm": 0.35255125164985657, "learning_rate": 9.504478880121324e-06, "loss": 0.0128, "step": 54150 }, { "epoch": 0.45732621223955583, "grad_norm": 0.1262344866991043, "learning_rate": 9.504158998329374e-06, "loss": 0.0114, "step": 54160 }, { "epoch": 0.4574106520867197, "grad_norm": 0.3087804317474365, "learning_rate": 9.503839018708016e-06, "loss": 0.0121, "step": 54170 }, { "epoch": 0.4574950919338836, "grad_norm": 0.31940752267837524, "learning_rate": 9.503518941264194e-06, "loss": 0.0151, "step": 54180 }, { "epoch": 0.4575795317810475, "grad_norm": 0.812614381313324, "learning_rate": 9.503198766004866e-06, "loss": 0.0101, "step": 54190 }, { "epoch": 0.4576639716282114, "grad_norm": 0.657490074634552, "learning_rate": 9.502878492936982e-06, "loss": 0.0161, "step": 54200 }, { "epoch": 0.4577484114753752, "grad_norm": 0.3687148988246918, "learning_rate": 9.5025581220675e-06, "loss": 0.0121, "step": 54210 }, { "epoch": 0.4578328513225391, "grad_norm": 0.415181964635849, "learning_rate": 9.502237653403379e-06, "loss": 0.0108, "step": 54220 }, { "epoch": 0.457917291169703, "grad_norm": 0.38789641857147217, "learning_rate": 9.501917086951579e-06, "loss": 0.0172, "step": 54230 }, { "epoch": 0.45800173101686686, "grad_norm": 1.0396435260772705, "learning_rate": 9.501596422719061e-06, "loss": 0.0208, "step": 54240 }, { "epoch": 0.45808617086403075, "grad_norm": 0.6185749173164368, "learning_rate": 9.501275660712792e-06, "loss": 0.0217, "step": 54250 }, { "epoch": 0.45817061071119464, "grad_norm": 0.1751166135072708, "learning_rate": 9.500954800939739e-06, "loss": 0.0082, "step": 54260 }, { "epoch": 0.45825505055835847, "grad_norm": 0.3997519612312317, "learning_rate": 9.500633843406869e-06, "loss": 0.0171, "step": 54270 }, { "epoch": 0.45833949040552235, "grad_norm": 0.6793903708457947, "learning_rate": 9.500312788121154e-06, "loss": 0.0209, "step": 54280 }, { "epoch": 0.45842393025268624, "grad_norm": 0.3657231330871582, "learning_rate": 9.499991635089567e-06, "loss": 0.0148, "step": 54290 }, { "epoch": 0.4585083700998501, "grad_norm": 0.539832592010498, "learning_rate": 9.499670384319085e-06, "loss": 0.0174, "step": 54300 }, { "epoch": 0.458592809947014, "grad_norm": 0.29719236493110657, "learning_rate": 9.499349035816685e-06, "loss": 0.0098, "step": 54310 }, { "epoch": 0.4586772497941779, "grad_norm": 0.7660928964614868, "learning_rate": 9.499027589589345e-06, "loss": 0.0087, "step": 54320 }, { "epoch": 0.45876168964134173, "grad_norm": 0.4833326041698456, "learning_rate": 9.498706045644047e-06, "loss": 0.0108, "step": 54330 }, { "epoch": 0.4588461294885056, "grad_norm": 0.05649106577038765, "learning_rate": 9.498384403987777e-06, "loss": 0.0086, "step": 54340 }, { "epoch": 0.4589305693356695, "grad_norm": 0.17306658625602722, "learning_rate": 9.498062664627518e-06, "loss": 0.0235, "step": 54350 }, { "epoch": 0.4590150091828334, "grad_norm": 0.10257905721664429, "learning_rate": 9.49774082757026e-06, "loss": 0.0127, "step": 54360 }, { "epoch": 0.4590994490299973, "grad_norm": 0.5792535543441772, "learning_rate": 9.497418892822994e-06, "loss": 0.0164, "step": 54370 }, { "epoch": 0.4591838888771611, "grad_norm": 0.8661380410194397, "learning_rate": 9.49709686039271e-06, "loss": 0.0115, "step": 54380 }, { "epoch": 0.459268328724325, "grad_norm": 0.3481643795967102, "learning_rate": 9.496774730286404e-06, "loss": 0.0078, "step": 54390 }, { "epoch": 0.4593527685714889, "grad_norm": 0.7434557676315308, "learning_rate": 9.496452502511073e-06, "loss": 0.0151, "step": 54400 }, { "epoch": 0.45943720841865276, "grad_norm": 0.34190791845321655, "learning_rate": 9.496130177073714e-06, "loss": 0.0116, "step": 54410 }, { "epoch": 0.45952164826581665, "grad_norm": 0.5275778770446777, "learning_rate": 9.49580775398133e-06, "loss": 0.0137, "step": 54420 }, { "epoch": 0.45960608811298054, "grad_norm": 0.2588292360305786, "learning_rate": 9.495485233240922e-06, "loss": 0.0158, "step": 54430 }, { "epoch": 0.45969052796014437, "grad_norm": 0.7473177909851074, "learning_rate": 9.495162614859496e-06, "loss": 0.0102, "step": 54440 }, { "epoch": 0.45977496780730825, "grad_norm": 0.46130481362342834, "learning_rate": 9.49483989884406e-06, "loss": 0.0213, "step": 54450 }, { "epoch": 0.45985940765447214, "grad_norm": 0.4513324201107025, "learning_rate": 9.494517085201621e-06, "loss": 0.0116, "step": 54460 }, { "epoch": 0.459943847501636, "grad_norm": 0.20499886572360992, "learning_rate": 9.494194173939193e-06, "loss": 0.0151, "step": 54470 }, { "epoch": 0.4600282873487999, "grad_norm": 0.33764609694480896, "learning_rate": 9.493871165063789e-06, "loss": 0.016, "step": 54480 }, { "epoch": 0.4601127271959638, "grad_norm": 0.18464477360248566, "learning_rate": 9.493548058582423e-06, "loss": 0.0226, "step": 54490 }, { "epoch": 0.46019716704312763, "grad_norm": 0.2905460596084595, "learning_rate": 9.493224854502114e-06, "loss": 0.0181, "step": 54500 }, { "epoch": 0.4602816068902915, "grad_norm": 0.5830632448196411, "learning_rate": 9.492901552829883e-06, "loss": 0.019, "step": 54510 }, { "epoch": 0.4603660467374554, "grad_norm": 0.6222373843193054, "learning_rate": 9.49257815357275e-06, "loss": 0.011, "step": 54520 }, { "epoch": 0.4604504865846193, "grad_norm": 0.3413332402706146, "learning_rate": 9.49225465673774e-06, "loss": 0.0163, "step": 54530 }, { "epoch": 0.4605349264317832, "grad_norm": 0.6719583868980408, "learning_rate": 9.49193106233188e-06, "loss": 0.0136, "step": 54540 }, { "epoch": 0.46061936627894706, "grad_norm": 0.8010790944099426, "learning_rate": 9.4916073703622e-06, "loss": 0.0096, "step": 54550 }, { "epoch": 0.4607038061261109, "grad_norm": 0.22379976511001587, "learning_rate": 9.491283580835724e-06, "loss": 0.0102, "step": 54560 }, { "epoch": 0.4607882459732748, "grad_norm": 0.5056582093238831, "learning_rate": 9.490959693759492e-06, "loss": 0.0113, "step": 54570 }, { "epoch": 0.46087268582043867, "grad_norm": 0.46540939807891846, "learning_rate": 9.490635709140535e-06, "loss": 0.0152, "step": 54580 }, { "epoch": 0.46095712566760255, "grad_norm": 0.44379258155822754, "learning_rate": 9.49031162698589e-06, "loss": 0.0162, "step": 54590 }, { "epoch": 0.46104156551476644, "grad_norm": 0.3955187499523163, "learning_rate": 9.489987447302597e-06, "loss": 0.0192, "step": 54600 }, { "epoch": 0.46112600536193027, "grad_norm": 0.40008625388145447, "learning_rate": 9.489663170097696e-06, "loss": 0.0116, "step": 54610 }, { "epoch": 0.46121044520909416, "grad_norm": 0.4098415672779083, "learning_rate": 9.489338795378232e-06, "loss": 0.0186, "step": 54620 }, { "epoch": 0.46129488505625804, "grad_norm": 0.6584219932556152, "learning_rate": 9.489014323151248e-06, "loss": 0.0306, "step": 54630 }, { "epoch": 0.46137932490342193, "grad_norm": 0.4233623445034027, "learning_rate": 9.488689753423795e-06, "loss": 0.0064, "step": 54640 }, { "epoch": 0.4614637647505858, "grad_norm": 0.3619938790798187, "learning_rate": 9.48836508620292e-06, "loss": 0.018, "step": 54650 }, { "epoch": 0.4615482045977497, "grad_norm": 0.4599871337413788, "learning_rate": 9.488040321495672e-06, "loss": 0.0161, "step": 54660 }, { "epoch": 0.46163264444491353, "grad_norm": 0.6073504090309143, "learning_rate": 9.487715459309111e-06, "loss": 0.0149, "step": 54670 }, { "epoch": 0.4617170842920774, "grad_norm": 0.44451695680618286, "learning_rate": 9.48739049965029e-06, "loss": 0.015, "step": 54680 }, { "epoch": 0.4618015241392413, "grad_norm": 0.3890503942966461, "learning_rate": 9.487065442526265e-06, "loss": 0.0195, "step": 54690 }, { "epoch": 0.4618859639864052, "grad_norm": 0.32038095593452454, "learning_rate": 9.4867402879441e-06, "loss": 0.0096, "step": 54700 }, { "epoch": 0.4619704038335691, "grad_norm": 0.10640376061201096, "learning_rate": 9.486415035910853e-06, "loss": 0.0134, "step": 54710 }, { "epoch": 0.46205484368073296, "grad_norm": 0.48121583461761475, "learning_rate": 9.486089686433593e-06, "loss": 0.0192, "step": 54720 }, { "epoch": 0.4621392835278968, "grad_norm": 0.4203643500804901, "learning_rate": 9.485764239519384e-06, "loss": 0.0194, "step": 54730 }, { "epoch": 0.4622237233750607, "grad_norm": 0.47976863384246826, "learning_rate": 9.485438695175293e-06, "loss": 0.016, "step": 54740 }, { "epoch": 0.46230816322222457, "grad_norm": 0.7909425497055054, "learning_rate": 9.485113053408394e-06, "loss": 0.0199, "step": 54750 }, { "epoch": 0.46239260306938845, "grad_norm": 0.4577922224998474, "learning_rate": 9.48478731422576e-06, "loss": 0.0116, "step": 54760 }, { "epoch": 0.46247704291655234, "grad_norm": 0.3134574890136719, "learning_rate": 9.484461477634463e-06, "loss": 0.0171, "step": 54770 }, { "epoch": 0.4625614827637162, "grad_norm": 0.1852075457572937, "learning_rate": 9.484135543641582e-06, "loss": 0.0127, "step": 54780 }, { "epoch": 0.46264592261088006, "grad_norm": 0.4881267249584198, "learning_rate": 9.483809512254195e-06, "loss": 0.0133, "step": 54790 }, { "epoch": 0.46273036245804394, "grad_norm": 0.3862256407737732, "learning_rate": 9.483483383479385e-06, "loss": 0.0111, "step": 54800 }, { "epoch": 0.46281480230520783, "grad_norm": 0.33172500133514404, "learning_rate": 9.483157157324234e-06, "loss": 0.0125, "step": 54810 }, { "epoch": 0.4628992421523717, "grad_norm": 1.6237976551055908, "learning_rate": 9.482830833795829e-06, "loss": 0.0252, "step": 54820 }, { "epoch": 0.4629836819995356, "grad_norm": 0.26183125376701355, "learning_rate": 9.482504412901257e-06, "loss": 0.0102, "step": 54830 }, { "epoch": 0.46306812184669943, "grad_norm": 0.26888442039489746, "learning_rate": 9.482177894647608e-06, "loss": 0.0151, "step": 54840 }, { "epoch": 0.4631525616938633, "grad_norm": 0.0566713772714138, "learning_rate": 9.481851279041973e-06, "loss": 0.0113, "step": 54850 }, { "epoch": 0.4632370015410272, "grad_norm": 0.5589843988418579, "learning_rate": 9.481524566091448e-06, "loss": 0.0187, "step": 54860 }, { "epoch": 0.4633214413881911, "grad_norm": 0.47212228178977966, "learning_rate": 9.481197755803125e-06, "loss": 0.013, "step": 54870 }, { "epoch": 0.463405881235355, "grad_norm": 0.37052062153816223, "learning_rate": 9.480870848184106e-06, "loss": 0.0216, "step": 54880 }, { "epoch": 0.46349032108251886, "grad_norm": 0.6057702898979187, "learning_rate": 9.480543843241492e-06, "loss": 0.012, "step": 54890 }, { "epoch": 0.4635747609296827, "grad_norm": 0.27470770478248596, "learning_rate": 9.480216740982383e-06, "loss": 0.0139, "step": 54900 }, { "epoch": 0.4636592007768466, "grad_norm": 0.47971218824386597, "learning_rate": 9.479889541413882e-06, "loss": 0.0162, "step": 54910 }, { "epoch": 0.46374364062401047, "grad_norm": 0.3388943374156952, "learning_rate": 9.479562244543102e-06, "loss": 0.015, "step": 54920 }, { "epoch": 0.46382808047117435, "grad_norm": 0.07360217720270157, "learning_rate": 9.479234850377145e-06, "loss": 0.0183, "step": 54930 }, { "epoch": 0.46391252031833824, "grad_norm": 0.3880072832107544, "learning_rate": 9.478907358923126e-06, "loss": 0.0136, "step": 54940 }, { "epoch": 0.4639969601655021, "grad_norm": 0.500584602355957, "learning_rate": 9.478579770188158e-06, "loss": 0.0139, "step": 54950 }, { "epoch": 0.46408140001266596, "grad_norm": 0.4124821722507477, "learning_rate": 9.478252084179355e-06, "loss": 0.0152, "step": 54960 }, { "epoch": 0.46416583985982984, "grad_norm": 0.850371241569519, "learning_rate": 9.477924300903833e-06, "loss": 0.0204, "step": 54970 }, { "epoch": 0.46425027970699373, "grad_norm": 0.5902002453804016, "learning_rate": 9.477596420368714e-06, "loss": 0.0116, "step": 54980 }, { "epoch": 0.4643347195541576, "grad_norm": 0.22470659017562866, "learning_rate": 9.477268442581118e-06, "loss": 0.0185, "step": 54990 }, { "epoch": 0.4644191594013215, "grad_norm": 0.6127181053161621, "learning_rate": 9.476940367548169e-06, "loss": 0.0169, "step": 55000 }, { "epoch": 0.46450359924848533, "grad_norm": 0.3639587461948395, "learning_rate": 9.476612195276993e-06, "loss": 0.0192, "step": 55010 }, { "epoch": 0.4645880390956492, "grad_norm": 0.6010247468948364, "learning_rate": 9.476283925774717e-06, "loss": 0.0122, "step": 55020 }, { "epoch": 0.4646724789428131, "grad_norm": 0.13347329199314117, "learning_rate": 9.475955559048471e-06, "loss": 0.0134, "step": 55030 }, { "epoch": 0.464756918789977, "grad_norm": 0.7406262159347534, "learning_rate": 9.475627095105388e-06, "loss": 0.0132, "step": 55040 }, { "epoch": 0.4648413586371409, "grad_norm": 0.5441479682922363, "learning_rate": 9.475298533952602e-06, "loss": 0.0118, "step": 55050 }, { "epoch": 0.46492579848430476, "grad_norm": 0.30641934275627136, "learning_rate": 9.474969875597248e-06, "loss": 0.0177, "step": 55060 }, { "epoch": 0.4650102383314686, "grad_norm": 0.35519376397132874, "learning_rate": 9.474641120046466e-06, "loss": 0.0174, "step": 55070 }, { "epoch": 0.4650946781786325, "grad_norm": 0.2630533277988434, "learning_rate": 9.474312267307396e-06, "loss": 0.0161, "step": 55080 }, { "epoch": 0.46517911802579637, "grad_norm": 0.3701639175415039, "learning_rate": 9.473983317387179e-06, "loss": 0.0199, "step": 55090 }, { "epoch": 0.46526355787296025, "grad_norm": 1.1887526512145996, "learning_rate": 9.473654270292962e-06, "loss": 0.0142, "step": 55100 }, { "epoch": 0.46534799772012414, "grad_norm": 0.6681655645370483, "learning_rate": 9.473325126031892e-06, "loss": 0.0219, "step": 55110 }, { "epoch": 0.465432437567288, "grad_norm": 0.36366331577301025, "learning_rate": 9.472995884611115e-06, "loss": 0.0148, "step": 55120 }, { "epoch": 0.46551687741445186, "grad_norm": 0.2321690022945404, "learning_rate": 9.472666546037786e-06, "loss": 0.0089, "step": 55130 }, { "epoch": 0.46560131726161574, "grad_norm": 0.3225875496864319, "learning_rate": 9.472337110319054e-06, "loss": 0.0102, "step": 55140 }, { "epoch": 0.46568575710877963, "grad_norm": 1.7985668182373047, "learning_rate": 9.472007577462077e-06, "loss": 0.0184, "step": 55150 }, { "epoch": 0.4657701969559435, "grad_norm": 0.5228533148765564, "learning_rate": 9.471677947474012e-06, "loss": 0.0102, "step": 55160 }, { "epoch": 0.4658546368031074, "grad_norm": 0.6596148610115051, "learning_rate": 9.47134822036202e-06, "loss": 0.0129, "step": 55170 }, { "epoch": 0.4659390766502713, "grad_norm": 0.32649683952331543, "learning_rate": 9.47101839613326e-06, "loss": 0.0176, "step": 55180 }, { "epoch": 0.4660235164974351, "grad_norm": 0.1973356157541275, "learning_rate": 9.470688474794896e-06, "loss": 0.0139, "step": 55190 }, { "epoch": 0.466107956344599, "grad_norm": 1.3224927186965942, "learning_rate": 9.470358456354095e-06, "loss": 0.0229, "step": 55200 }, { "epoch": 0.4661923961917629, "grad_norm": 0.18106815218925476, "learning_rate": 9.470028340818022e-06, "loss": 0.0202, "step": 55210 }, { "epoch": 0.4662768360389268, "grad_norm": 0.6039352416992188, "learning_rate": 9.469698128193852e-06, "loss": 0.018, "step": 55220 }, { "epoch": 0.46636127588609066, "grad_norm": 0.303866446018219, "learning_rate": 9.469367818488754e-06, "loss": 0.016, "step": 55230 }, { "epoch": 0.4664457157332545, "grad_norm": 0.1215127632021904, "learning_rate": 9.469037411709905e-06, "loss": 0.0131, "step": 55240 }, { "epoch": 0.4665301555804184, "grad_norm": 0.36163926124572754, "learning_rate": 9.468706907864476e-06, "loss": 0.025, "step": 55250 }, { "epoch": 0.46661459542758227, "grad_norm": 0.4153844118118286, "learning_rate": 9.46837630695965e-06, "loss": 0.0164, "step": 55260 }, { "epoch": 0.46669903527474615, "grad_norm": 0.5924056172370911, "learning_rate": 9.468045609002606e-06, "loss": 0.0194, "step": 55270 }, { "epoch": 0.46678347512191004, "grad_norm": 0.5263228416442871, "learning_rate": 9.467714814000528e-06, "loss": 0.0226, "step": 55280 }, { "epoch": 0.4668679149690739, "grad_norm": 0.19157588481903076, "learning_rate": 9.467383921960598e-06, "loss": 0.0116, "step": 55290 }, { "epoch": 0.46695235481623776, "grad_norm": 0.0966135784983635, "learning_rate": 9.467052932890006e-06, "loss": 0.0155, "step": 55300 }, { "epoch": 0.46703679466340164, "grad_norm": 0.39036938548088074, "learning_rate": 9.466721846795938e-06, "loss": 0.0237, "step": 55310 }, { "epoch": 0.46712123451056553, "grad_norm": 0.32906660437583923, "learning_rate": 9.46639066368559e-06, "loss": 0.0124, "step": 55320 }, { "epoch": 0.4672056743577294, "grad_norm": 0.04408339783549309, "learning_rate": 9.466059383566148e-06, "loss": 0.015, "step": 55330 }, { "epoch": 0.4672901142048933, "grad_norm": 0.6887995004653931, "learning_rate": 9.465728006444814e-06, "loss": 0.014, "step": 55340 }, { "epoch": 0.4673745540520572, "grad_norm": 0.3143931031227112, "learning_rate": 9.465396532328784e-06, "loss": 0.0143, "step": 55350 }, { "epoch": 0.467458993899221, "grad_norm": 0.22291532158851624, "learning_rate": 9.465064961225254e-06, "loss": 0.0156, "step": 55360 }, { "epoch": 0.4675434337463849, "grad_norm": 0.44635576009750366, "learning_rate": 9.464733293141428e-06, "loss": 0.0136, "step": 55370 }, { "epoch": 0.4676278735935488, "grad_norm": 0.27510905265808105, "learning_rate": 9.46440152808451e-06, "loss": 0.0099, "step": 55380 }, { "epoch": 0.4677123134407127, "grad_norm": 0.6647131443023682, "learning_rate": 9.464069666061707e-06, "loss": 0.0243, "step": 55390 }, { "epoch": 0.46779675328787657, "grad_norm": 0.3251849114894867, "learning_rate": 9.463737707080224e-06, "loss": 0.0131, "step": 55400 }, { "epoch": 0.46788119313504045, "grad_norm": 1.044998288154602, "learning_rate": 9.463405651147275e-06, "loss": 0.0182, "step": 55410 }, { "epoch": 0.4679656329822043, "grad_norm": 0.11750184744596481, "learning_rate": 9.463073498270067e-06, "loss": 0.0133, "step": 55420 }, { "epoch": 0.46805007282936817, "grad_norm": 0.7527426481246948, "learning_rate": 9.462741248455818e-06, "loss": 0.0137, "step": 55430 }, { "epoch": 0.46813451267653206, "grad_norm": 0.4618700444698334, "learning_rate": 9.462408901711744e-06, "loss": 0.012, "step": 55440 }, { "epoch": 0.46821895252369594, "grad_norm": 0.6305046677589417, "learning_rate": 9.462076458045063e-06, "loss": 0.0145, "step": 55450 }, { "epoch": 0.46830339237085983, "grad_norm": 0.7583473324775696, "learning_rate": 9.461743917462997e-06, "loss": 0.0164, "step": 55460 }, { "epoch": 0.46838783221802366, "grad_norm": 0.7365331053733826, "learning_rate": 9.461411279972767e-06, "loss": 0.0117, "step": 55470 }, { "epoch": 0.46847227206518754, "grad_norm": 0.8553006649017334, "learning_rate": 9.461078545581597e-06, "loss": 0.0119, "step": 55480 }, { "epoch": 0.46855671191235143, "grad_norm": 0.6874058246612549, "learning_rate": 9.460745714296715e-06, "loss": 0.0137, "step": 55490 }, { "epoch": 0.4686411517595153, "grad_norm": 0.22544828057289124, "learning_rate": 9.46041278612535e-06, "loss": 0.0166, "step": 55500 }, { "epoch": 0.4687255916066792, "grad_norm": 0.6366386413574219, "learning_rate": 9.460079761074735e-06, "loss": 0.0124, "step": 55510 }, { "epoch": 0.4688100314538431, "grad_norm": 0.4412824809551239, "learning_rate": 9.459746639152101e-06, "loss": 0.0187, "step": 55520 }, { "epoch": 0.4688944713010069, "grad_norm": 0.3788931667804718, "learning_rate": 9.459413420364685e-06, "loss": 0.0216, "step": 55530 }, { "epoch": 0.4689789111481708, "grad_norm": 0.053063418716192245, "learning_rate": 9.45908010471972e-06, "loss": 0.0099, "step": 55540 }, { "epoch": 0.4690633509953347, "grad_norm": 0.5364916920661926, "learning_rate": 9.458746692224451e-06, "loss": 0.0213, "step": 55550 }, { "epoch": 0.4691477908424986, "grad_norm": 0.33105674386024475, "learning_rate": 9.458413182886117e-06, "loss": 0.0179, "step": 55560 }, { "epoch": 0.46923223068966247, "grad_norm": 0.2985836863517761, "learning_rate": 9.458079576711963e-06, "loss": 0.0098, "step": 55570 }, { "epoch": 0.46931667053682635, "grad_norm": 0.1408649981021881, "learning_rate": 9.457745873709234e-06, "loss": 0.0249, "step": 55580 }, { "epoch": 0.4694011103839902, "grad_norm": 0.7956172227859497, "learning_rate": 9.457412073885178e-06, "loss": 0.0126, "step": 55590 }, { "epoch": 0.46948555023115407, "grad_norm": 0.11627402901649475, "learning_rate": 9.457078177247046e-06, "loss": 0.01, "step": 55600 }, { "epoch": 0.46956999007831796, "grad_norm": 0.20820488035678864, "learning_rate": 9.456744183802086e-06, "loss": 0.013, "step": 55610 }, { "epoch": 0.46965442992548184, "grad_norm": 0.43786710500717163, "learning_rate": 9.45641009355756e-06, "loss": 0.0154, "step": 55620 }, { "epoch": 0.46973886977264573, "grad_norm": 0.3506656587123871, "learning_rate": 9.456075906520716e-06, "loss": 0.0146, "step": 55630 }, { "epoch": 0.4698233096198096, "grad_norm": 0.12707331776618958, "learning_rate": 9.45574162269882e-06, "loss": 0.0151, "step": 55640 }, { "epoch": 0.46990774946697345, "grad_norm": 0.2669796943664551, "learning_rate": 9.455407242099127e-06, "loss": 0.013, "step": 55650 }, { "epoch": 0.46999218931413733, "grad_norm": 0.08438706398010254, "learning_rate": 9.455072764728903e-06, "loss": 0.0193, "step": 55660 }, { "epoch": 0.4700766291613012, "grad_norm": 0.3632752001285553, "learning_rate": 9.45473819059541e-06, "loss": 0.0094, "step": 55670 }, { "epoch": 0.4701610690084651, "grad_norm": 0.3186614215373993, "learning_rate": 9.454403519705917e-06, "loss": 0.0103, "step": 55680 }, { "epoch": 0.470245508855629, "grad_norm": 0.715764045715332, "learning_rate": 9.454068752067691e-06, "loss": 0.014, "step": 55690 }, { "epoch": 0.4703299487027928, "grad_norm": 0.4702211916446686, "learning_rate": 9.453733887688005e-06, "loss": 0.0172, "step": 55700 }, { "epoch": 0.4704143885499567, "grad_norm": 0.39915308356285095, "learning_rate": 9.453398926574133e-06, "loss": 0.0177, "step": 55710 }, { "epoch": 0.4704988283971206, "grad_norm": 0.8757774233818054, "learning_rate": 9.45306386873335e-06, "loss": 0.0222, "step": 55720 }, { "epoch": 0.4705832682442845, "grad_norm": 0.588272213935852, "learning_rate": 9.45272871417293e-06, "loss": 0.0101, "step": 55730 }, { "epoch": 0.47066770809144837, "grad_norm": 0.47122225165367126, "learning_rate": 9.452393462900155e-06, "loss": 0.0226, "step": 55740 }, { "epoch": 0.47075214793861225, "grad_norm": 0.29867175221443176, "learning_rate": 9.452058114922307e-06, "loss": 0.0134, "step": 55750 }, { "epoch": 0.4708365877857761, "grad_norm": 0.4037233293056488, "learning_rate": 9.451722670246668e-06, "loss": 0.0138, "step": 55760 }, { "epoch": 0.47092102763293997, "grad_norm": 0.4501856863498688, "learning_rate": 9.451387128880525e-06, "loss": 0.0129, "step": 55770 }, { "epoch": 0.47100546748010386, "grad_norm": 0.5261572003364563, "learning_rate": 9.451051490831167e-06, "loss": 0.014, "step": 55780 }, { "epoch": 0.47108990732726774, "grad_norm": 0.47974905371665955, "learning_rate": 9.450715756105882e-06, "loss": 0.0252, "step": 55790 }, { "epoch": 0.47117434717443163, "grad_norm": 0.11389385908842087, "learning_rate": 9.450379924711963e-06, "loss": 0.0101, "step": 55800 }, { "epoch": 0.4712587870215955, "grad_norm": 0.3470819592475891, "learning_rate": 9.450043996656703e-06, "loss": 0.0131, "step": 55810 }, { "epoch": 0.47134322686875935, "grad_norm": 0.3732777535915375, "learning_rate": 9.449707971947401e-06, "loss": 0.0179, "step": 55820 }, { "epoch": 0.47142766671592323, "grad_norm": 0.5096341371536255, "learning_rate": 9.449371850591353e-06, "loss": 0.0122, "step": 55830 }, { "epoch": 0.4715121065630871, "grad_norm": 0.08001147955656052, "learning_rate": 9.44903563259586e-06, "loss": 0.0148, "step": 55840 }, { "epoch": 0.471596546410251, "grad_norm": 0.5408311486244202, "learning_rate": 9.448699317968224e-06, "loss": 0.0146, "step": 55850 }, { "epoch": 0.4716809862574149, "grad_norm": 0.7636139392852783, "learning_rate": 9.448362906715751e-06, "loss": 0.0094, "step": 55860 }, { "epoch": 0.4717654261045788, "grad_norm": 0.4048703908920288, "learning_rate": 9.448026398845749e-06, "loss": 0.0162, "step": 55870 }, { "epoch": 0.4718498659517426, "grad_norm": 0.16113430261611938, "learning_rate": 9.447689794365522e-06, "loss": 0.0219, "step": 55880 }, { "epoch": 0.4719343057989065, "grad_norm": 0.551121711730957, "learning_rate": 9.447353093282387e-06, "loss": 0.017, "step": 55890 }, { "epoch": 0.4720187456460704, "grad_norm": 0.4901403784751892, "learning_rate": 9.447016295603653e-06, "loss": 0.0115, "step": 55900 }, { "epoch": 0.47210318549323427, "grad_norm": 0.3010060787200928, "learning_rate": 9.446679401336636e-06, "loss": 0.0131, "step": 55910 }, { "epoch": 0.47218762534039815, "grad_norm": 0.23734647035598755, "learning_rate": 9.446342410488654e-06, "loss": 0.0121, "step": 55920 }, { "epoch": 0.472272065187562, "grad_norm": 0.011262081563472748, "learning_rate": 9.446005323067024e-06, "loss": 0.0218, "step": 55930 }, { "epoch": 0.47235650503472587, "grad_norm": 0.4474472105503082, "learning_rate": 9.445668139079072e-06, "loss": 0.0102, "step": 55940 }, { "epoch": 0.47244094488188976, "grad_norm": 0.13794918358325958, "learning_rate": 9.445330858532119e-06, "loss": 0.013, "step": 55950 }, { "epoch": 0.47252538472905364, "grad_norm": 0.4673496186733246, "learning_rate": 9.44499348143349e-06, "loss": 0.0137, "step": 55960 }, { "epoch": 0.47260982457621753, "grad_norm": 0.6369178891181946, "learning_rate": 9.444656007790512e-06, "loss": 0.0151, "step": 55970 }, { "epoch": 0.4726942644233814, "grad_norm": 0.3238092362880707, "learning_rate": 9.444318437610519e-06, "loss": 0.0123, "step": 55980 }, { "epoch": 0.47277870427054525, "grad_norm": 0.4527870714664459, "learning_rate": 9.443980770900838e-06, "loss": 0.0204, "step": 55990 }, { "epoch": 0.47286314411770913, "grad_norm": 0.3026486933231354, "learning_rate": 9.443643007668806e-06, "loss": 0.0153, "step": 56000 }, { "epoch": 0.472947583964873, "grad_norm": 0.3641689419746399, "learning_rate": 9.443305147921758e-06, "loss": 0.0207, "step": 56010 }, { "epoch": 0.4730320238120369, "grad_norm": 0.18378934264183044, "learning_rate": 9.442967191667034e-06, "loss": 0.012, "step": 56020 }, { "epoch": 0.4731164636592008, "grad_norm": 0.7614682912826538, "learning_rate": 9.442629138911971e-06, "loss": 0.0157, "step": 56030 }, { "epoch": 0.4732009035063647, "grad_norm": 0.47426357865333557, "learning_rate": 9.442290989663916e-06, "loss": 0.0135, "step": 56040 }, { "epoch": 0.4732853433535285, "grad_norm": 0.22780095040798187, "learning_rate": 9.44195274393021e-06, "loss": 0.0186, "step": 56050 }, { "epoch": 0.4733697832006924, "grad_norm": 0.7305237054824829, "learning_rate": 9.4416144017182e-06, "loss": 0.0049, "step": 56060 }, { "epoch": 0.4734542230478563, "grad_norm": 0.21008633077144623, "learning_rate": 9.441275963035236e-06, "loss": 0.0146, "step": 56070 }, { "epoch": 0.47353866289502017, "grad_norm": 0.4758507013320923, "learning_rate": 9.440937427888669e-06, "loss": 0.0086, "step": 56080 }, { "epoch": 0.47362310274218405, "grad_norm": 0.2589736878871918, "learning_rate": 9.44059879628585e-06, "loss": 0.0144, "step": 56090 }, { "epoch": 0.47370754258934794, "grad_norm": 0.46106648445129395, "learning_rate": 9.440260068234136e-06, "loss": 0.02, "step": 56100 }, { "epoch": 0.47379198243651177, "grad_norm": 0.1485806703567505, "learning_rate": 9.439921243740881e-06, "loss": 0.0143, "step": 56110 }, { "epoch": 0.47387642228367566, "grad_norm": 0.4406994879245758, "learning_rate": 9.43958232281345e-06, "loss": 0.0161, "step": 56120 }, { "epoch": 0.47396086213083954, "grad_norm": 0.5381771326065063, "learning_rate": 9.439243305459199e-06, "loss": 0.018, "step": 56130 }, { "epoch": 0.47404530197800343, "grad_norm": 3.237363338470459, "learning_rate": 9.438904191685492e-06, "loss": 0.021, "step": 56140 }, { "epoch": 0.4741297418251673, "grad_norm": 0.5355216860771179, "learning_rate": 9.438564981499697e-06, "loss": 0.0176, "step": 56150 }, { "epoch": 0.47421418167233115, "grad_norm": 0.5015213489532471, "learning_rate": 9.438225674909178e-06, "loss": 0.0207, "step": 56160 }, { "epoch": 0.47429862151949503, "grad_norm": 0.2623213827610016, "learning_rate": 9.437886271921308e-06, "loss": 0.0098, "step": 56170 }, { "epoch": 0.4743830613666589, "grad_norm": 0.5182377099990845, "learning_rate": 9.437546772543459e-06, "loss": 0.0188, "step": 56180 }, { "epoch": 0.4744675012138228, "grad_norm": 0.11251263320446014, "learning_rate": 9.437207176783002e-06, "loss": 0.0129, "step": 56190 }, { "epoch": 0.4745519410609867, "grad_norm": 0.16320562362670898, "learning_rate": 9.436867484647315e-06, "loss": 0.0122, "step": 56200 }, { "epoch": 0.4746363809081506, "grad_norm": 0.5865346193313599, "learning_rate": 9.436527696143774e-06, "loss": 0.0118, "step": 56210 }, { "epoch": 0.4747208207553144, "grad_norm": 0.4385211765766144, "learning_rate": 9.436187811279762e-06, "loss": 0.0354, "step": 56220 }, { "epoch": 0.4748052606024783, "grad_norm": 0.6010545492172241, "learning_rate": 9.43584783006266e-06, "loss": 0.0207, "step": 56230 }, { "epoch": 0.4748897004496422, "grad_norm": 0.19508150219917297, "learning_rate": 9.435507752499851e-06, "loss": 0.0153, "step": 56240 }, { "epoch": 0.47497414029680607, "grad_norm": 0.2537192106246948, "learning_rate": 9.435167578598723e-06, "loss": 0.0124, "step": 56250 }, { "epoch": 0.47505858014396996, "grad_norm": 0.46073585748672485, "learning_rate": 9.434827308366665e-06, "loss": 0.0166, "step": 56260 }, { "epoch": 0.47514301999113384, "grad_norm": 0.1283601075410843, "learning_rate": 9.434486941811065e-06, "loss": 0.0145, "step": 56270 }, { "epoch": 0.4752274598382977, "grad_norm": 0.6213489174842834, "learning_rate": 9.434146478939319e-06, "loss": 0.0127, "step": 56280 }, { "epoch": 0.47531189968546156, "grad_norm": 0.4509374797344208, "learning_rate": 9.43380591975882e-06, "loss": 0.0203, "step": 56290 }, { "epoch": 0.47539633953262544, "grad_norm": 0.3483826816082001, "learning_rate": 9.433465264276964e-06, "loss": 0.0129, "step": 56300 }, { "epoch": 0.47548077937978933, "grad_norm": 1.0453745126724243, "learning_rate": 9.43312451250115e-06, "loss": 0.0112, "step": 56310 }, { "epoch": 0.4755652192269532, "grad_norm": 0.2512204647064209, "learning_rate": 9.432783664438782e-06, "loss": 0.021, "step": 56320 }, { "epoch": 0.47564965907411705, "grad_norm": 0.4661106765270233, "learning_rate": 9.432442720097262e-06, "loss": 0.0166, "step": 56330 }, { "epoch": 0.47573409892128093, "grad_norm": 0.5436275601387024, "learning_rate": 9.432101679483991e-06, "loss": 0.0085, "step": 56340 }, { "epoch": 0.4758185387684448, "grad_norm": 0.24801091849803925, "learning_rate": 9.431760542606383e-06, "loss": 0.0147, "step": 56350 }, { "epoch": 0.4759029786156087, "grad_norm": 0.1849755346775055, "learning_rate": 9.431419309471842e-06, "loss": 0.0154, "step": 56360 }, { "epoch": 0.4759874184627726, "grad_norm": 0.17719414830207825, "learning_rate": 9.431077980087783e-06, "loss": 0.0186, "step": 56370 }, { "epoch": 0.4760718583099365, "grad_norm": 0.20294183492660522, "learning_rate": 9.430736554461617e-06, "loss": 0.0138, "step": 56380 }, { "epoch": 0.4761562981571003, "grad_norm": 0.6729612946510315, "learning_rate": 9.430395032600762e-06, "loss": 0.017, "step": 56390 }, { "epoch": 0.4762407380042642, "grad_norm": 0.35019147396087646, "learning_rate": 9.430053414512635e-06, "loss": 0.0158, "step": 56400 }, { "epoch": 0.4763251778514281, "grad_norm": 0.3146020770072937, "learning_rate": 9.429711700204655e-06, "loss": 0.0165, "step": 56410 }, { "epoch": 0.47640961769859197, "grad_norm": 0.2719232439994812, "learning_rate": 9.429369889684246e-06, "loss": 0.0125, "step": 56420 }, { "epoch": 0.47649405754575586, "grad_norm": 0.7129795551300049, "learning_rate": 9.429027982958828e-06, "loss": 0.0145, "step": 56430 }, { "epoch": 0.47657849739291974, "grad_norm": 0.06996630132198334, "learning_rate": 9.428685980035832e-06, "loss": 0.0095, "step": 56440 }, { "epoch": 0.4766629372400836, "grad_norm": 0.37507835030555725, "learning_rate": 9.428343880922683e-06, "loss": 0.0109, "step": 56450 }, { "epoch": 0.47674737708724746, "grad_norm": 0.3462090790271759, "learning_rate": 9.428001685626812e-06, "loss": 0.0121, "step": 56460 }, { "epoch": 0.47683181693441135, "grad_norm": 0.06623220443725586, "learning_rate": 9.427659394155653e-06, "loss": 0.0105, "step": 56470 }, { "epoch": 0.47691625678157523, "grad_norm": 0.448280394077301, "learning_rate": 9.427317006516638e-06, "loss": 0.0146, "step": 56480 }, { "epoch": 0.4770006966287391, "grad_norm": 0.24689441919326782, "learning_rate": 9.426974522717207e-06, "loss": 0.0164, "step": 56490 }, { "epoch": 0.477085136475903, "grad_norm": 0.73842853307724, "learning_rate": 9.426631942764794e-06, "loss": 0.0059, "step": 56500 }, { "epoch": 0.47716957632306684, "grad_norm": 0.4905654489994049, "learning_rate": 9.426289266666844e-06, "loss": 0.017, "step": 56510 }, { "epoch": 0.4772540161702307, "grad_norm": 0.48584869503974915, "learning_rate": 9.425946494430797e-06, "loss": 0.0151, "step": 56520 }, { "epoch": 0.4773384560173946, "grad_norm": 0.35557153820991516, "learning_rate": 9.4256036260641e-06, "loss": 0.014, "step": 56530 }, { "epoch": 0.4774228958645585, "grad_norm": 0.2458178550004959, "learning_rate": 9.425260661574197e-06, "loss": 0.0177, "step": 56540 }, { "epoch": 0.4775073357117224, "grad_norm": 0.2910480499267578, "learning_rate": 9.424917600968541e-06, "loss": 0.0125, "step": 56550 }, { "epoch": 0.4775917755588862, "grad_norm": 0.43216443061828613, "learning_rate": 9.424574444254583e-06, "loss": 0.0133, "step": 56560 }, { "epoch": 0.4776762154060501, "grad_norm": 0.14131376147270203, "learning_rate": 9.424231191439773e-06, "loss": 0.0151, "step": 56570 }, { "epoch": 0.477760655253214, "grad_norm": 0.2950034737586975, "learning_rate": 9.423887842531568e-06, "loss": 0.0214, "step": 56580 }, { "epoch": 0.47784509510037787, "grad_norm": 0.5725684762001038, "learning_rate": 9.423544397537427e-06, "loss": 0.0169, "step": 56590 }, { "epoch": 0.47792953494754176, "grad_norm": 0.3265641927719116, "learning_rate": 9.423200856464806e-06, "loss": 0.0155, "step": 56600 }, { "epoch": 0.47801397479470564, "grad_norm": 0.505927562713623, "learning_rate": 9.42285721932117e-06, "loss": 0.0124, "step": 56610 }, { "epoch": 0.4780984146418695, "grad_norm": 1.080731749534607, "learning_rate": 9.422513486113983e-06, "loss": 0.0136, "step": 56620 }, { "epoch": 0.47818285448903336, "grad_norm": 0.4108302891254425, "learning_rate": 9.422169656850706e-06, "loss": 0.0123, "step": 56630 }, { "epoch": 0.47826729433619725, "grad_norm": 0.46646490693092346, "learning_rate": 9.421825731538812e-06, "loss": 0.0192, "step": 56640 }, { "epoch": 0.47835173418336113, "grad_norm": 0.2496395856142044, "learning_rate": 9.42148171018577e-06, "loss": 0.0053, "step": 56650 }, { "epoch": 0.478436174030525, "grad_norm": 0.1724015772342682, "learning_rate": 9.42113759279905e-06, "loss": 0.0131, "step": 56660 }, { "epoch": 0.4785206138776889, "grad_norm": 0.7300101518630981, "learning_rate": 9.420793379386129e-06, "loss": 0.0172, "step": 56670 }, { "epoch": 0.47860505372485274, "grad_norm": 0.41958358883857727, "learning_rate": 9.42044906995448e-06, "loss": 0.0222, "step": 56680 }, { "epoch": 0.4786894935720166, "grad_norm": 0.4699578285217285, "learning_rate": 9.420104664511585e-06, "loss": 0.0151, "step": 56690 }, { "epoch": 0.4787739334191805, "grad_norm": 0.21695208549499512, "learning_rate": 9.419760163064921e-06, "loss": 0.0214, "step": 56700 }, { "epoch": 0.4788583732663444, "grad_norm": 0.24135105311870575, "learning_rate": 9.419415565621972e-06, "loss": 0.0128, "step": 56710 }, { "epoch": 0.4789428131135083, "grad_norm": 0.4708273112773895, "learning_rate": 9.419070872190223e-06, "loss": 0.0201, "step": 56720 }, { "epoch": 0.47902725296067217, "grad_norm": 0.2757622003555298, "learning_rate": 9.418726082777158e-06, "loss": 0.0182, "step": 56730 }, { "epoch": 0.479111692807836, "grad_norm": 0.6938328146934509, "learning_rate": 9.418381197390272e-06, "loss": 0.0089, "step": 56740 }, { "epoch": 0.4791961326549999, "grad_norm": 0.43494105339050293, "learning_rate": 9.418036216037048e-06, "loss": 0.0193, "step": 56750 }, { "epoch": 0.47928057250216377, "grad_norm": 0.2829609513282776, "learning_rate": 9.417691138724984e-06, "loss": 0.0126, "step": 56760 }, { "epoch": 0.47936501234932766, "grad_norm": 0.2806214988231659, "learning_rate": 9.417345965461576e-06, "loss": 0.0193, "step": 56770 }, { "epoch": 0.47944945219649154, "grad_norm": 0.387458860874176, "learning_rate": 9.417000696254316e-06, "loss": 0.0093, "step": 56780 }, { "epoch": 0.4795338920436554, "grad_norm": 0.48112213611602783, "learning_rate": 9.416655331110708e-06, "loss": 0.0108, "step": 56790 }, { "epoch": 0.47961833189081926, "grad_norm": 0.34189876914024353, "learning_rate": 9.41630987003825e-06, "loss": 0.007, "step": 56800 }, { "epoch": 0.47970277173798315, "grad_norm": 0.14718785881996155, "learning_rate": 9.415964313044447e-06, "loss": 0.0128, "step": 56810 }, { "epoch": 0.47978721158514703, "grad_norm": 0.12352873384952545, "learning_rate": 9.415618660136804e-06, "loss": 0.0093, "step": 56820 }, { "epoch": 0.4798716514323109, "grad_norm": 0.8043262362480164, "learning_rate": 9.415272911322829e-06, "loss": 0.0247, "step": 56830 }, { "epoch": 0.4799560912794748, "grad_norm": 0.18387910723686218, "learning_rate": 9.41492706661003e-06, "loss": 0.0201, "step": 56840 }, { "epoch": 0.48004053112663864, "grad_norm": 0.17517685890197754, "learning_rate": 9.41458112600592e-06, "loss": 0.0118, "step": 56850 }, { "epoch": 0.4801249709738025, "grad_norm": 0.15172453224658966, "learning_rate": 9.414235089518014e-06, "loss": 0.0155, "step": 56860 }, { "epoch": 0.4802094108209664, "grad_norm": 0.2480565309524536, "learning_rate": 9.413888957153826e-06, "loss": 0.0131, "step": 56870 }, { "epoch": 0.4802938506681303, "grad_norm": 0.3336111903190613, "learning_rate": 9.413542728920873e-06, "loss": 0.0116, "step": 56880 }, { "epoch": 0.4803782905152942, "grad_norm": 0.41570159792900085, "learning_rate": 9.413196404826677e-06, "loss": 0.0218, "step": 56890 }, { "epoch": 0.48046273036245807, "grad_norm": 0.19797253608703613, "learning_rate": 9.412849984878759e-06, "loss": 0.0099, "step": 56900 }, { "epoch": 0.4805471702096219, "grad_norm": 0.27078935503959656, "learning_rate": 9.412503469084644e-06, "loss": 0.0129, "step": 56910 }, { "epoch": 0.4806316100567858, "grad_norm": 0.33900976181030273, "learning_rate": 9.412156857451858e-06, "loss": 0.0125, "step": 56920 }, { "epoch": 0.48071604990394967, "grad_norm": 0.6741605401039124, "learning_rate": 9.41181014998793e-06, "loss": 0.0182, "step": 56930 }, { "epoch": 0.48080048975111356, "grad_norm": 0.3018178939819336, "learning_rate": 9.411463346700389e-06, "loss": 0.0157, "step": 56940 }, { "epoch": 0.48088492959827744, "grad_norm": 0.96195387840271, "learning_rate": 9.411116447596767e-06, "loss": 0.0158, "step": 56950 }, { "epoch": 0.48096936944544133, "grad_norm": 0.9181502461433411, "learning_rate": 9.4107694526846e-06, "loss": 0.0137, "step": 56960 }, { "epoch": 0.48105380929260516, "grad_norm": 0.7344414591789246, "learning_rate": 9.410422361971425e-06, "loss": 0.0119, "step": 56970 }, { "epoch": 0.48113824913976905, "grad_norm": 0.2408357709646225, "learning_rate": 9.41007517546478e-06, "loss": 0.0128, "step": 56980 }, { "epoch": 0.48122268898693293, "grad_norm": 0.4186398983001709, "learning_rate": 9.409727893172207e-06, "loss": 0.0121, "step": 56990 }, { "epoch": 0.4813071288340968, "grad_norm": 0.17787912487983704, "learning_rate": 9.409380515101246e-06, "loss": 0.0083, "step": 57000 }, { "epoch": 0.4813915686812607, "grad_norm": 0.29637500643730164, "learning_rate": 9.409033041259445e-06, "loss": 0.0149, "step": 57010 }, { "epoch": 0.48147600852842454, "grad_norm": 0.4848429560661316, "learning_rate": 9.408685471654348e-06, "loss": 0.0183, "step": 57020 }, { "epoch": 0.4815604483755884, "grad_norm": 0.006207069382071495, "learning_rate": 9.408337806293507e-06, "loss": 0.0089, "step": 57030 }, { "epoch": 0.4816448882227523, "grad_norm": 0.01773255504667759, "learning_rate": 9.407990045184474e-06, "loss": 0.0115, "step": 57040 }, { "epoch": 0.4817293280699162, "grad_norm": 0.4162052869796753, "learning_rate": 9.407642188334799e-06, "loss": 0.0162, "step": 57050 }, { "epoch": 0.4818137679170801, "grad_norm": 0.4544851779937744, "learning_rate": 9.40729423575204e-06, "loss": 0.0149, "step": 57060 }, { "epoch": 0.48189820776424397, "grad_norm": 0.4964061975479126, "learning_rate": 9.40694618744375e-06, "loss": 0.0139, "step": 57070 }, { "epoch": 0.4819826476114078, "grad_norm": 0.36313605308532715, "learning_rate": 9.406598043417496e-06, "loss": 0.016, "step": 57080 }, { "epoch": 0.4820670874585717, "grad_norm": 0.44936081767082214, "learning_rate": 9.406249803680834e-06, "loss": 0.0157, "step": 57090 }, { "epoch": 0.48215152730573557, "grad_norm": 0.5959211587905884, "learning_rate": 9.405901468241328e-06, "loss": 0.0168, "step": 57100 }, { "epoch": 0.48223596715289946, "grad_norm": 0.011228187941014767, "learning_rate": 9.405553037106545e-06, "loss": 0.0224, "step": 57110 }, { "epoch": 0.48232040700006334, "grad_norm": 0.5188606381416321, "learning_rate": 9.405204510284054e-06, "loss": 0.0139, "step": 57120 }, { "epoch": 0.48240484684722723, "grad_norm": 0.3289984464645386, "learning_rate": 9.404855887781423e-06, "loss": 0.0123, "step": 57130 }, { "epoch": 0.48248928669439106, "grad_norm": 0.16058766841888428, "learning_rate": 9.404507169606225e-06, "loss": 0.0213, "step": 57140 }, { "epoch": 0.48257372654155495, "grad_norm": 0.4809328317642212, "learning_rate": 9.404158355766036e-06, "loss": 0.0095, "step": 57150 }, { "epoch": 0.48265816638871883, "grad_norm": 0.4386816620826721, "learning_rate": 9.403809446268427e-06, "loss": 0.0119, "step": 57160 }, { "epoch": 0.4827426062358827, "grad_norm": 0.2697601616382599, "learning_rate": 9.403460441120979e-06, "loss": 0.0138, "step": 57170 }, { "epoch": 0.4828270460830466, "grad_norm": 0.33205270767211914, "learning_rate": 9.403111340331275e-06, "loss": 0.016, "step": 57180 }, { "epoch": 0.4829114859302105, "grad_norm": 0.36194702982902527, "learning_rate": 9.402762143906893e-06, "loss": 0.0167, "step": 57190 }, { "epoch": 0.4829959257773743, "grad_norm": 0.5059097409248352, "learning_rate": 9.40241285185542e-06, "loss": 0.0111, "step": 57200 }, { "epoch": 0.4830803656245382, "grad_norm": 0.48994728922843933, "learning_rate": 9.402063464184443e-06, "loss": 0.0131, "step": 57210 }, { "epoch": 0.4831648054717021, "grad_norm": 0.20796121656894684, "learning_rate": 9.401713980901548e-06, "loss": 0.01, "step": 57220 }, { "epoch": 0.483249245318866, "grad_norm": 0.12156372517347336, "learning_rate": 9.40136440201433e-06, "loss": 0.0076, "step": 57230 }, { "epoch": 0.48333368516602987, "grad_norm": 0.30304229259490967, "learning_rate": 9.401014727530375e-06, "loss": 0.0152, "step": 57240 }, { "epoch": 0.4834181250131937, "grad_norm": 0.7875737547874451, "learning_rate": 9.400664957457286e-06, "loss": 0.016, "step": 57250 }, { "epoch": 0.4835025648603576, "grad_norm": 0.2185581773519516, "learning_rate": 9.400315091802654e-06, "loss": 0.0097, "step": 57260 }, { "epoch": 0.4835870047075215, "grad_norm": 0.28319135308265686, "learning_rate": 9.39996513057408e-06, "loss": 0.0132, "step": 57270 }, { "epoch": 0.48367144455468536, "grad_norm": 0.6202268004417419, "learning_rate": 9.399615073779165e-06, "loss": 0.0108, "step": 57280 }, { "epoch": 0.48375588440184925, "grad_norm": 0.2068415731191635, "learning_rate": 9.399264921425513e-06, "loss": 0.0219, "step": 57290 }, { "epoch": 0.48384032424901313, "grad_norm": 0.49242687225341797, "learning_rate": 9.398914673520727e-06, "loss": 0.011, "step": 57300 }, { "epoch": 0.48392476409617696, "grad_norm": 0.24023281037807465, "learning_rate": 9.398564330072415e-06, "loss": 0.0113, "step": 57310 }, { "epoch": 0.48400920394334085, "grad_norm": 0.6884227395057678, "learning_rate": 9.39821389108819e-06, "loss": 0.0166, "step": 57320 }, { "epoch": 0.48409364379050474, "grad_norm": 0.2798081338405609, "learning_rate": 9.397863356575659e-06, "loss": 0.0095, "step": 57330 }, { "epoch": 0.4841780836376686, "grad_norm": 0.5435305833816528, "learning_rate": 9.397512726542437e-06, "loss": 0.0203, "step": 57340 }, { "epoch": 0.4842625234848325, "grad_norm": 0.5632772445678711, "learning_rate": 9.39716200099614e-06, "loss": 0.0165, "step": 57350 }, { "epoch": 0.4843469633319964, "grad_norm": 0.6066392660140991, "learning_rate": 9.396811179944385e-06, "loss": 0.0188, "step": 57360 }, { "epoch": 0.4844314031791602, "grad_norm": 0.5412968993186951, "learning_rate": 9.396460263394793e-06, "loss": 0.0202, "step": 57370 }, { "epoch": 0.4845158430263241, "grad_norm": 0.5829641222953796, "learning_rate": 9.396109251354983e-06, "loss": 0.02, "step": 57380 }, { "epoch": 0.484600282873488, "grad_norm": 0.6703881621360779, "learning_rate": 9.395758143832584e-06, "loss": 0.0107, "step": 57390 }, { "epoch": 0.4846847227206519, "grad_norm": 0.24443356692790985, "learning_rate": 9.395406940835217e-06, "loss": 0.0091, "step": 57400 }, { "epoch": 0.48476916256781577, "grad_norm": 0.482207715511322, "learning_rate": 9.395055642370512e-06, "loss": 0.0158, "step": 57410 }, { "epoch": 0.48485360241497966, "grad_norm": 0.4918508529663086, "learning_rate": 9.394704248446098e-06, "loss": 0.0094, "step": 57420 }, { "epoch": 0.4849380422621435, "grad_norm": 0.4659710228443146, "learning_rate": 9.394352759069609e-06, "loss": 0.0113, "step": 57430 }, { "epoch": 0.4850224821093074, "grad_norm": 0.3356427252292633, "learning_rate": 9.39400117424868e-06, "loss": 0.0145, "step": 57440 }, { "epoch": 0.48510692195647126, "grad_norm": 0.24496349692344666, "learning_rate": 9.393649493990945e-06, "loss": 0.0123, "step": 57450 }, { "epoch": 0.48519136180363515, "grad_norm": 0.4738076329231262, "learning_rate": 9.393297718304043e-06, "loss": 0.023, "step": 57460 }, { "epoch": 0.48527580165079903, "grad_norm": 0.32391589879989624, "learning_rate": 9.392945847195616e-06, "loss": 0.0187, "step": 57470 }, { "epoch": 0.48536024149796286, "grad_norm": 0.4102855920791626, "learning_rate": 9.392593880673304e-06, "loss": 0.0142, "step": 57480 }, { "epoch": 0.48544468134512675, "grad_norm": 0.23358884453773499, "learning_rate": 9.392241818744754e-06, "loss": 0.0118, "step": 57490 }, { "epoch": 0.48552912119229064, "grad_norm": 0.37980613112449646, "learning_rate": 9.39188966141761e-06, "loss": 0.0155, "step": 57500 }, { "epoch": 0.4856135610394545, "grad_norm": 0.30432143807411194, "learning_rate": 9.391537408699526e-06, "loss": 0.008, "step": 57510 }, { "epoch": 0.4856980008866184, "grad_norm": 0.07561132311820984, "learning_rate": 9.391185060598146e-06, "loss": 0.015, "step": 57520 }, { "epoch": 0.4857824407337823, "grad_norm": 0.6264349222183228, "learning_rate": 9.390832617121128e-06, "loss": 0.0168, "step": 57530 }, { "epoch": 0.4858668805809461, "grad_norm": 0.4007004499435425, "learning_rate": 9.390480078276125e-06, "loss": 0.0093, "step": 57540 }, { "epoch": 0.48595132042811, "grad_norm": 0.6533617973327637, "learning_rate": 9.390127444070794e-06, "loss": 0.0116, "step": 57550 }, { "epoch": 0.4860357602752739, "grad_norm": 0.2509394884109497, "learning_rate": 9.389774714512794e-06, "loss": 0.0102, "step": 57560 }, { "epoch": 0.4861202001224378, "grad_norm": 0.4139041006565094, "learning_rate": 9.38942188960979e-06, "loss": 0.0116, "step": 57570 }, { "epoch": 0.48620463996960167, "grad_norm": 0.6416322588920593, "learning_rate": 9.389068969369439e-06, "loss": 0.0178, "step": 57580 }, { "epoch": 0.48628907981676556, "grad_norm": 0.2742464542388916, "learning_rate": 9.38871595379941e-06, "loss": 0.0097, "step": 57590 }, { "epoch": 0.4863735196639294, "grad_norm": 0.8577821850776672, "learning_rate": 9.388362842907369e-06, "loss": 0.0187, "step": 57600 }, { "epoch": 0.4864579595110933, "grad_norm": 0.40211179852485657, "learning_rate": 9.388009636700987e-06, "loss": 0.0129, "step": 57610 }, { "epoch": 0.48654239935825716, "grad_norm": 1.3313921689987183, "learning_rate": 9.387656335187935e-06, "loss": 0.0162, "step": 57620 }, { "epoch": 0.48662683920542105, "grad_norm": 0.2587847411632538, "learning_rate": 9.387302938375885e-06, "loss": 0.0119, "step": 57630 }, { "epoch": 0.48671127905258493, "grad_norm": 0.49307847023010254, "learning_rate": 9.386949446272517e-06, "loss": 0.0169, "step": 57640 }, { "epoch": 0.48679571889974876, "grad_norm": 0.3090207576751709, "learning_rate": 9.386595858885505e-06, "loss": 0.0095, "step": 57650 }, { "epoch": 0.48688015874691265, "grad_norm": 0.4413006603717804, "learning_rate": 9.386242176222531e-06, "loss": 0.0161, "step": 57660 }, { "epoch": 0.48696459859407654, "grad_norm": 0.45232829451560974, "learning_rate": 9.385888398291273e-06, "loss": 0.014, "step": 57670 }, { "epoch": 0.4870490384412404, "grad_norm": 0.5537892580032349, "learning_rate": 9.38553452509942e-06, "loss": 0.0168, "step": 57680 }, { "epoch": 0.4871334782884043, "grad_norm": 0.48707786202430725, "learning_rate": 9.385180556654654e-06, "loss": 0.0145, "step": 57690 }, { "epoch": 0.4872179181355682, "grad_norm": 0.3645235300064087, "learning_rate": 9.384826492964667e-06, "loss": 0.0132, "step": 57700 }, { "epoch": 0.487302357982732, "grad_norm": 0.27379563450813293, "learning_rate": 9.384472334037144e-06, "loss": 0.0198, "step": 57710 }, { "epoch": 0.4873867978298959, "grad_norm": 0.06604200601577759, "learning_rate": 9.384118079879784e-06, "loss": 0.0115, "step": 57720 }, { "epoch": 0.4874712376770598, "grad_norm": 0.26991915702819824, "learning_rate": 9.383763730500276e-06, "loss": 0.0161, "step": 57730 }, { "epoch": 0.4875556775242237, "grad_norm": 0.30614808201789856, "learning_rate": 9.383409285906317e-06, "loss": 0.0146, "step": 57740 }, { "epoch": 0.48764011737138757, "grad_norm": 0.21310363709926605, "learning_rate": 9.383054746105608e-06, "loss": 0.0154, "step": 57750 }, { "epoch": 0.48772455721855146, "grad_norm": 0.5935869216918945, "learning_rate": 9.382700111105848e-06, "loss": 0.0137, "step": 57760 }, { "epoch": 0.4878089970657153, "grad_norm": 0.394243448972702, "learning_rate": 9.38234538091474e-06, "loss": 0.0078, "step": 57770 }, { "epoch": 0.4878934369128792, "grad_norm": 0.6929098963737488, "learning_rate": 9.381990555539988e-06, "loss": 0.0125, "step": 57780 }, { "epoch": 0.48797787676004306, "grad_norm": 0.29022884368896484, "learning_rate": 9.381635634989298e-06, "loss": 0.013, "step": 57790 }, { "epoch": 0.48806231660720695, "grad_norm": 0.3013836145401001, "learning_rate": 9.38128061927038e-06, "loss": 0.0091, "step": 57800 }, { "epoch": 0.48814675645437083, "grad_norm": 0.14181429147720337, "learning_rate": 9.380925508390947e-06, "loss": 0.0222, "step": 57810 }, { "epoch": 0.4882311963015347, "grad_norm": 0.26020699739456177, "learning_rate": 9.380570302358708e-06, "loss": 0.0174, "step": 57820 }, { "epoch": 0.48831563614869855, "grad_norm": 0.12778505682945251, "learning_rate": 9.38021500118138e-06, "loss": 0.0111, "step": 57830 }, { "epoch": 0.48840007599586244, "grad_norm": 0.4978334307670593, "learning_rate": 9.37985960486668e-06, "loss": 0.013, "step": 57840 }, { "epoch": 0.4884845158430263, "grad_norm": 0.24541686475276947, "learning_rate": 9.379504113422326e-06, "loss": 0.0138, "step": 57850 }, { "epoch": 0.4885689556901902, "grad_norm": 0.16400575637817383, "learning_rate": 9.379148526856041e-06, "loss": 0.0169, "step": 57860 }, { "epoch": 0.4886533955373541, "grad_norm": 0.43182969093322754, "learning_rate": 9.378792845175547e-06, "loss": 0.0194, "step": 57870 }, { "epoch": 0.4887378353845179, "grad_norm": 0.1887195110321045, "learning_rate": 9.378437068388569e-06, "loss": 0.0217, "step": 57880 }, { "epoch": 0.4888222752316818, "grad_norm": 0.41803228855133057, "learning_rate": 9.378081196502837e-06, "loss": 0.0111, "step": 57890 }, { "epoch": 0.4889067150788457, "grad_norm": 0.4226348102092743, "learning_rate": 9.377725229526076e-06, "loss": 0.0132, "step": 57900 }, { "epoch": 0.4889911549260096, "grad_norm": 0.0028802345041185617, "learning_rate": 9.377369167466021e-06, "loss": 0.02, "step": 57910 }, { "epoch": 0.48907559477317347, "grad_norm": 0.17749391496181488, "learning_rate": 9.377013010330406e-06, "loss": 0.0119, "step": 57920 }, { "epoch": 0.48916003462033736, "grad_norm": 0.3119463324546814, "learning_rate": 9.376656758126966e-06, "loss": 0.012, "step": 57930 }, { "epoch": 0.4892444744675012, "grad_norm": 0.33621200919151306, "learning_rate": 9.376300410863437e-06, "loss": 0.0154, "step": 57940 }, { "epoch": 0.4893289143146651, "grad_norm": 0.1862599402666092, "learning_rate": 9.375943968547559e-06, "loss": 0.013, "step": 57950 }, { "epoch": 0.48941335416182896, "grad_norm": 0.38412991166114807, "learning_rate": 9.375587431187073e-06, "loss": 0.0151, "step": 57960 }, { "epoch": 0.48949779400899285, "grad_norm": 0.8839970827102661, "learning_rate": 9.375230798789728e-06, "loss": 0.0269, "step": 57970 }, { "epoch": 0.48958223385615673, "grad_norm": 0.485187292098999, "learning_rate": 9.374874071363266e-06, "loss": 0.0091, "step": 57980 }, { "epoch": 0.4896666737033206, "grad_norm": 0.15336070954799652, "learning_rate": 9.374517248915436e-06, "loss": 0.0219, "step": 57990 }, { "epoch": 0.48975111355048445, "grad_norm": 0.35870999097824097, "learning_rate": 9.374160331453985e-06, "loss": 0.0126, "step": 58000 }, { "epoch": 0.48983555339764834, "grad_norm": 0.36501792073249817, "learning_rate": 9.37380331898667e-06, "loss": 0.0124, "step": 58010 }, { "epoch": 0.4899199932448122, "grad_norm": 0.05940594524145126, "learning_rate": 9.373446211521244e-06, "loss": 0.0087, "step": 58020 }, { "epoch": 0.4900044330919761, "grad_norm": 0.1839822381734848, "learning_rate": 9.373089009065462e-06, "loss": 0.0137, "step": 58030 }, { "epoch": 0.49008887293914, "grad_norm": 0.4808032214641571, "learning_rate": 9.372731711627081e-06, "loss": 0.0146, "step": 58040 }, { "epoch": 0.4901733127863039, "grad_norm": 0.37918198108673096, "learning_rate": 9.372374319213866e-06, "loss": 0.0164, "step": 58050 }, { "epoch": 0.4902577526334677, "grad_norm": 0.5295453071594238, "learning_rate": 9.372016831833574e-06, "loss": 0.0131, "step": 58060 }, { "epoch": 0.4903421924806316, "grad_norm": 0.20431488752365112, "learning_rate": 9.371659249493975e-06, "loss": 0.0077, "step": 58070 }, { "epoch": 0.4904266323277955, "grad_norm": 0.4357214868068695, "learning_rate": 9.371301572202832e-06, "loss": 0.0124, "step": 58080 }, { "epoch": 0.4905110721749594, "grad_norm": 0.5218549966812134, "learning_rate": 9.370943799967914e-06, "loss": 0.0124, "step": 58090 }, { "epoch": 0.49059551202212326, "grad_norm": 0.4477502703666687, "learning_rate": 9.370585932796994e-06, "loss": 0.0109, "step": 58100 }, { "epoch": 0.4906799518692871, "grad_norm": 0.25940847396850586, "learning_rate": 9.370227970697841e-06, "loss": 0.0177, "step": 58110 }, { "epoch": 0.490764391716451, "grad_norm": 0.2689698040485382, "learning_rate": 9.369869913678233e-06, "loss": 0.0121, "step": 58120 }, { "epoch": 0.49084883156361486, "grad_norm": 0.58743816614151, "learning_rate": 9.369511761745946e-06, "loss": 0.0126, "step": 58130 }, { "epoch": 0.49093327141077875, "grad_norm": 0.2885567247867584, "learning_rate": 9.369153514908757e-06, "loss": 0.0167, "step": 58140 }, { "epoch": 0.49101771125794264, "grad_norm": 0.40274539589881897, "learning_rate": 9.368795173174452e-06, "loss": 0.0134, "step": 58150 }, { "epoch": 0.4911021511051065, "grad_norm": 0.38842281699180603, "learning_rate": 9.36843673655081e-06, "loss": 0.0131, "step": 58160 }, { "epoch": 0.49118659095227035, "grad_norm": 0.32670778036117554, "learning_rate": 9.368078205045617e-06, "loss": 0.0102, "step": 58170 }, { "epoch": 0.49127103079943424, "grad_norm": 0.5911470055580139, "learning_rate": 9.36771957866666e-06, "loss": 0.013, "step": 58180 }, { "epoch": 0.4913554706465981, "grad_norm": 0.8521972298622131, "learning_rate": 9.36736085742173e-06, "loss": 0.0106, "step": 58190 }, { "epoch": 0.491439910493762, "grad_norm": 0.1592445969581604, "learning_rate": 9.367002041318615e-06, "loss": 0.0108, "step": 58200 }, { "epoch": 0.4915243503409259, "grad_norm": 0.3239706754684448, "learning_rate": 9.366643130365113e-06, "loss": 0.0174, "step": 58210 }, { "epoch": 0.4916087901880898, "grad_norm": 0.256876677274704, "learning_rate": 9.366284124569016e-06, "loss": 0.0185, "step": 58220 }, { "epoch": 0.4916932300352536, "grad_norm": 0.13664455711841583, "learning_rate": 9.365925023938123e-06, "loss": 0.0094, "step": 58230 }, { "epoch": 0.4917776698824175, "grad_norm": 0.26936548948287964, "learning_rate": 9.365565828480232e-06, "loss": 0.0131, "step": 58240 }, { "epoch": 0.4918621097295814, "grad_norm": 0.2953247129917145, "learning_rate": 9.365206538203147e-06, "loss": 0.0119, "step": 58250 }, { "epoch": 0.4919465495767453, "grad_norm": 0.19747498631477356, "learning_rate": 9.36484715311467e-06, "loss": 0.0105, "step": 58260 }, { "epoch": 0.49203098942390916, "grad_norm": 0.17578451335430145, "learning_rate": 9.364487673222607e-06, "loss": 0.0112, "step": 58270 }, { "epoch": 0.49211542927107305, "grad_norm": 0.1952677220106125, "learning_rate": 9.364128098534767e-06, "loss": 0.0117, "step": 58280 }, { "epoch": 0.4921998691182369, "grad_norm": 0.17965248227119446, "learning_rate": 9.36376842905896e-06, "loss": 0.0146, "step": 58290 }, { "epoch": 0.49228430896540076, "grad_norm": 0.48143914341926575, "learning_rate": 9.363408664802993e-06, "loss": 0.0103, "step": 58300 }, { "epoch": 0.49236874881256465, "grad_norm": 0.494131863117218, "learning_rate": 9.363048805774687e-06, "loss": 0.0156, "step": 58310 }, { "epoch": 0.49245318865972854, "grad_norm": 0.2578110992908478, "learning_rate": 9.362688851981854e-06, "loss": 0.0144, "step": 58320 }, { "epoch": 0.4925376285068924, "grad_norm": 0.26161062717437744, "learning_rate": 9.362328803432314e-06, "loss": 0.0097, "step": 58330 }, { "epoch": 0.49262206835405625, "grad_norm": 0.3277572989463806, "learning_rate": 9.361968660133886e-06, "loss": 0.0154, "step": 58340 }, { "epoch": 0.49270650820122014, "grad_norm": 0.26992565393447876, "learning_rate": 9.361608422094392e-06, "loss": 0.0108, "step": 58350 }, { "epoch": 0.492790948048384, "grad_norm": 0.4180564880371094, "learning_rate": 9.361248089321658e-06, "loss": 0.0186, "step": 58360 }, { "epoch": 0.4928753878955479, "grad_norm": 0.7033438086509705, "learning_rate": 9.360887661823508e-06, "loss": 0.0276, "step": 58370 }, { "epoch": 0.4929598277427118, "grad_norm": 0.2673768997192383, "learning_rate": 9.36052713960777e-06, "loss": 0.0086, "step": 58380 }, { "epoch": 0.4930442675898757, "grad_norm": 0.4399992823600769, "learning_rate": 9.360166522682281e-06, "loss": 0.0182, "step": 58390 }, { "epoch": 0.4931287074370395, "grad_norm": 1.0807207822799683, "learning_rate": 9.359805811054865e-06, "loss": 0.0121, "step": 58400 }, { "epoch": 0.4932131472842034, "grad_norm": 0.27538514137268066, "learning_rate": 9.359445004733361e-06, "loss": 0.0155, "step": 58410 }, { "epoch": 0.4932975871313673, "grad_norm": 0.35022062063217163, "learning_rate": 9.359084103725604e-06, "loss": 0.0103, "step": 58420 }, { "epoch": 0.4933820269785312, "grad_norm": 0.47701629996299744, "learning_rate": 9.358723108039434e-06, "loss": 0.0159, "step": 58430 }, { "epoch": 0.49346646682569506, "grad_norm": 0.3283310830593109, "learning_rate": 9.358362017682691e-06, "loss": 0.0161, "step": 58440 }, { "epoch": 0.49355090667285895, "grad_norm": 0.19397754967212677, "learning_rate": 9.358000832663219e-06, "loss": 0.0145, "step": 58450 }, { "epoch": 0.4936353465200228, "grad_norm": 0.35550403594970703, "learning_rate": 9.357639552988861e-06, "loss": 0.0131, "step": 58460 }, { "epoch": 0.49371978636718666, "grad_norm": 0.0979933962225914, "learning_rate": 9.357278178667466e-06, "loss": 0.0204, "step": 58470 }, { "epoch": 0.49380422621435055, "grad_norm": 0.4989265501499176, "learning_rate": 9.356916709706883e-06, "loss": 0.0177, "step": 58480 }, { "epoch": 0.49388866606151444, "grad_norm": 0.29322853684425354, "learning_rate": 9.356555146114959e-06, "loss": 0.0098, "step": 58490 }, { "epoch": 0.4939731059086783, "grad_norm": 0.3089917004108429, "learning_rate": 9.356193487899552e-06, "loss": 0.0141, "step": 58500 }, { "epoch": 0.4940575457558422, "grad_norm": 0.23133812844753265, "learning_rate": 9.355831735068513e-06, "loss": 0.0189, "step": 58510 }, { "epoch": 0.49414198560300604, "grad_norm": 0.23487624526023865, "learning_rate": 9.355469887629704e-06, "loss": 0.0159, "step": 58520 }, { "epoch": 0.4942264254501699, "grad_norm": 0.667039155960083, "learning_rate": 9.35510794559098e-06, "loss": 0.0178, "step": 58530 }, { "epoch": 0.4943108652973338, "grad_norm": 0.33243250846862793, "learning_rate": 9.354745908960204e-06, "loss": 0.0071, "step": 58540 }, { "epoch": 0.4943953051444977, "grad_norm": 0.5210553407669067, "learning_rate": 9.354383777745239e-06, "loss": 0.0127, "step": 58550 }, { "epoch": 0.4944797449916616, "grad_norm": 0.872498631477356, "learning_rate": 9.354021551953951e-06, "loss": 0.0135, "step": 58560 }, { "epoch": 0.4945641848388254, "grad_norm": 0.49401259422302246, "learning_rate": 9.353659231594206e-06, "loss": 0.0153, "step": 58570 }, { "epoch": 0.4946486246859893, "grad_norm": 0.2942926585674286, "learning_rate": 9.353296816673875e-06, "loss": 0.0125, "step": 58580 }, { "epoch": 0.4947330645331532, "grad_norm": 0.17795313894748688, "learning_rate": 9.352934307200831e-06, "loss": 0.0136, "step": 58590 }, { "epoch": 0.4948175043803171, "grad_norm": 0.2936469316482544, "learning_rate": 9.352571703182944e-06, "loss": 0.0222, "step": 58600 }, { "epoch": 0.49490194422748096, "grad_norm": 0.3030269742012024, "learning_rate": 9.352209004628093e-06, "loss": 0.0146, "step": 58610 }, { "epoch": 0.49498638407464485, "grad_norm": 0.4166540801525116, "learning_rate": 9.351846211544153e-06, "loss": 0.0073, "step": 58620 }, { "epoch": 0.4950708239218087, "grad_norm": 0.07718124240636826, "learning_rate": 9.351483323939006e-06, "loss": 0.0083, "step": 58630 }, { "epoch": 0.49515526376897256, "grad_norm": 0.5581969618797302, "learning_rate": 9.351120341820533e-06, "loss": 0.019, "step": 58640 }, { "epoch": 0.49523970361613645, "grad_norm": 0.45851024985313416, "learning_rate": 9.350757265196616e-06, "loss": 0.021, "step": 58650 }, { "epoch": 0.49532414346330034, "grad_norm": 0.053420357406139374, "learning_rate": 9.350394094075145e-06, "loss": 0.0077, "step": 58660 }, { "epoch": 0.4954085833104642, "grad_norm": 0.29589495062828064, "learning_rate": 9.350030828464005e-06, "loss": 0.0152, "step": 58670 }, { "epoch": 0.4954930231576281, "grad_norm": 0.30333322286605835, "learning_rate": 9.349667468371086e-06, "loss": 0.0177, "step": 58680 }, { "epoch": 0.49557746300479194, "grad_norm": 0.8084084391593933, "learning_rate": 9.349304013804282e-06, "loss": 0.0092, "step": 58690 }, { "epoch": 0.4956619028519558, "grad_norm": 0.39720848202705383, "learning_rate": 9.348940464771485e-06, "loss": 0.0142, "step": 58700 }, { "epoch": 0.4957463426991197, "grad_norm": 0.4768680930137634, "learning_rate": 9.348576821280593e-06, "loss": 0.0086, "step": 58710 }, { "epoch": 0.4958307825462836, "grad_norm": 0.581877589225769, "learning_rate": 9.348213083339504e-06, "loss": 0.0146, "step": 58720 }, { "epoch": 0.4959152223934475, "grad_norm": 2.2437992095947266, "learning_rate": 9.347849250956117e-06, "loss": 0.0203, "step": 58730 }, { "epoch": 0.49599966224061137, "grad_norm": 0.6642377972602844, "learning_rate": 9.347485324138335e-06, "loss": 0.0126, "step": 58740 }, { "epoch": 0.4960841020877752, "grad_norm": 0.44516968727111816, "learning_rate": 9.347121302894064e-06, "loss": 0.022, "step": 58750 }, { "epoch": 0.4961685419349391, "grad_norm": 1.1658284664154053, "learning_rate": 9.346757187231208e-06, "loss": 0.0099, "step": 58760 }, { "epoch": 0.496252981782103, "grad_norm": 0.21677492558956146, "learning_rate": 9.346392977157676e-06, "loss": 0.0126, "step": 58770 }, { "epoch": 0.49633742162926686, "grad_norm": 0.5091487169265747, "learning_rate": 9.346028672681383e-06, "loss": 0.0151, "step": 58780 }, { "epoch": 0.49642186147643075, "grad_norm": 0.2201891392469406, "learning_rate": 9.345664273810233e-06, "loss": 0.0093, "step": 58790 }, { "epoch": 0.4965063013235946, "grad_norm": 0.5414146184921265, "learning_rate": 9.34529978055215e-06, "loss": 0.0136, "step": 58800 }, { "epoch": 0.49659074117075847, "grad_norm": 0.07467939704656601, "learning_rate": 9.344935192915042e-06, "loss": 0.0219, "step": 58810 }, { "epoch": 0.49667518101792235, "grad_norm": 0.35591214895248413, "learning_rate": 9.344570510906833e-06, "loss": 0.0148, "step": 58820 }, { "epoch": 0.49675962086508624, "grad_norm": 0.167350634932518, "learning_rate": 9.344205734535443e-06, "loss": 0.0107, "step": 58830 }, { "epoch": 0.4968440607122501, "grad_norm": 0.3369799554347992, "learning_rate": 9.343840863808795e-06, "loss": 0.0153, "step": 58840 }, { "epoch": 0.496928500559414, "grad_norm": 0.27097418904304504, "learning_rate": 9.343475898734815e-06, "loss": 0.0094, "step": 58850 }, { "epoch": 0.49701294040657784, "grad_norm": 0.19064873456954956, "learning_rate": 9.343110839321425e-06, "loss": 0.0123, "step": 58860 }, { "epoch": 0.4970973802537417, "grad_norm": 0.4895366430282593, "learning_rate": 9.342745685576561e-06, "loss": 0.0107, "step": 58870 }, { "epoch": 0.4971818201009056, "grad_norm": 0.18077003955841064, "learning_rate": 9.34238043750815e-06, "loss": 0.0093, "step": 58880 }, { "epoch": 0.4972662599480695, "grad_norm": 0.2390676736831665, "learning_rate": 9.342015095124123e-06, "loss": 0.0163, "step": 58890 }, { "epoch": 0.4973506997952334, "grad_norm": 0.42432260513305664, "learning_rate": 9.341649658432421e-06, "loss": 0.0102, "step": 58900 }, { "epoch": 0.4974351396423973, "grad_norm": 0.3393155038356781, "learning_rate": 9.341284127440977e-06, "loss": 0.008, "step": 58910 }, { "epoch": 0.4975195794895611, "grad_norm": 0.2220505326986313, "learning_rate": 9.340918502157731e-06, "loss": 0.0141, "step": 58920 }, { "epoch": 0.497604019336725, "grad_norm": 0.6508219242095947, "learning_rate": 9.340552782590627e-06, "loss": 0.0176, "step": 58930 }, { "epoch": 0.4976884591838889, "grad_norm": 0.37836647033691406, "learning_rate": 9.340186968747602e-06, "loss": 0.0158, "step": 58940 }, { "epoch": 0.49777289903105276, "grad_norm": 0.5209463238716125, "learning_rate": 9.33982106063661e-06, "loss": 0.0131, "step": 58950 }, { "epoch": 0.49785733887821665, "grad_norm": 0.2285228967666626, "learning_rate": 9.33945505826559e-06, "loss": 0.0189, "step": 58960 }, { "epoch": 0.4979417787253805, "grad_norm": 0.16559989750385284, "learning_rate": 9.339088961642498e-06, "loss": 0.0134, "step": 58970 }, { "epoch": 0.49802621857254437, "grad_norm": 0.632526159286499, "learning_rate": 9.33872277077528e-06, "loss": 0.0121, "step": 58980 }, { "epoch": 0.49811065841970825, "grad_norm": 0.33050358295440674, "learning_rate": 9.338356485671895e-06, "loss": 0.0199, "step": 58990 }, { "epoch": 0.49819509826687214, "grad_norm": 0.1241118311882019, "learning_rate": 9.337990106340296e-06, "loss": 0.0075, "step": 59000 }, { "epoch": 0.498279538114036, "grad_norm": 0.352522075176239, "learning_rate": 9.33762363278844e-06, "loss": 0.0138, "step": 59010 }, { "epoch": 0.4983639779611999, "grad_norm": 0.6779357194900513, "learning_rate": 9.337257065024287e-06, "loss": 0.014, "step": 59020 }, { "epoch": 0.49844841780836374, "grad_norm": 0.12957985699176788, "learning_rate": 9.336890403055801e-06, "loss": 0.0125, "step": 59030 }, { "epoch": 0.49853285765552763, "grad_norm": 0.24195140600204468, "learning_rate": 9.336523646890943e-06, "loss": 0.0189, "step": 59040 }, { "epoch": 0.4986172975026915, "grad_norm": 0.3343936800956726, "learning_rate": 9.33615679653768e-06, "loss": 0.0139, "step": 59050 }, { "epoch": 0.4987017373498554, "grad_norm": 1.813414454460144, "learning_rate": 9.33578985200398e-06, "loss": 0.013, "step": 59060 }, { "epoch": 0.4987861771970193, "grad_norm": 0.31661906838417053, "learning_rate": 9.335422813297813e-06, "loss": 0.0103, "step": 59070 }, { "epoch": 0.4988706170441832, "grad_norm": 0.4341624975204468, "learning_rate": 9.335055680427152e-06, "loss": 0.0111, "step": 59080 }, { "epoch": 0.498955056891347, "grad_norm": 0.25066274404525757, "learning_rate": 9.334688453399968e-06, "loss": 0.0114, "step": 59090 }, { "epoch": 0.4990394967385109, "grad_norm": 0.3418809175491333, "learning_rate": 9.33432113222424e-06, "loss": 0.0121, "step": 59100 }, { "epoch": 0.4991239365856748, "grad_norm": 0.3818916976451874, "learning_rate": 9.333953716907945e-06, "loss": 0.018, "step": 59110 }, { "epoch": 0.49920837643283866, "grad_norm": 0.27632585167884827, "learning_rate": 9.333586207459065e-06, "loss": 0.0143, "step": 59120 }, { "epoch": 0.49929281628000255, "grad_norm": 0.27120697498321533, "learning_rate": 9.333218603885579e-06, "loss": 0.0252, "step": 59130 }, { "epoch": 0.49937725612716644, "grad_norm": 0.5938481688499451, "learning_rate": 9.332850906195473e-06, "loss": 0.0187, "step": 59140 }, { "epoch": 0.49946169597433027, "grad_norm": 0.20943734049797058, "learning_rate": 9.332483114396734e-06, "loss": 0.0111, "step": 59150 }, { "epoch": 0.49954613582149415, "grad_norm": 0.5051679611206055, "learning_rate": 9.332115228497347e-06, "loss": 0.013, "step": 59160 }, { "epoch": 0.49963057566865804, "grad_norm": 0.9648111462593079, "learning_rate": 9.331747248505307e-06, "loss": 0.0063, "step": 59170 }, { "epoch": 0.4997150155158219, "grad_norm": 0.589751660823822, "learning_rate": 9.331379174428605e-06, "loss": 0.0145, "step": 59180 }, { "epoch": 0.4997994553629858, "grad_norm": 0.23927262425422668, "learning_rate": 9.331011006275234e-06, "loss": 0.0093, "step": 59190 }, { "epoch": 0.49988389521014964, "grad_norm": 0.5004435777664185, "learning_rate": 9.33064274405319e-06, "loss": 0.017, "step": 59200 }, { "epoch": 0.49996833505731353, "grad_norm": 0.1324840784072876, "learning_rate": 9.330274387770477e-06, "loss": 0.0089, "step": 59210 }, { "epoch": 0.5000527749044774, "grad_norm": 0.37180736660957336, "learning_rate": 9.329905937435089e-06, "loss": 0.0142, "step": 59220 }, { "epoch": 0.5001372147516413, "grad_norm": 0.5768831968307495, "learning_rate": 9.329537393055032e-06, "loss": 0.0099, "step": 59230 }, { "epoch": 0.5002216545988052, "grad_norm": 0.41559895873069763, "learning_rate": 9.32916875463831e-06, "loss": 0.0102, "step": 59240 }, { "epoch": 0.5003060944459691, "grad_norm": 0.14857548475265503, "learning_rate": 9.328800022192929e-06, "loss": 0.01, "step": 59250 }, { "epoch": 0.500390534293133, "grad_norm": 0.14517620205879211, "learning_rate": 9.328431195726898e-06, "loss": 0.0084, "step": 59260 }, { "epoch": 0.5004749741402968, "grad_norm": 0.27842170000076294, "learning_rate": 9.32806227524823e-06, "loss": 0.0117, "step": 59270 }, { "epoch": 0.5005594139874607, "grad_norm": 0.1288267821073532, "learning_rate": 9.327693260764937e-06, "loss": 0.0143, "step": 59280 }, { "epoch": 0.5006438538346245, "grad_norm": 0.41420745849609375, "learning_rate": 9.327324152285032e-06, "loss": 0.0158, "step": 59290 }, { "epoch": 0.5007282936817884, "grad_norm": 0.7814791202545166, "learning_rate": 9.326954949816531e-06, "loss": 0.0155, "step": 59300 }, { "epoch": 0.5008127335289523, "grad_norm": 0.18607178330421448, "learning_rate": 9.326585653367458e-06, "loss": 0.0068, "step": 59310 }, { "epoch": 0.5008971733761162, "grad_norm": 0.272402286529541, "learning_rate": 9.326216262945831e-06, "loss": 0.0209, "step": 59320 }, { "epoch": 0.50098161322328, "grad_norm": 0.36912626028060913, "learning_rate": 9.325846778559673e-06, "loss": 0.0098, "step": 59330 }, { "epoch": 0.5010660530704439, "grad_norm": 1.0902202129364014, "learning_rate": 9.32547720021701e-06, "loss": 0.0155, "step": 59340 }, { "epoch": 0.5011504929176078, "grad_norm": 0.4665512144565582, "learning_rate": 9.325107527925867e-06, "loss": 0.0119, "step": 59350 }, { "epoch": 0.5012349327647717, "grad_norm": 0.3540622889995575, "learning_rate": 9.324737761694276e-06, "loss": 0.0173, "step": 59360 }, { "epoch": 0.5013193726119356, "grad_norm": 0.5764462351799011, "learning_rate": 9.324367901530268e-06, "loss": 0.0151, "step": 59370 }, { "epoch": 0.5014038124590995, "grad_norm": 0.5822502374649048, "learning_rate": 9.323997947441872e-06, "loss": 0.0158, "step": 59380 }, { "epoch": 0.5014882523062634, "grad_norm": 0.43657705187797546, "learning_rate": 9.323627899437128e-06, "loss": 0.0279, "step": 59390 }, { "epoch": 0.5015726921534273, "grad_norm": 0.29492196440696716, "learning_rate": 9.323257757524074e-06, "loss": 0.0131, "step": 59400 }, { "epoch": 0.501657132000591, "grad_norm": 1.0787807703018188, "learning_rate": 9.322887521710745e-06, "loss": 0.0197, "step": 59410 }, { "epoch": 0.5017415718477549, "grad_norm": 0.7408972978591919, "learning_rate": 9.322517192005185e-06, "loss": 0.0085, "step": 59420 }, { "epoch": 0.5018260116949188, "grad_norm": 0.6709010004997253, "learning_rate": 9.32214676841544e-06, "loss": 0.0131, "step": 59430 }, { "epoch": 0.5019104515420827, "grad_norm": 0.3451274037361145, "learning_rate": 9.32177625094955e-06, "loss": 0.0139, "step": 59440 }, { "epoch": 0.5019948913892466, "grad_norm": 0.3798070251941681, "learning_rate": 9.321405639615567e-06, "loss": 0.0092, "step": 59450 }, { "epoch": 0.5020793312364105, "grad_norm": 0.4035240709781647, "learning_rate": 9.321034934421539e-06, "loss": 0.0124, "step": 59460 }, { "epoch": 0.5021637710835744, "grad_norm": 0.3825409710407257, "learning_rate": 9.320664135375516e-06, "loss": 0.0123, "step": 59470 }, { "epoch": 0.5022482109307382, "grad_norm": 0.22752805054187775, "learning_rate": 9.320293242485557e-06, "loss": 0.016, "step": 59480 }, { "epoch": 0.5023326507779021, "grad_norm": 0.38370999693870544, "learning_rate": 9.31992225575971e-06, "loss": 0.0117, "step": 59490 }, { "epoch": 0.502417090625066, "grad_norm": 0.3584609925746918, "learning_rate": 9.319551175206038e-06, "loss": 0.009, "step": 59500 }, { "epoch": 0.5025015304722299, "grad_norm": 0.29216569662094116, "learning_rate": 9.319180000832602e-06, "loss": 0.0165, "step": 59510 }, { "epoch": 0.5025859703193937, "grad_norm": 0.8831316828727722, "learning_rate": 9.31880873264746e-06, "loss": 0.0183, "step": 59520 }, { "epoch": 0.5026704101665576, "grad_norm": 0.5490566492080688, "learning_rate": 9.318437370658677e-06, "loss": 0.0206, "step": 59530 }, { "epoch": 0.5027548500137214, "grad_norm": 0.019582664594054222, "learning_rate": 9.318065914874319e-06, "loss": 0.0153, "step": 59540 }, { "epoch": 0.5028392898608853, "grad_norm": 0.40520989894866943, "learning_rate": 9.317694365302455e-06, "loss": 0.0106, "step": 59550 }, { "epoch": 0.5029237297080492, "grad_norm": 0.2753881514072418, "learning_rate": 9.317322721951153e-06, "loss": 0.0109, "step": 59560 }, { "epoch": 0.5030081695552131, "grad_norm": 0.6229685544967651, "learning_rate": 9.316950984828486e-06, "loss": 0.0198, "step": 59570 }, { "epoch": 0.503092609402377, "grad_norm": 0.4365091919898987, "learning_rate": 9.31657915394253e-06, "loss": 0.0097, "step": 59580 }, { "epoch": 0.5031770492495409, "grad_norm": 0.4006112217903137, "learning_rate": 9.316207229301358e-06, "loss": 0.0281, "step": 59590 }, { "epoch": 0.5032614890967048, "grad_norm": 0.5075029730796814, "learning_rate": 9.31583521091305e-06, "loss": 0.0149, "step": 59600 }, { "epoch": 0.5033459289438686, "grad_norm": 0.19416025280952454, "learning_rate": 9.315463098785686e-06, "loss": 0.0194, "step": 59610 }, { "epoch": 0.5034303687910325, "grad_norm": 0.3900161385536194, "learning_rate": 9.315090892927348e-06, "loss": 0.0146, "step": 59620 }, { "epoch": 0.5035148086381963, "grad_norm": 0.35730457305908203, "learning_rate": 9.314718593346119e-06, "loss": 0.0138, "step": 59630 }, { "epoch": 0.5035992484853602, "grad_norm": 0.35932180285453796, "learning_rate": 9.314346200050086e-06, "loss": 0.0124, "step": 59640 }, { "epoch": 0.5036836883325241, "grad_norm": 0.28614482283592224, "learning_rate": 9.313973713047338e-06, "loss": 0.0108, "step": 59650 }, { "epoch": 0.503768128179688, "grad_norm": 0.32102084159851074, "learning_rate": 9.313601132345967e-06, "loss": 0.0105, "step": 59660 }, { "epoch": 0.5038525680268519, "grad_norm": 0.1801314502954483, "learning_rate": 9.31322845795406e-06, "loss": 0.0145, "step": 59670 }, { "epoch": 0.5039370078740157, "grad_norm": 0.4635794460773468, "learning_rate": 9.312855689879716e-06, "loss": 0.0086, "step": 59680 }, { "epoch": 0.5040214477211796, "grad_norm": 0.6182215809822083, "learning_rate": 9.312482828131031e-06, "loss": 0.0121, "step": 59690 }, { "epoch": 0.5041058875683435, "grad_norm": 0.22753487527370453, "learning_rate": 9.312109872716102e-06, "loss": 0.0055, "step": 59700 }, { "epoch": 0.5041903274155074, "grad_norm": 0.35091063380241394, "learning_rate": 9.31173682364303e-06, "loss": 0.0172, "step": 59710 }, { "epoch": 0.5042747672626713, "grad_norm": 0.47739920020103455, "learning_rate": 9.31136368091992e-06, "loss": 0.019, "step": 59720 }, { "epoch": 0.5043592071098352, "grad_norm": 0.18950343132019043, "learning_rate": 9.310990444554872e-06, "loss": 0.0119, "step": 59730 }, { "epoch": 0.5044436469569991, "grad_norm": 0.01094734575599432, "learning_rate": 9.310617114555995e-06, "loss": 0.0158, "step": 59740 }, { "epoch": 0.5045280868041628, "grad_norm": 0.1671818047761917, "learning_rate": 9.310243690931398e-06, "loss": 0.012, "step": 59750 }, { "epoch": 0.5046125266513267, "grad_norm": 0.9132121205329895, "learning_rate": 9.309870173689191e-06, "loss": 0.0172, "step": 59760 }, { "epoch": 0.5046969664984906, "grad_norm": 0.432651549577713, "learning_rate": 9.309496562837488e-06, "loss": 0.0098, "step": 59770 }, { "epoch": 0.5047814063456545, "grad_norm": 0.443798691034317, "learning_rate": 9.309122858384402e-06, "loss": 0.0142, "step": 59780 }, { "epoch": 0.5048658461928184, "grad_norm": 0.05426539480686188, "learning_rate": 9.308749060338052e-06, "loss": 0.0101, "step": 59790 }, { "epoch": 0.5049502860399823, "grad_norm": 0.4124387502670288, "learning_rate": 9.308375168706554e-06, "loss": 0.0209, "step": 59800 }, { "epoch": 0.5050347258871462, "grad_norm": 0.30030420422554016, "learning_rate": 9.30800118349803e-06, "loss": 0.0127, "step": 59810 }, { "epoch": 0.50511916573431, "grad_norm": 0.14104528725147247, "learning_rate": 9.307627104720602e-06, "loss": 0.0092, "step": 59820 }, { "epoch": 0.5052036055814739, "grad_norm": 0.19184474647045135, "learning_rate": 9.307252932382398e-06, "loss": 0.014, "step": 59830 }, { "epoch": 0.5052880454286378, "grad_norm": 0.4972532391548157, "learning_rate": 9.306878666491543e-06, "loss": 0.0106, "step": 59840 }, { "epoch": 0.5053724852758017, "grad_norm": 1.2125619649887085, "learning_rate": 9.306504307056164e-06, "loss": 0.0166, "step": 59850 }, { "epoch": 0.5054569251229655, "grad_norm": 0.5243943333625793, "learning_rate": 9.306129854084396e-06, "loss": 0.0108, "step": 59860 }, { "epoch": 0.5055413649701294, "grad_norm": 0.4032428562641144, "learning_rate": 9.305755307584369e-06, "loss": 0.0186, "step": 59870 }, { "epoch": 0.5056258048172932, "grad_norm": 0.4121669828891754, "learning_rate": 9.305380667564219e-06, "loss": 0.0099, "step": 59880 }, { "epoch": 0.5057102446644571, "grad_norm": 0.2858012616634369, "learning_rate": 9.305005934032083e-06, "loss": 0.0216, "step": 59890 }, { "epoch": 0.505794684511621, "grad_norm": 0.3618719279766083, "learning_rate": 9.304631106996101e-06, "loss": 0.0104, "step": 59900 }, { "epoch": 0.5058791243587849, "grad_norm": 0.40926864743232727, "learning_rate": 9.304256186464413e-06, "loss": 0.0134, "step": 59910 }, { "epoch": 0.5059635642059488, "grad_norm": 0.19478383660316467, "learning_rate": 9.303881172445163e-06, "loss": 0.0176, "step": 59920 }, { "epoch": 0.5060480040531127, "grad_norm": 0.7351306676864624, "learning_rate": 9.303506064946495e-06, "loss": 0.023, "step": 59930 }, { "epoch": 0.5061324439002766, "grad_norm": 0.523544430732727, "learning_rate": 9.303130863976558e-06, "loss": 0.0097, "step": 59940 }, { "epoch": 0.5062168837474404, "grad_norm": 0.3020641505718231, "learning_rate": 9.3027555695435e-06, "loss": 0.0151, "step": 59950 }, { "epoch": 0.5063013235946043, "grad_norm": 0.4256478250026703, "learning_rate": 9.302380181655473e-06, "loss": 0.0101, "step": 59960 }, { "epoch": 0.5063857634417682, "grad_norm": 0.5107252597808838, "learning_rate": 9.30200470032063e-06, "loss": 0.0094, "step": 59970 }, { "epoch": 0.506470203288932, "grad_norm": 0.01939263381063938, "learning_rate": 9.301629125547127e-06, "loss": 0.0167, "step": 59980 }, { "epoch": 0.5065546431360959, "grad_norm": 0.41842105984687805, "learning_rate": 9.30125345734312e-06, "loss": 0.0116, "step": 59990 }, { "epoch": 0.5066390829832598, "grad_norm": 0.20479850471019745, "learning_rate": 9.30087769571677e-06, "loss": 0.0151, "step": 60000 }, { "epoch": 0.5067235228304237, "grad_norm": 0.3492632806301117, "learning_rate": 9.300501840676237e-06, "loss": 0.008, "step": 60010 }, { "epoch": 0.5068079626775875, "grad_norm": 0.4678933024406433, "learning_rate": 9.300125892229686e-06, "loss": 0.0096, "step": 60020 }, { "epoch": 0.5068924025247514, "grad_norm": 0.3548631966114044, "learning_rate": 9.299749850385283e-06, "loss": 0.0092, "step": 60030 }, { "epoch": 0.5069768423719153, "grad_norm": 0.7275392413139343, "learning_rate": 9.299373715151192e-06, "loss": 0.0081, "step": 60040 }, { "epoch": 0.5070612822190792, "grad_norm": 0.6604763269424438, "learning_rate": 9.298997486535586e-06, "loss": 0.0225, "step": 60050 }, { "epoch": 0.5071457220662431, "grad_norm": 0.35226574540138245, "learning_rate": 9.298621164546635e-06, "loss": 0.0102, "step": 60060 }, { "epoch": 0.507230161913407, "grad_norm": 0.2685427665710449, "learning_rate": 9.298244749192513e-06, "loss": 0.0234, "step": 60070 }, { "epoch": 0.5073146017605709, "grad_norm": 0.239597886800766, "learning_rate": 9.297868240481397e-06, "loss": 0.0125, "step": 60080 }, { "epoch": 0.5073990416077346, "grad_norm": 0.3522648811340332, "learning_rate": 9.297491638421462e-06, "loss": 0.0263, "step": 60090 }, { "epoch": 0.5074834814548985, "grad_norm": 0.31951087713241577, "learning_rate": 9.29711494302089e-06, "loss": 0.0067, "step": 60100 }, { "epoch": 0.5075679213020624, "grad_norm": 0.12429427355527878, "learning_rate": 9.296738154287864e-06, "loss": 0.01, "step": 60110 }, { "epoch": 0.5076523611492263, "grad_norm": 0.1868760734796524, "learning_rate": 9.296361272230563e-06, "loss": 0.016, "step": 60120 }, { "epoch": 0.5077368009963902, "grad_norm": 0.16911479830741882, "learning_rate": 9.295984296857177e-06, "loss": 0.0099, "step": 60130 }, { "epoch": 0.5078212408435541, "grad_norm": 0.36531516909599304, "learning_rate": 9.29560722817589e-06, "loss": 0.0089, "step": 60140 }, { "epoch": 0.507905680690718, "grad_norm": 0.4421771168708801, "learning_rate": 9.295230066194897e-06, "loss": 0.012, "step": 60150 }, { "epoch": 0.5079901205378818, "grad_norm": 0.6474315524101257, "learning_rate": 9.294852810922388e-06, "loss": 0.0153, "step": 60160 }, { "epoch": 0.5080745603850457, "grad_norm": 0.3449358344078064, "learning_rate": 9.294475462366555e-06, "loss": 0.0103, "step": 60170 }, { "epoch": 0.5081590002322096, "grad_norm": 0.3172045052051544, "learning_rate": 9.294098020535593e-06, "loss": 0.0102, "step": 60180 }, { "epoch": 0.5082434400793735, "grad_norm": 0.47367095947265625, "learning_rate": 9.293720485437703e-06, "loss": 0.0212, "step": 60190 }, { "epoch": 0.5083278799265374, "grad_norm": 0.8461731672286987, "learning_rate": 9.293342857081085e-06, "loss": 0.0112, "step": 60200 }, { "epoch": 0.5084123197737012, "grad_norm": 0.4139941334724426, "learning_rate": 9.29296513547394e-06, "loss": 0.0087, "step": 60210 }, { "epoch": 0.508496759620865, "grad_norm": 0.14319999516010284, "learning_rate": 9.29258732062447e-06, "loss": 0.0076, "step": 60220 }, { "epoch": 0.5085811994680289, "grad_norm": 0.33794155716896057, "learning_rate": 9.292209412540885e-06, "loss": 0.01, "step": 60230 }, { "epoch": 0.5086656393151928, "grad_norm": 0.13531403243541718, "learning_rate": 9.291831411231388e-06, "loss": 0.0205, "step": 60240 }, { "epoch": 0.5087500791623567, "grad_norm": 0.24598757922649384, "learning_rate": 9.291453316704195e-06, "loss": 0.0188, "step": 60250 }, { "epoch": 0.5088345190095206, "grad_norm": 0.29702994227409363, "learning_rate": 9.291075128967517e-06, "loss": 0.0119, "step": 60260 }, { "epoch": 0.5089189588566845, "grad_norm": 0.22276251018047333, "learning_rate": 9.290696848029563e-06, "loss": 0.0091, "step": 60270 }, { "epoch": 0.5090033987038484, "grad_norm": 0.15332475304603577, "learning_rate": 9.290318473898555e-06, "loss": 0.0201, "step": 60280 }, { "epoch": 0.5090878385510123, "grad_norm": 0.17321600019931793, "learning_rate": 9.289940006582706e-06, "loss": 0.0106, "step": 60290 }, { "epoch": 0.5091722783981761, "grad_norm": 0.2826169431209564, "learning_rate": 9.289561446090242e-06, "loss": 0.0127, "step": 60300 }, { "epoch": 0.50925671824534, "grad_norm": 0.20977073907852173, "learning_rate": 9.289182792429382e-06, "loss": 0.0128, "step": 60310 }, { "epoch": 0.5093411580925038, "grad_norm": 0.23861883580684662, "learning_rate": 9.288804045608351e-06, "loss": 0.0243, "step": 60320 }, { "epoch": 0.5094255979396677, "grad_norm": 0.4349658191204071, "learning_rate": 9.288425205635374e-06, "loss": 0.0128, "step": 60330 }, { "epoch": 0.5095100377868316, "grad_norm": 0.5359317064285278, "learning_rate": 9.28804627251868e-06, "loss": 0.0149, "step": 60340 }, { "epoch": 0.5095944776339955, "grad_norm": 0.6661383509635925, "learning_rate": 9.2876672462665e-06, "loss": 0.0213, "step": 60350 }, { "epoch": 0.5096789174811593, "grad_norm": 0.5652182102203369, "learning_rate": 9.287288126887066e-06, "loss": 0.0171, "step": 60360 }, { "epoch": 0.5097633573283232, "grad_norm": 0.1455634981393814, "learning_rate": 9.286908914388614e-06, "loss": 0.006, "step": 60370 }, { "epoch": 0.5098477971754871, "grad_norm": 0.479659765958786, "learning_rate": 9.286529608779379e-06, "loss": 0.0109, "step": 60380 }, { "epoch": 0.509932237022651, "grad_norm": 0.5907760262489319, "learning_rate": 9.286150210067595e-06, "loss": 0.0121, "step": 60390 }, { "epoch": 0.5100166768698149, "grad_norm": 0.3914528787136078, "learning_rate": 9.285770718261511e-06, "loss": 0.0138, "step": 60400 }, { "epoch": 0.5101011167169788, "grad_norm": 0.368365615606308, "learning_rate": 9.285391133369364e-06, "loss": 0.0135, "step": 60410 }, { "epoch": 0.5101855565641427, "grad_norm": 0.23549166321754456, "learning_rate": 9.2850114553994e-06, "loss": 0.0078, "step": 60420 }, { "epoch": 0.5102699964113065, "grad_norm": 0.2863317131996155, "learning_rate": 9.284631684359864e-06, "loss": 0.0131, "step": 60430 }, { "epoch": 0.5103544362584703, "grad_norm": 0.21995815634727478, "learning_rate": 9.284251820259008e-06, "loss": 0.0114, "step": 60440 }, { "epoch": 0.5104388761056342, "grad_norm": 0.1974690556526184, "learning_rate": 9.283871863105079e-06, "loss": 0.009, "step": 60450 }, { "epoch": 0.5105233159527981, "grad_norm": 0.20037008821964264, "learning_rate": 9.283491812906329e-06, "loss": 0.0116, "step": 60460 }, { "epoch": 0.510607755799962, "grad_norm": 0.5453402400016785, "learning_rate": 9.283111669671017e-06, "loss": 0.0126, "step": 60470 }, { "epoch": 0.5106921956471259, "grad_norm": 0.3387255072593689, "learning_rate": 9.282731433407398e-06, "loss": 0.0139, "step": 60480 }, { "epoch": 0.5107766354942898, "grad_norm": 0.3447425663471222, "learning_rate": 9.282351104123728e-06, "loss": 0.0071, "step": 60490 }, { "epoch": 0.5108610753414536, "grad_norm": 0.4575863778591156, "learning_rate": 9.28197068182827e-06, "loss": 0.0237, "step": 60500 }, { "epoch": 0.5109455151886175, "grad_norm": 0.9670538306236267, "learning_rate": 9.281590166529287e-06, "loss": 0.0136, "step": 60510 }, { "epoch": 0.5110299550357814, "grad_norm": 0.4302358329296112, "learning_rate": 9.281209558235043e-06, "loss": 0.0105, "step": 60520 }, { "epoch": 0.5111143948829453, "grad_norm": 0.3825385272502899, "learning_rate": 9.280828856953803e-06, "loss": 0.0185, "step": 60530 }, { "epoch": 0.5111988347301092, "grad_norm": 1.097088098526001, "learning_rate": 9.280448062693838e-06, "loss": 0.0202, "step": 60540 }, { "epoch": 0.511283274577273, "grad_norm": 0.7317948341369629, "learning_rate": 9.280067175463421e-06, "loss": 0.0158, "step": 60550 }, { "epoch": 0.5113677144244368, "grad_norm": 0.5048770308494568, "learning_rate": 9.279686195270819e-06, "loss": 0.0218, "step": 60560 }, { "epoch": 0.5114521542716007, "grad_norm": 0.24545493721961975, "learning_rate": 9.27930512212431e-06, "loss": 0.0103, "step": 60570 }, { "epoch": 0.5115365941187646, "grad_norm": 0.08504708111286163, "learning_rate": 9.278923956032172e-06, "loss": 0.0146, "step": 60580 }, { "epoch": 0.5116210339659285, "grad_norm": 0.26564037799835205, "learning_rate": 9.278542697002681e-06, "loss": 0.0136, "step": 60590 }, { "epoch": 0.5117054738130924, "grad_norm": 0.4320104122161865, "learning_rate": 9.278161345044121e-06, "loss": 0.015, "step": 60600 }, { "epoch": 0.5117899136602563, "grad_norm": 0.3535914719104767, "learning_rate": 9.277779900164773e-06, "loss": 0.0168, "step": 60610 }, { "epoch": 0.5118743535074202, "grad_norm": 0.38623273372650146, "learning_rate": 9.277398362372922e-06, "loss": 0.0144, "step": 60620 }, { "epoch": 0.511958793354584, "grad_norm": 0.5975171327590942, "learning_rate": 9.277016731676855e-06, "loss": 0.01, "step": 60630 }, { "epoch": 0.5120432332017479, "grad_norm": 0.22226209938526154, "learning_rate": 9.27663500808486e-06, "loss": 0.0169, "step": 60640 }, { "epoch": 0.5121276730489118, "grad_norm": 0.08350969105958939, "learning_rate": 9.276253191605232e-06, "loss": 0.0091, "step": 60650 }, { "epoch": 0.5122121128960757, "grad_norm": 0.3690885603427887, "learning_rate": 9.275871282246256e-06, "loss": 0.0169, "step": 60660 }, { "epoch": 0.5122965527432395, "grad_norm": 0.26927608251571655, "learning_rate": 9.275489280016237e-06, "loss": 0.0124, "step": 60670 }, { "epoch": 0.5123809925904034, "grad_norm": 0.32813388109207153, "learning_rate": 9.275107184923463e-06, "loss": 0.0149, "step": 60680 }, { "epoch": 0.5124654324375673, "grad_norm": 0.6072826385498047, "learning_rate": 9.27472499697624e-06, "loss": 0.0097, "step": 60690 }, { "epoch": 0.5125498722847311, "grad_norm": 0.39519011974334717, "learning_rate": 9.274342716182864e-06, "loss": 0.0195, "step": 60700 }, { "epoch": 0.512634312131895, "grad_norm": 0.4551677107810974, "learning_rate": 9.27396034255164e-06, "loss": 0.0238, "step": 60710 }, { "epoch": 0.5127187519790589, "grad_norm": 0.3473856747150421, "learning_rate": 9.273577876090876e-06, "loss": 0.0103, "step": 60720 }, { "epoch": 0.5128031918262228, "grad_norm": 0.6130092144012451, "learning_rate": 9.273195316808873e-06, "loss": 0.013, "step": 60730 }, { "epoch": 0.5128876316733867, "grad_norm": 0.22228612005710602, "learning_rate": 9.272812664713945e-06, "loss": 0.0093, "step": 60740 }, { "epoch": 0.5129720715205506, "grad_norm": 0.048904258757829666, "learning_rate": 9.272429919814401e-06, "loss": 0.0094, "step": 60750 }, { "epoch": 0.5130565113677145, "grad_norm": 0.18363235890865326, "learning_rate": 9.272047082118556e-06, "loss": 0.0132, "step": 60760 }, { "epoch": 0.5131409512148783, "grad_norm": 0.41299962997436523, "learning_rate": 9.271664151634721e-06, "loss": 0.0116, "step": 60770 }, { "epoch": 0.5132253910620421, "grad_norm": 0.4077122211456299, "learning_rate": 9.27128112837122e-06, "loss": 0.0119, "step": 60780 }, { "epoch": 0.513309830909206, "grad_norm": 0.36447829008102417, "learning_rate": 9.270898012336365e-06, "loss": 0.0199, "step": 60790 }, { "epoch": 0.5133942707563699, "grad_norm": 0.7054146528244019, "learning_rate": 9.27051480353848e-06, "loss": 0.0152, "step": 60800 }, { "epoch": 0.5134787106035338, "grad_norm": 0.2622877359390259, "learning_rate": 9.27013150198589e-06, "loss": 0.0104, "step": 60810 }, { "epoch": 0.5135631504506977, "grad_norm": 0.14878545701503754, "learning_rate": 9.269748107686917e-06, "loss": 0.009, "step": 60820 }, { "epoch": 0.5136475902978616, "grad_norm": 0.4576819837093353, "learning_rate": 9.269364620649893e-06, "loss": 0.0162, "step": 60830 }, { "epoch": 0.5137320301450254, "grad_norm": 0.1266372799873352, "learning_rate": 9.268981040883143e-06, "loss": 0.0168, "step": 60840 }, { "epoch": 0.5138164699921893, "grad_norm": 0.30849918723106384, "learning_rate": 9.268597368395e-06, "loss": 0.0153, "step": 60850 }, { "epoch": 0.5139009098393532, "grad_norm": 0.19799922406673431, "learning_rate": 9.268213603193796e-06, "loss": 0.0121, "step": 60860 }, { "epoch": 0.5139853496865171, "grad_norm": 0.5010849833488464, "learning_rate": 9.267829745287867e-06, "loss": 0.0164, "step": 60870 }, { "epoch": 0.514069789533681, "grad_norm": 1.3669406175613403, "learning_rate": 9.267445794685552e-06, "loss": 0.0222, "step": 60880 }, { "epoch": 0.5141542293808449, "grad_norm": 0.2899334728717804, "learning_rate": 9.267061751395187e-06, "loss": 0.0123, "step": 60890 }, { "epoch": 0.5142386692280086, "grad_norm": 0.5348419547080994, "learning_rate": 9.266677615425118e-06, "loss": 0.0173, "step": 60900 }, { "epoch": 0.5143231090751725, "grad_norm": 1.0257928371429443, "learning_rate": 9.266293386783683e-06, "loss": 0.0254, "step": 60910 }, { "epoch": 0.5144075489223364, "grad_norm": 1.1926630735397339, "learning_rate": 9.26590906547923e-06, "loss": 0.0171, "step": 60920 }, { "epoch": 0.5144919887695003, "grad_norm": 0.49309250712394714, "learning_rate": 9.265524651520106e-06, "loss": 0.0222, "step": 60930 }, { "epoch": 0.5145764286166642, "grad_norm": 0.22091828286647797, "learning_rate": 9.265140144914662e-06, "loss": 0.0117, "step": 60940 }, { "epoch": 0.5146608684638281, "grad_norm": 0.26074114441871643, "learning_rate": 9.264755545671247e-06, "loss": 0.0114, "step": 60950 }, { "epoch": 0.514745308310992, "grad_norm": 0.6474300622940063, "learning_rate": 9.264370853798217e-06, "loss": 0.0121, "step": 60960 }, { "epoch": 0.5148297481581559, "grad_norm": 0.32899048924446106, "learning_rate": 9.263986069303924e-06, "loss": 0.0132, "step": 60970 }, { "epoch": 0.5149141880053197, "grad_norm": 0.30232253670692444, "learning_rate": 9.263601192196727e-06, "loss": 0.0193, "step": 60980 }, { "epoch": 0.5149986278524836, "grad_norm": 0.29116836190223694, "learning_rate": 9.263216222484988e-06, "loss": 0.0119, "step": 60990 }, { "epoch": 0.5150830676996475, "grad_norm": 0.1346965730190277, "learning_rate": 9.262831160177065e-06, "loss": 0.0142, "step": 61000 }, { "epoch": 0.5150830676996475, "eval_loss": 0.012302111834287643, "eval_runtime": 2.9045, "eval_samples_per_second": 68.859, "eval_steps_per_second": 34.429, "step": 61000 }, { "epoch": 0.5151675075468113, "grad_norm": 0.4946332573890686, "learning_rate": 9.262446005281322e-06, "loss": 0.0151, "step": 61010 }, { "epoch": 0.5152519473939752, "grad_norm": 0.1580837219953537, "learning_rate": 9.262060757806127e-06, "loss": 0.0125, "step": 61020 }, { "epoch": 0.5153363872411391, "grad_norm": 0.7419902682304382, "learning_rate": 9.261675417759846e-06, "loss": 0.0208, "step": 61030 }, { "epoch": 0.515420827088303, "grad_norm": 0.45304250717163086, "learning_rate": 9.261289985150847e-06, "loss": 0.0109, "step": 61040 }, { "epoch": 0.5155052669354668, "grad_norm": 0.3478968143463135, "learning_rate": 9.260904459987502e-06, "loss": 0.0159, "step": 61050 }, { "epoch": 0.5155897067826307, "grad_norm": 0.2747814953327179, "learning_rate": 9.260518842278185e-06, "loss": 0.0125, "step": 61060 }, { "epoch": 0.5156741466297946, "grad_norm": 0.3282414376735687, "learning_rate": 9.260133132031274e-06, "loss": 0.0136, "step": 61070 }, { "epoch": 0.5157585864769585, "grad_norm": 0.22911639511585236, "learning_rate": 9.259747329255145e-06, "loss": 0.0165, "step": 61080 }, { "epoch": 0.5158430263241224, "grad_norm": 0.06229550391435623, "learning_rate": 9.259361433958174e-06, "loss": 0.0099, "step": 61090 }, { "epoch": 0.5159274661712863, "grad_norm": 0.3215469419956207, "learning_rate": 9.258975446148747e-06, "loss": 0.0124, "step": 61100 }, { "epoch": 0.5160119060184502, "grad_norm": 0.1039087101817131, "learning_rate": 9.258589365835244e-06, "loss": 0.0141, "step": 61110 }, { "epoch": 0.516096345865614, "grad_norm": 0.47741344571113586, "learning_rate": 9.258203193026056e-06, "loss": 0.0153, "step": 61120 }, { "epoch": 0.5161807857127778, "grad_norm": 0.25720933079719543, "learning_rate": 9.257816927729565e-06, "loss": 0.0128, "step": 61130 }, { "epoch": 0.5162652255599417, "grad_norm": 0.7898632287979126, "learning_rate": 9.257430569954165e-06, "loss": 0.0174, "step": 61140 }, { "epoch": 0.5163496654071056, "grad_norm": 0.8361085057258606, "learning_rate": 9.257044119708242e-06, "loss": 0.0224, "step": 61150 }, { "epoch": 0.5164341052542695, "grad_norm": 0.4231012761592865, "learning_rate": 9.256657577000195e-06, "loss": 0.017, "step": 61160 }, { "epoch": 0.5165185451014334, "grad_norm": 0.5162386894226074, "learning_rate": 9.256270941838419e-06, "loss": 0.0167, "step": 61170 }, { "epoch": 0.5166029849485972, "grad_norm": 0.2859729528427124, "learning_rate": 9.255884214231307e-06, "loss": 0.0193, "step": 61180 }, { "epoch": 0.5166874247957611, "grad_norm": 0.37198224663734436, "learning_rate": 9.255497394187265e-06, "loss": 0.0109, "step": 61190 }, { "epoch": 0.516771864642925, "grad_norm": 0.08568044006824493, "learning_rate": 9.25511048171469e-06, "loss": 0.0109, "step": 61200 }, { "epoch": 0.5168563044900889, "grad_norm": 0.3903314471244812, "learning_rate": 9.25472347682199e-06, "loss": 0.0287, "step": 61210 }, { "epoch": 0.5169407443372528, "grad_norm": 0.1692305952310562, "learning_rate": 9.254336379517566e-06, "loss": 0.0136, "step": 61220 }, { "epoch": 0.5170251841844167, "grad_norm": 0.81240314245224, "learning_rate": 9.253949189809826e-06, "loss": 0.0095, "step": 61230 }, { "epoch": 0.5171096240315805, "grad_norm": 0.4372849464416504, "learning_rate": 9.253561907707183e-06, "loss": 0.0164, "step": 61240 }, { "epoch": 0.5171940638787443, "grad_norm": 0.3780527114868164, "learning_rate": 9.253174533218047e-06, "loss": 0.0117, "step": 61250 }, { "epoch": 0.5172785037259082, "grad_norm": 0.2909637987613678, "learning_rate": 9.252787066350832e-06, "loss": 0.0182, "step": 61260 }, { "epoch": 0.5173629435730721, "grad_norm": 0.34020864963531494, "learning_rate": 9.252399507113952e-06, "loss": 0.0095, "step": 61270 }, { "epoch": 0.517447383420236, "grad_norm": 0.4224109649658203, "learning_rate": 9.252011855515827e-06, "loss": 0.0185, "step": 61280 }, { "epoch": 0.5175318232673999, "grad_norm": 0.39604732394218445, "learning_rate": 9.251624111564877e-06, "loss": 0.0172, "step": 61290 }, { "epoch": 0.5176162631145638, "grad_norm": 0.40043696761131287, "learning_rate": 9.25123627526952e-06, "loss": 0.0183, "step": 61300 }, { "epoch": 0.5177007029617277, "grad_norm": 0.5020068287849426, "learning_rate": 9.250848346638184e-06, "loss": 0.0102, "step": 61310 }, { "epoch": 0.5177851428088915, "grad_norm": 0.4663543403148651, "learning_rate": 9.250460325679294e-06, "loss": 0.0143, "step": 61320 }, { "epoch": 0.5178695826560554, "grad_norm": 0.8827739953994751, "learning_rate": 9.250072212401275e-06, "loss": 0.0133, "step": 61330 }, { "epoch": 0.5179540225032193, "grad_norm": 0.335068017244339, "learning_rate": 9.249684006812557e-06, "loss": 0.0153, "step": 61340 }, { "epoch": 0.5180384623503832, "grad_norm": 0.457925945520401, "learning_rate": 9.249295708921577e-06, "loss": 0.0212, "step": 61350 }, { "epoch": 0.518122902197547, "grad_norm": 0.5829018950462341, "learning_rate": 9.248907318736761e-06, "loss": 0.0188, "step": 61360 }, { "epoch": 0.5182073420447109, "grad_norm": 0.5913971066474915, "learning_rate": 9.248518836266553e-06, "loss": 0.027, "step": 61370 }, { "epoch": 0.5182917818918747, "grad_norm": 0.33624497056007385, "learning_rate": 9.248130261519384e-06, "loss": 0.0151, "step": 61380 }, { "epoch": 0.5183762217390386, "grad_norm": 0.4850338399410248, "learning_rate": 9.247741594503696e-06, "loss": 0.0128, "step": 61390 }, { "epoch": 0.5184606615862025, "grad_norm": 0.45096102356910706, "learning_rate": 9.247352835227931e-06, "loss": 0.0102, "step": 61400 }, { "epoch": 0.5185451014333664, "grad_norm": 0.6476851105690002, "learning_rate": 9.246963983700534e-06, "loss": 0.0222, "step": 61410 }, { "epoch": 0.5186295412805303, "grad_norm": 1.1504019498825073, "learning_rate": 9.246575039929948e-06, "loss": 0.0166, "step": 61420 }, { "epoch": 0.5187139811276942, "grad_norm": 0.8825271725654602, "learning_rate": 9.246186003924624e-06, "loss": 0.0174, "step": 61430 }, { "epoch": 0.5187984209748581, "grad_norm": 0.25620153546333313, "learning_rate": 9.24579687569301e-06, "loss": 0.013, "step": 61440 }, { "epoch": 0.518882860822022, "grad_norm": 0.8035848736763, "learning_rate": 9.245407655243557e-06, "loss": 0.0391, "step": 61450 }, { "epoch": 0.5189673006691858, "grad_norm": 0.6481781601905823, "learning_rate": 9.24501834258472e-06, "loss": 0.0123, "step": 61460 }, { "epoch": 0.5190517405163496, "grad_norm": 0.0759025439620018, "learning_rate": 9.244628937724952e-06, "loss": 0.0101, "step": 61470 }, { "epoch": 0.5191361803635135, "grad_norm": 0.5251703262329102, "learning_rate": 9.244239440672718e-06, "loss": 0.0088, "step": 61480 }, { "epoch": 0.5192206202106774, "grad_norm": 0.5074587464332581, "learning_rate": 9.24384985143647e-06, "loss": 0.007, "step": 61490 }, { "epoch": 0.5193050600578413, "grad_norm": 0.48580342531204224, "learning_rate": 9.243460170024674e-06, "loss": 0.0159, "step": 61500 }, { "epoch": 0.5193894999050052, "grad_norm": 0.9425806999206543, "learning_rate": 9.243070396445793e-06, "loss": 0.0215, "step": 61510 }, { "epoch": 0.519473939752169, "grad_norm": 0.4614119231700897, "learning_rate": 9.24268053070829e-06, "loss": 0.0167, "step": 61520 }, { "epoch": 0.5195583795993329, "grad_norm": 0.7143502235412598, "learning_rate": 9.242290572820637e-06, "loss": 0.0185, "step": 61530 }, { "epoch": 0.5196428194464968, "grad_norm": 0.3537040650844574, "learning_rate": 9.241900522791302e-06, "loss": 0.0165, "step": 61540 }, { "epoch": 0.5197272592936607, "grad_norm": 0.8146287202835083, "learning_rate": 9.241510380628758e-06, "loss": 0.0156, "step": 61550 }, { "epoch": 0.5198116991408246, "grad_norm": 0.4237121641635895, "learning_rate": 9.241120146341477e-06, "loss": 0.0091, "step": 61560 }, { "epoch": 0.5198961389879885, "grad_norm": 0.2997176945209503, "learning_rate": 9.240729819937934e-06, "loss": 0.0177, "step": 61570 }, { "epoch": 0.5199805788351524, "grad_norm": 0.2896443009376526, "learning_rate": 9.24033940142661e-06, "loss": 0.0126, "step": 61580 }, { "epoch": 0.5200650186823161, "grad_norm": 0.3106786012649536, "learning_rate": 9.239948890815984e-06, "loss": 0.0176, "step": 61590 }, { "epoch": 0.52014945852948, "grad_norm": 0.6314466595649719, "learning_rate": 9.239558288114534e-06, "loss": 0.0172, "step": 61600 }, { "epoch": 0.5202338983766439, "grad_norm": 0.4205467700958252, "learning_rate": 9.23916759333075e-06, "loss": 0.0154, "step": 61610 }, { "epoch": 0.5203183382238078, "grad_norm": 0.07937122881412506, "learning_rate": 9.238776806473113e-06, "loss": 0.0127, "step": 61620 }, { "epoch": 0.5204027780709717, "grad_norm": 0.43989425897598267, "learning_rate": 9.238385927550112e-06, "loss": 0.0154, "step": 61630 }, { "epoch": 0.5204872179181356, "grad_norm": 0.38911595940589905, "learning_rate": 9.237994956570237e-06, "loss": 0.0108, "step": 61640 }, { "epoch": 0.5205716577652995, "grad_norm": 0.3050748109817505, "learning_rate": 9.237603893541979e-06, "loss": 0.0188, "step": 61650 }, { "epoch": 0.5206560976124633, "grad_norm": 0.281368225812912, "learning_rate": 9.237212738473835e-06, "loss": 0.0107, "step": 61660 }, { "epoch": 0.5207405374596272, "grad_norm": 0.760657787322998, "learning_rate": 9.236821491374299e-06, "loss": 0.0126, "step": 61670 }, { "epoch": 0.5208249773067911, "grad_norm": 0.8471970558166504, "learning_rate": 9.236430152251867e-06, "loss": 0.0119, "step": 61680 }, { "epoch": 0.520909417153955, "grad_norm": 0.5483145117759705, "learning_rate": 9.23603872111504e-06, "loss": 0.0101, "step": 61690 }, { "epoch": 0.5209938570011188, "grad_norm": 0.2862258553504944, "learning_rate": 9.235647197972322e-06, "loss": 0.0162, "step": 61700 }, { "epoch": 0.5210782968482827, "grad_norm": 0.5363470911979675, "learning_rate": 9.235255582832212e-06, "loss": 0.0071, "step": 61710 }, { "epoch": 0.5211627366954465, "grad_norm": 0.4602021872997284, "learning_rate": 9.234863875703221e-06, "loss": 0.0198, "step": 61720 }, { "epoch": 0.5212471765426104, "grad_norm": 0.004929547663778067, "learning_rate": 9.234472076593852e-06, "loss": 0.0178, "step": 61730 }, { "epoch": 0.5213316163897743, "grad_norm": 0.43733665347099304, "learning_rate": 9.23408018551262e-06, "loss": 0.0272, "step": 61740 }, { "epoch": 0.5214160562369382, "grad_norm": 0.18252982199192047, "learning_rate": 9.233688202468032e-06, "loss": 0.0119, "step": 61750 }, { "epoch": 0.5215004960841021, "grad_norm": 0.7984058260917664, "learning_rate": 9.233296127468605e-06, "loss": 0.0158, "step": 61760 }, { "epoch": 0.521584935931266, "grad_norm": 0.7884252071380615, "learning_rate": 9.232903960522852e-06, "loss": 0.0131, "step": 61770 }, { "epoch": 0.5216693757784299, "grad_norm": 0.04103632643818855, "learning_rate": 9.232511701639295e-06, "loss": 0.0203, "step": 61780 }, { "epoch": 0.5217538156255938, "grad_norm": 0.46861666440963745, "learning_rate": 9.23211935082645e-06, "loss": 0.0102, "step": 61790 }, { "epoch": 0.5218382554727576, "grad_norm": 0.2807346284389496, "learning_rate": 9.231726908092838e-06, "loss": 0.0189, "step": 61800 }, { "epoch": 0.5219226953199215, "grad_norm": 0.20224517583847046, "learning_rate": 9.231334373446988e-06, "loss": 0.0122, "step": 61810 }, { "epoch": 0.5220071351670853, "grad_norm": 0.6627886891365051, "learning_rate": 9.230941746897419e-06, "loss": 0.0152, "step": 61820 }, { "epoch": 0.5220915750142492, "grad_norm": 0.7995612025260925, "learning_rate": 9.230549028452665e-06, "loss": 0.0243, "step": 61830 }, { "epoch": 0.5221760148614131, "grad_norm": 0.5789650082588196, "learning_rate": 9.230156218121251e-06, "loss": 0.0097, "step": 61840 }, { "epoch": 0.522260454708577, "grad_norm": 0.3113830089569092, "learning_rate": 9.229763315911713e-06, "loss": 0.0103, "step": 61850 }, { "epoch": 0.5223448945557408, "grad_norm": 0.5245594382286072, "learning_rate": 9.22937032183258e-06, "loss": 0.0121, "step": 61860 }, { "epoch": 0.5224293344029047, "grad_norm": 0.7369492650032043, "learning_rate": 9.228977235892392e-06, "loss": 0.0114, "step": 61870 }, { "epoch": 0.5225137742500686, "grad_norm": 0.6007855534553528, "learning_rate": 9.228584058099685e-06, "loss": 0.0123, "step": 61880 }, { "epoch": 0.5225982140972325, "grad_norm": 0.3015211820602417, "learning_rate": 9.228190788463e-06, "loss": 0.0182, "step": 61890 }, { "epoch": 0.5226826539443964, "grad_norm": 0.46432292461395264, "learning_rate": 9.227797426990874e-06, "loss": 0.0169, "step": 61900 }, { "epoch": 0.5227670937915603, "grad_norm": 0.25481265783309937, "learning_rate": 9.227403973691856e-06, "loss": 0.0197, "step": 61910 }, { "epoch": 0.5228515336387242, "grad_norm": 0.4907456934452057, "learning_rate": 9.22701042857449e-06, "loss": 0.0109, "step": 61920 }, { "epoch": 0.5229359734858879, "grad_norm": 0.279239296913147, "learning_rate": 9.226616791647324e-06, "loss": 0.023, "step": 61930 }, { "epoch": 0.5230204133330518, "grad_norm": 0.13210529088974, "learning_rate": 9.22622306291891e-06, "loss": 0.0143, "step": 61940 }, { "epoch": 0.5231048531802157, "grad_norm": 0.40404215455055237, "learning_rate": 9.225829242397792e-06, "loss": 0.0283, "step": 61950 }, { "epoch": 0.5231892930273796, "grad_norm": 0.4499689042568207, "learning_rate": 9.225435330092531e-06, "loss": 0.0192, "step": 61960 }, { "epoch": 0.5232737328745435, "grad_norm": 0.3650038242340088, "learning_rate": 9.225041326011681e-06, "loss": 0.0105, "step": 61970 }, { "epoch": 0.5233581727217074, "grad_norm": 0.6569957137107849, "learning_rate": 9.224647230163799e-06, "loss": 0.0096, "step": 61980 }, { "epoch": 0.5234426125688713, "grad_norm": 0.376544713973999, "learning_rate": 9.224253042557445e-06, "loss": 0.0123, "step": 61990 }, { "epoch": 0.5235270524160351, "grad_norm": 0.48367440700531006, "learning_rate": 9.223858763201182e-06, "loss": 0.0213, "step": 62000 }, { "epoch": 0.523611492263199, "grad_norm": 0.106033094227314, "learning_rate": 9.22346439210357e-06, "loss": 0.0121, "step": 62010 }, { "epoch": 0.5236959321103629, "grad_norm": 0.2668883204460144, "learning_rate": 9.223069929273177e-06, "loss": 0.0146, "step": 62020 }, { "epoch": 0.5237803719575268, "grad_norm": 0.5671143531799316, "learning_rate": 9.22267537471857e-06, "loss": 0.0085, "step": 62030 }, { "epoch": 0.5238648118046907, "grad_norm": 0.4056245982646942, "learning_rate": 9.222280728448321e-06, "loss": 0.0207, "step": 62040 }, { "epoch": 0.5239492516518545, "grad_norm": 0.6565013527870178, "learning_rate": 9.221885990470999e-06, "loss": 0.0124, "step": 62050 }, { "epoch": 0.5240336914990184, "grad_norm": 0.3055628538131714, "learning_rate": 9.221491160795177e-06, "loss": 0.0094, "step": 62060 }, { "epoch": 0.5241181313461822, "grad_norm": 0.6623561382293701, "learning_rate": 9.221096239429434e-06, "loss": 0.0152, "step": 62070 }, { "epoch": 0.5242025711933461, "grad_norm": 0.5159276723861694, "learning_rate": 9.220701226382344e-06, "loss": 0.0117, "step": 62080 }, { "epoch": 0.52428701104051, "grad_norm": 0.28388261795043945, "learning_rate": 9.22030612166249e-06, "loss": 0.0097, "step": 62090 }, { "epoch": 0.5243714508876739, "grad_norm": 0.3137972354888916, "learning_rate": 9.219910925278453e-06, "loss": 0.012, "step": 62100 }, { "epoch": 0.5244558907348378, "grad_norm": 0.279813289642334, "learning_rate": 9.219515637238814e-06, "loss": 0.0145, "step": 62110 }, { "epoch": 0.5245403305820017, "grad_norm": 0.15974806249141693, "learning_rate": 9.21912025755216e-06, "loss": 0.024, "step": 62120 }, { "epoch": 0.5246247704291656, "grad_norm": 0.5036340355873108, "learning_rate": 9.218724786227075e-06, "loss": 0.0147, "step": 62130 }, { "epoch": 0.5247092102763294, "grad_norm": 0.41229501366615295, "learning_rate": 9.218329223272158e-06, "loss": 0.0098, "step": 62140 }, { "epoch": 0.5247936501234933, "grad_norm": 0.6424546241760254, "learning_rate": 9.217933568695992e-06, "loss": 0.0084, "step": 62150 }, { "epoch": 0.5248780899706571, "grad_norm": 0.34786903858184814, "learning_rate": 9.217537822507172e-06, "loss": 0.0085, "step": 62160 }, { "epoch": 0.524962529817821, "grad_norm": 0.17845842242240906, "learning_rate": 9.217141984714296e-06, "loss": 0.026, "step": 62170 }, { "epoch": 0.5250469696649849, "grad_norm": 0.32982638478279114, "learning_rate": 9.21674605532596e-06, "loss": 0.0097, "step": 62180 }, { "epoch": 0.5251314095121488, "grad_norm": 0.6363471746444702, "learning_rate": 9.216350034350766e-06, "loss": 0.0104, "step": 62190 }, { "epoch": 0.5252158493593126, "grad_norm": 0.5592069029808044, "learning_rate": 9.21595392179731e-06, "loss": 0.02, "step": 62200 }, { "epoch": 0.5253002892064765, "grad_norm": 0.43044623732566833, "learning_rate": 9.2155577176742e-06, "loss": 0.005, "step": 62210 }, { "epoch": 0.5253847290536404, "grad_norm": 0.5973203182220459, "learning_rate": 9.21516142199004e-06, "loss": 0.0182, "step": 62220 }, { "epoch": 0.5254691689008043, "grad_norm": 0.384965181350708, "learning_rate": 9.21476503475344e-06, "loss": 0.0255, "step": 62230 }, { "epoch": 0.5255536087479682, "grad_norm": 0.2090170532464981, "learning_rate": 9.214368555973003e-06, "loss": 0.0088, "step": 62240 }, { "epoch": 0.5256380485951321, "grad_norm": 0.6182520389556885, "learning_rate": 9.213971985657347e-06, "loss": 0.0179, "step": 62250 }, { "epoch": 0.525722488442296, "grad_norm": 0.28724393248558044, "learning_rate": 9.213575323815083e-06, "loss": 0.0109, "step": 62260 }, { "epoch": 0.5258069282894597, "grad_norm": 0.25459909439086914, "learning_rate": 9.213178570454826e-06, "loss": 0.0078, "step": 62270 }, { "epoch": 0.5258913681366236, "grad_norm": 0.30721810460090637, "learning_rate": 9.212781725585195e-06, "loss": 0.0138, "step": 62280 }, { "epoch": 0.5259758079837875, "grad_norm": 0.3701708912849426, "learning_rate": 9.212384789214807e-06, "loss": 0.008, "step": 62290 }, { "epoch": 0.5260602478309514, "grad_norm": 0.020624715834856033, "learning_rate": 9.211987761352284e-06, "loss": 0.0123, "step": 62300 }, { "epoch": 0.5261446876781153, "grad_norm": 0.36843442916870117, "learning_rate": 9.21159064200625e-06, "loss": 0.0147, "step": 62310 }, { "epoch": 0.5262291275252792, "grad_norm": 0.19552695751190186, "learning_rate": 9.211193431185332e-06, "loss": 0.0096, "step": 62320 }, { "epoch": 0.5263135673724431, "grad_norm": 0.2369818240404129, "learning_rate": 9.210796128898154e-06, "loss": 0.0105, "step": 62330 }, { "epoch": 0.526398007219607, "grad_norm": 0.07671436667442322, "learning_rate": 9.210398735153348e-06, "loss": 0.0061, "step": 62340 }, { "epoch": 0.5264824470667708, "grad_norm": 0.9730133414268494, "learning_rate": 9.210001249959544e-06, "loss": 0.0187, "step": 62350 }, { "epoch": 0.5265668869139347, "grad_norm": 0.24952250719070435, "learning_rate": 9.209603673325374e-06, "loss": 0.0193, "step": 62360 }, { "epoch": 0.5266513267610986, "grad_norm": 0.5674123764038086, "learning_rate": 9.209206005259476e-06, "loss": 0.0193, "step": 62370 }, { "epoch": 0.5267357666082625, "grad_norm": 0.24982747435569763, "learning_rate": 9.208808245770488e-06, "loss": 0.0147, "step": 62380 }, { "epoch": 0.5268202064554263, "grad_norm": 0.9713905453681946, "learning_rate": 9.208410394867045e-06, "loss": 0.0175, "step": 62390 }, { "epoch": 0.5269046463025902, "grad_norm": 0.277195006608963, "learning_rate": 9.208012452557791e-06, "loss": 0.0116, "step": 62400 }, { "epoch": 0.526989086149754, "grad_norm": 0.46329301595687866, "learning_rate": 9.20761441885137e-06, "loss": 0.0132, "step": 62410 }, { "epoch": 0.5270735259969179, "grad_norm": 0.18667195737361908, "learning_rate": 9.207216293756424e-06, "loss": 0.0188, "step": 62420 }, { "epoch": 0.5271579658440818, "grad_norm": 0.7932926416397095, "learning_rate": 9.206818077281604e-06, "loss": 0.0171, "step": 62430 }, { "epoch": 0.5272424056912457, "grad_norm": 0.3319900333881378, "learning_rate": 9.206419769435557e-06, "loss": 0.0125, "step": 62440 }, { "epoch": 0.5273268455384096, "grad_norm": 0.10835644602775574, "learning_rate": 9.206021370226934e-06, "loss": 0.02, "step": 62450 }, { "epoch": 0.5274112853855735, "grad_norm": 0.564069926738739, "learning_rate": 9.205622879664389e-06, "loss": 0.0114, "step": 62460 }, { "epoch": 0.5274957252327374, "grad_norm": 0.3486805856227875, "learning_rate": 9.205224297756577e-06, "loss": 0.018, "step": 62470 }, { "epoch": 0.5275801650799012, "grad_norm": 0.26449477672576904, "learning_rate": 9.204825624512154e-06, "loss": 0.0124, "step": 62480 }, { "epoch": 0.5276646049270651, "grad_norm": 0.08112272620201111, "learning_rate": 9.204426859939784e-06, "loss": 0.0053, "step": 62490 }, { "epoch": 0.5277490447742289, "grad_norm": 0.2518497407436371, "learning_rate": 9.20402800404812e-06, "loss": 0.0081, "step": 62500 }, { "epoch": 0.5278334846213928, "grad_norm": 0.6107924580574036, "learning_rate": 9.203629056845832e-06, "loss": 0.0139, "step": 62510 }, { "epoch": 0.5279179244685567, "grad_norm": 0.15590879321098328, "learning_rate": 9.20323001834158e-06, "loss": 0.0079, "step": 62520 }, { "epoch": 0.5280023643157206, "grad_norm": 0.36744847893714905, "learning_rate": 9.202830888544034e-06, "loss": 0.0112, "step": 62530 }, { "epoch": 0.5280868041628844, "grad_norm": 0.7721605896949768, "learning_rate": 9.202431667461862e-06, "loss": 0.0188, "step": 62540 }, { "epoch": 0.5281712440100483, "grad_norm": 0.49539676308631897, "learning_rate": 9.202032355103737e-06, "loss": 0.0095, "step": 62550 }, { "epoch": 0.5282556838572122, "grad_norm": 0.0396583117544651, "learning_rate": 9.201632951478328e-06, "loss": 0.0155, "step": 62560 }, { "epoch": 0.5283401237043761, "grad_norm": 0.34456074237823486, "learning_rate": 9.201233456594314e-06, "loss": 0.0086, "step": 62570 }, { "epoch": 0.52842456355154, "grad_norm": 0.7958987951278687, "learning_rate": 9.200833870460368e-06, "loss": 0.0178, "step": 62580 }, { "epoch": 0.5285090033987039, "grad_norm": 0.2611936628818512, "learning_rate": 9.200434193085174e-06, "loss": 0.0116, "step": 62590 }, { "epoch": 0.5285934432458678, "grad_norm": 0.3589519262313843, "learning_rate": 9.200034424477408e-06, "loss": 0.0117, "step": 62600 }, { "epoch": 0.5286778830930317, "grad_norm": 0.3031468987464905, "learning_rate": 9.199634564645756e-06, "loss": 0.0128, "step": 62610 }, { "epoch": 0.5287623229401954, "grad_norm": 0.3787369132041931, "learning_rate": 9.199234613598901e-06, "loss": 0.0141, "step": 62620 }, { "epoch": 0.5288467627873593, "grad_norm": 0.24348184466362, "learning_rate": 9.198834571345533e-06, "loss": 0.0083, "step": 62630 }, { "epoch": 0.5289312026345232, "grad_norm": 1.031035304069519, "learning_rate": 9.198434437894337e-06, "loss": 0.0107, "step": 62640 }, { "epoch": 0.5290156424816871, "grad_norm": 0.2525322437286377, "learning_rate": 9.198034213254006e-06, "loss": 0.0138, "step": 62650 }, { "epoch": 0.529100082328851, "grad_norm": 0.09801458567380905, "learning_rate": 9.19763389743323e-06, "loss": 0.0143, "step": 62660 }, { "epoch": 0.5291845221760149, "grad_norm": 0.9284816384315491, "learning_rate": 9.197233490440709e-06, "loss": 0.0142, "step": 62670 }, { "epoch": 0.5292689620231787, "grad_norm": 0.336220383644104, "learning_rate": 9.196832992285134e-06, "loss": 0.0143, "step": 62680 }, { "epoch": 0.5293534018703426, "grad_norm": 0.18405237793922424, "learning_rate": 9.196432402975208e-06, "loss": 0.0124, "step": 62690 }, { "epoch": 0.5294378417175065, "grad_norm": 0.12290380150079727, "learning_rate": 9.196031722519629e-06, "loss": 0.0165, "step": 62700 }, { "epoch": 0.5295222815646704, "grad_norm": 1.1886401176452637, "learning_rate": 9.1956309509271e-06, "loss": 0.0094, "step": 62710 }, { "epoch": 0.5296067214118343, "grad_norm": 0.35584497451782227, "learning_rate": 9.195230088206328e-06, "loss": 0.0132, "step": 62720 }, { "epoch": 0.5296911612589981, "grad_norm": 0.45348498225212097, "learning_rate": 9.194829134366019e-06, "loss": 0.0121, "step": 62730 }, { "epoch": 0.529775601106162, "grad_norm": 0.5715652108192444, "learning_rate": 9.19442808941488e-06, "loss": 0.0152, "step": 62740 }, { "epoch": 0.5298600409533258, "grad_norm": 0.053554944694042206, "learning_rate": 9.194026953361623e-06, "loss": 0.0128, "step": 62750 }, { "epoch": 0.5299444808004897, "grad_norm": 0.32080572843551636, "learning_rate": 9.193625726214959e-06, "loss": 0.0169, "step": 62760 }, { "epoch": 0.5300289206476536, "grad_norm": 0.2395712286233902, "learning_rate": 9.193224407983605e-06, "loss": 0.0145, "step": 62770 }, { "epoch": 0.5301133604948175, "grad_norm": 1.057504653930664, "learning_rate": 9.192822998676275e-06, "loss": 0.02, "step": 62780 }, { "epoch": 0.5301978003419814, "grad_norm": 0.25639814138412476, "learning_rate": 9.192421498301688e-06, "loss": 0.0085, "step": 62790 }, { "epoch": 0.5302822401891453, "grad_norm": 0.6796271204948425, "learning_rate": 9.192019906868566e-06, "loss": 0.0114, "step": 62800 }, { "epoch": 0.5303666800363092, "grad_norm": 0.20268602669239044, "learning_rate": 9.19161822438563e-06, "loss": 0.0115, "step": 62810 }, { "epoch": 0.530451119883473, "grad_norm": 0.46463948488235474, "learning_rate": 9.191216450861606e-06, "loss": 0.0078, "step": 62820 }, { "epoch": 0.5305355597306369, "grad_norm": 0.22393181920051575, "learning_rate": 9.190814586305219e-06, "loss": 0.0083, "step": 62830 }, { "epoch": 0.5306199995778008, "grad_norm": 0.6482208371162415, "learning_rate": 9.190412630725199e-06, "loss": 0.0114, "step": 62840 }, { "epoch": 0.5307044394249646, "grad_norm": 0.45778390765190125, "learning_rate": 9.190010584130274e-06, "loss": 0.0141, "step": 62850 }, { "epoch": 0.5307888792721285, "grad_norm": 1.0029937028884888, "learning_rate": 9.189608446529177e-06, "loss": 0.0205, "step": 62860 }, { "epoch": 0.5308733191192924, "grad_norm": 0.3431202173233032, "learning_rate": 9.189206217930645e-06, "loss": 0.0106, "step": 62870 }, { "epoch": 0.5309577589664563, "grad_norm": 0.7671012878417969, "learning_rate": 9.188803898343412e-06, "loss": 0.0155, "step": 62880 }, { "epoch": 0.5310421988136201, "grad_norm": 0.3368587791919708, "learning_rate": 9.188401487776216e-06, "loss": 0.0081, "step": 62890 }, { "epoch": 0.531126638660784, "grad_norm": 0.3620210587978363, "learning_rate": 9.187998986237798e-06, "loss": 0.0127, "step": 62900 }, { "epoch": 0.5312110785079479, "grad_norm": 0.17461974918842316, "learning_rate": 9.187596393736902e-06, "loss": 0.0229, "step": 62910 }, { "epoch": 0.5312955183551118, "grad_norm": 0.5134785175323486, "learning_rate": 9.18719371028227e-06, "loss": 0.0152, "step": 62920 }, { "epoch": 0.5313799582022757, "grad_norm": 0.5233854055404663, "learning_rate": 9.186790935882645e-06, "loss": 0.0143, "step": 62930 }, { "epoch": 0.5314643980494396, "grad_norm": 0.3546001613140106, "learning_rate": 9.186388070546783e-06, "loss": 0.0158, "step": 62940 }, { "epoch": 0.5315488378966035, "grad_norm": 0.5183781981468201, "learning_rate": 9.185985114283429e-06, "loss": 0.0109, "step": 62950 }, { "epoch": 0.5316332777437672, "grad_norm": 0.7463162541389465, "learning_rate": 9.185582067101335e-06, "loss": 0.0195, "step": 62960 }, { "epoch": 0.5317177175909311, "grad_norm": 0.0727701187133789, "learning_rate": 9.185178929009256e-06, "loss": 0.0141, "step": 62970 }, { "epoch": 0.531802157438095, "grad_norm": 0.3478338420391083, "learning_rate": 9.18477570001595e-06, "loss": 0.0131, "step": 62980 }, { "epoch": 0.5318865972852589, "grad_norm": 0.13209013640880585, "learning_rate": 9.184372380130174e-06, "loss": 0.0063, "step": 62990 }, { "epoch": 0.5319710371324228, "grad_norm": 0.23086844384670258, "learning_rate": 9.183968969360684e-06, "loss": 0.0116, "step": 63000 }, { "epoch": 0.5320554769795867, "grad_norm": 0.14685672521591187, "learning_rate": 9.183565467716247e-06, "loss": 0.0197, "step": 63010 }, { "epoch": 0.5321399168267505, "grad_norm": 0.22116327285766602, "learning_rate": 9.183161875205626e-06, "loss": 0.0063, "step": 63020 }, { "epoch": 0.5322243566739144, "grad_norm": 0.4343920350074768, "learning_rate": 9.182758191837587e-06, "loss": 0.0129, "step": 63030 }, { "epoch": 0.5323087965210783, "grad_norm": 0.3500632345676422, "learning_rate": 9.182354417620895e-06, "loss": 0.0113, "step": 63040 }, { "epoch": 0.5323932363682422, "grad_norm": 0.1126280203461647, "learning_rate": 9.181950552564323e-06, "loss": 0.0116, "step": 63050 }, { "epoch": 0.5324776762154061, "grad_norm": 0.8917087316513062, "learning_rate": 9.181546596676643e-06, "loss": 0.0248, "step": 63060 }, { "epoch": 0.53256211606257, "grad_norm": 0.3073609173297882, "learning_rate": 9.181142549966627e-06, "loss": 0.0132, "step": 63070 }, { "epoch": 0.5326465559097338, "grad_norm": 0.7183436751365662, "learning_rate": 9.180738412443052e-06, "loss": 0.0183, "step": 63080 }, { "epoch": 0.5327309957568976, "grad_norm": 0.6247175931930542, "learning_rate": 9.180334184114694e-06, "loss": 0.0119, "step": 63090 }, { "epoch": 0.5328154356040615, "grad_norm": 0.6142951846122742, "learning_rate": 9.179929864990335e-06, "loss": 0.0121, "step": 63100 }, { "epoch": 0.5328998754512254, "grad_norm": 0.6430513262748718, "learning_rate": 9.179525455078755e-06, "loss": 0.0154, "step": 63110 }, { "epoch": 0.5329843152983893, "grad_norm": 0.40581634640693665, "learning_rate": 9.17912095438874e-06, "loss": 0.0192, "step": 63120 }, { "epoch": 0.5330687551455532, "grad_norm": 0.28646528720855713, "learning_rate": 9.178716362929074e-06, "loss": 0.0148, "step": 63130 }, { "epoch": 0.5331531949927171, "grad_norm": 0.26162946224212646, "learning_rate": 9.178311680708543e-06, "loss": 0.0055, "step": 63140 }, { "epoch": 0.533237634839881, "grad_norm": 0.5778174996376038, "learning_rate": 9.17790690773594e-06, "loss": 0.0099, "step": 63150 }, { "epoch": 0.5333220746870448, "grad_norm": 0.3412994146347046, "learning_rate": 9.177502044020057e-06, "loss": 0.0136, "step": 63160 }, { "epoch": 0.5334065145342087, "grad_norm": 0.5272892117500305, "learning_rate": 9.177097089569683e-06, "loss": 0.0212, "step": 63170 }, { "epoch": 0.5334909543813726, "grad_norm": 0.40078142285346985, "learning_rate": 9.176692044393618e-06, "loss": 0.0108, "step": 63180 }, { "epoch": 0.5335753942285364, "grad_norm": 0.24570806324481964, "learning_rate": 9.176286908500655e-06, "loss": 0.009, "step": 63190 }, { "epoch": 0.5336598340757003, "grad_norm": 0.6520861983299255, "learning_rate": 9.1758816818996e-06, "loss": 0.0135, "step": 63200 }, { "epoch": 0.5337442739228642, "grad_norm": 0.500170111656189, "learning_rate": 9.175476364599247e-06, "loss": 0.0112, "step": 63210 }, { "epoch": 0.533828713770028, "grad_norm": 0.23090705275535583, "learning_rate": 9.175070956608405e-06, "loss": 0.0132, "step": 63220 }, { "epoch": 0.5339131536171919, "grad_norm": 0.2650552988052368, "learning_rate": 9.174665457935876e-06, "loss": 0.0091, "step": 63230 }, { "epoch": 0.5339975934643558, "grad_norm": 0.7109845280647278, "learning_rate": 9.17425986859047e-06, "loss": 0.0096, "step": 63240 }, { "epoch": 0.5340820333115197, "grad_norm": 0.4824064075946808, "learning_rate": 9.173854188580996e-06, "loss": 0.0184, "step": 63250 }, { "epoch": 0.5341664731586836, "grad_norm": 0.24939770996570587, "learning_rate": 9.173448417916262e-06, "loss": 0.012, "step": 63260 }, { "epoch": 0.5342509130058475, "grad_norm": 0.4904167652130127, "learning_rate": 9.173042556605085e-06, "loss": 0.0153, "step": 63270 }, { "epoch": 0.5343353528530114, "grad_norm": 0.22453880310058594, "learning_rate": 9.172636604656278e-06, "loss": 0.0099, "step": 63280 }, { "epoch": 0.5344197927001753, "grad_norm": 0.4837367832660675, "learning_rate": 9.172230562078659e-06, "loss": 0.0153, "step": 63290 }, { "epoch": 0.5345042325473391, "grad_norm": 0.5681564807891846, "learning_rate": 9.171824428881048e-06, "loss": 0.0159, "step": 63300 }, { "epoch": 0.5345886723945029, "grad_norm": 0.47007620334625244, "learning_rate": 9.171418205072264e-06, "loss": 0.018, "step": 63310 }, { "epoch": 0.5346731122416668, "grad_norm": 0.2819159924983978, "learning_rate": 9.171011890661132e-06, "loss": 0.0119, "step": 63320 }, { "epoch": 0.5347575520888307, "grad_norm": 0.17421799898147583, "learning_rate": 9.170605485656478e-06, "loss": 0.0134, "step": 63330 }, { "epoch": 0.5348419919359946, "grad_norm": 0.10217440128326416, "learning_rate": 9.170198990067125e-06, "loss": 0.0169, "step": 63340 }, { "epoch": 0.5349264317831585, "grad_norm": 0.6088838577270508, "learning_rate": 9.169792403901907e-06, "loss": 0.0229, "step": 63350 }, { "epoch": 0.5350108716303223, "grad_norm": 0.1705193966627121, "learning_rate": 9.16938572716965e-06, "loss": 0.0099, "step": 63360 }, { "epoch": 0.5350953114774862, "grad_norm": 0.621980607509613, "learning_rate": 9.16897895987919e-06, "loss": 0.009, "step": 63370 }, { "epoch": 0.5351797513246501, "grad_norm": 0.778219997882843, "learning_rate": 9.168572102039363e-06, "loss": 0.0096, "step": 63380 }, { "epoch": 0.535264191171814, "grad_norm": 0.22587834298610687, "learning_rate": 9.168165153659002e-06, "loss": 0.0105, "step": 63390 }, { "epoch": 0.5353486310189779, "grad_norm": 0.35089319944381714, "learning_rate": 9.16775811474695e-06, "loss": 0.0101, "step": 63400 }, { "epoch": 0.5354330708661418, "grad_norm": 0.4209383726119995, "learning_rate": 9.167350985312044e-06, "loss": 0.0132, "step": 63410 }, { "epoch": 0.5355175107133056, "grad_norm": 0.2614501714706421, "learning_rate": 9.16694376536313e-06, "loss": 0.0097, "step": 63420 }, { "epoch": 0.5356019505604694, "grad_norm": 0.38906988501548767, "learning_rate": 9.16653645490905e-06, "loss": 0.0184, "step": 63430 }, { "epoch": 0.5356863904076333, "grad_norm": 0.28254634141921997, "learning_rate": 9.166129053958653e-06, "loss": 0.0115, "step": 63440 }, { "epoch": 0.5357708302547972, "grad_norm": 0.733725905418396, "learning_rate": 9.165721562520786e-06, "loss": 0.0149, "step": 63450 }, { "epoch": 0.5358552701019611, "grad_norm": 0.14584475755691528, "learning_rate": 9.165313980604301e-06, "loss": 0.0194, "step": 63460 }, { "epoch": 0.535939709949125, "grad_norm": 0.3425444960594177, "learning_rate": 9.16490630821805e-06, "loss": 0.0122, "step": 63470 }, { "epoch": 0.5360241497962889, "grad_norm": 0.15459898114204407, "learning_rate": 9.164498545370885e-06, "loss": 0.0194, "step": 63480 }, { "epoch": 0.5361085896434528, "grad_norm": 0.38062942028045654, "learning_rate": 9.164090692071668e-06, "loss": 0.0202, "step": 63490 }, { "epoch": 0.5361930294906166, "grad_norm": 0.15639537572860718, "learning_rate": 9.163682748329254e-06, "loss": 0.0217, "step": 63500 }, { "epoch": 0.5362774693377805, "grad_norm": 0.7461597919464111, "learning_rate": 9.163274714152504e-06, "loss": 0.0159, "step": 63510 }, { "epoch": 0.5363619091849444, "grad_norm": 0.20097151398658752, "learning_rate": 9.162866589550278e-06, "loss": 0.0167, "step": 63520 }, { "epoch": 0.5364463490321083, "grad_norm": 0.586171567440033, "learning_rate": 9.162458374531446e-06, "loss": 0.0141, "step": 63530 }, { "epoch": 0.5365307888792721, "grad_norm": 0.38213443756103516, "learning_rate": 9.16205006910487e-06, "loss": 0.009, "step": 63540 }, { "epoch": 0.536615228726436, "grad_norm": 0.20474755764007568, "learning_rate": 9.161641673279418e-06, "loss": 0.0102, "step": 63550 }, { "epoch": 0.5366996685735999, "grad_norm": 0.40720421075820923, "learning_rate": 9.161233187063962e-06, "loss": 0.023, "step": 63560 }, { "epoch": 0.5367841084207637, "grad_norm": 0.3083738088607788, "learning_rate": 9.160824610467375e-06, "loss": 0.0155, "step": 63570 }, { "epoch": 0.5368685482679276, "grad_norm": 0.6003158688545227, "learning_rate": 9.160415943498528e-06, "loss": 0.0282, "step": 63580 }, { "epoch": 0.5369529881150915, "grad_norm": 0.6697856187820435, "learning_rate": 9.160007186166301e-06, "loss": 0.0107, "step": 63590 }, { "epoch": 0.5370374279622554, "grad_norm": 0.21003904938697815, "learning_rate": 9.159598338479572e-06, "loss": 0.0128, "step": 63600 }, { "epoch": 0.5371218678094193, "grad_norm": 0.1559416949748993, "learning_rate": 9.159189400447216e-06, "loss": 0.0113, "step": 63610 }, { "epoch": 0.5372063076565832, "grad_norm": 0.09809970110654831, "learning_rate": 9.15878037207812e-06, "loss": 0.0206, "step": 63620 }, { "epoch": 0.5372907475037471, "grad_norm": 0.4392567574977875, "learning_rate": 9.158371253381169e-06, "loss": 0.0159, "step": 63630 }, { "epoch": 0.5373751873509109, "grad_norm": 0.5352887511253357, "learning_rate": 9.157962044365243e-06, "loss": 0.013, "step": 63640 }, { "epoch": 0.5374596271980747, "grad_norm": 0.245040625333786, "learning_rate": 9.157552745039236e-06, "loss": 0.015, "step": 63650 }, { "epoch": 0.5375440670452386, "grad_norm": 0.3983648717403412, "learning_rate": 9.157143355412033e-06, "loss": 0.0135, "step": 63660 }, { "epoch": 0.5376285068924025, "grad_norm": 0.8838504552841187, "learning_rate": 9.15673387549253e-06, "loss": 0.0172, "step": 63670 }, { "epoch": 0.5377129467395664, "grad_norm": 0.33803677558898926, "learning_rate": 9.156324305289617e-06, "loss": 0.0123, "step": 63680 }, { "epoch": 0.5377973865867303, "grad_norm": 0.23755130171775818, "learning_rate": 9.155914644812195e-06, "loss": 0.0165, "step": 63690 }, { "epoch": 0.5378818264338942, "grad_norm": 0.7422756552696228, "learning_rate": 9.155504894069154e-06, "loss": 0.0196, "step": 63700 }, { "epoch": 0.537966266281058, "grad_norm": 0.09591402113437653, "learning_rate": 9.1550950530694e-06, "loss": 0.0112, "step": 63710 }, { "epoch": 0.5380507061282219, "grad_norm": 0.42056822776794434, "learning_rate": 9.154685121821835e-06, "loss": 0.0133, "step": 63720 }, { "epoch": 0.5381351459753858, "grad_norm": 0.49160832166671753, "learning_rate": 9.154275100335359e-06, "loss": 0.014, "step": 63730 }, { "epoch": 0.5382195858225497, "grad_norm": 0.46023866534233093, "learning_rate": 9.153864988618879e-06, "loss": 0.0114, "step": 63740 }, { "epoch": 0.5383040256697136, "grad_norm": 0.29362720251083374, "learning_rate": 9.153454786681303e-06, "loss": 0.0107, "step": 63750 }, { "epoch": 0.5383884655168775, "grad_norm": 0.43624964356422424, "learning_rate": 9.15304449453154e-06, "loss": 0.0166, "step": 63760 }, { "epoch": 0.5384729053640412, "grad_norm": 0.4495662450790405, "learning_rate": 9.152634112178502e-06, "loss": 0.0091, "step": 63770 }, { "epoch": 0.5385573452112051, "grad_norm": 0.43393346667289734, "learning_rate": 9.152223639631102e-06, "loss": 0.0115, "step": 63780 }, { "epoch": 0.538641785058369, "grad_norm": 0.36726927757263184, "learning_rate": 9.151813076898255e-06, "loss": 0.0187, "step": 63790 }, { "epoch": 0.5387262249055329, "grad_norm": 0.38596585392951965, "learning_rate": 9.15140242398888e-06, "loss": 0.0144, "step": 63800 }, { "epoch": 0.5388106647526968, "grad_norm": 0.33262449502944946, "learning_rate": 9.150991680911893e-06, "loss": 0.0102, "step": 63810 }, { "epoch": 0.5388951045998607, "grad_norm": 0.4881182610988617, "learning_rate": 9.150580847676218e-06, "loss": 0.0154, "step": 63820 }, { "epoch": 0.5389795444470246, "grad_norm": 0.480968713760376, "learning_rate": 9.15016992429078e-06, "loss": 0.0111, "step": 63830 }, { "epoch": 0.5390639842941884, "grad_norm": 0.29616832733154297, "learning_rate": 9.149758910764497e-06, "loss": 0.0227, "step": 63840 }, { "epoch": 0.5391484241413523, "grad_norm": 0.127818763256073, "learning_rate": 9.149347807106305e-06, "loss": 0.0187, "step": 63850 }, { "epoch": 0.5392328639885162, "grad_norm": 0.2282707691192627, "learning_rate": 9.148936613325127e-06, "loss": 0.0186, "step": 63860 }, { "epoch": 0.5393173038356801, "grad_norm": 0.5299131274223328, "learning_rate": 9.148525329429896e-06, "loss": 0.0192, "step": 63870 }, { "epoch": 0.5394017436828439, "grad_norm": 0.6007239818572998, "learning_rate": 9.148113955429544e-06, "loss": 0.0115, "step": 63880 }, { "epoch": 0.5394861835300078, "grad_norm": 0.43405696749687195, "learning_rate": 9.14770249133301e-06, "loss": 0.0127, "step": 63890 }, { "epoch": 0.5395706233771717, "grad_norm": 0.21926327049732208, "learning_rate": 9.147290937149225e-06, "loss": 0.0086, "step": 63900 }, { "epoch": 0.5396550632243355, "grad_norm": 0.6457385420799255, "learning_rate": 9.146879292887132e-06, "loss": 0.0113, "step": 63910 }, { "epoch": 0.5397395030714994, "grad_norm": 0.9608795046806335, "learning_rate": 9.14646755855567e-06, "loss": 0.0137, "step": 63920 }, { "epoch": 0.5398239429186633, "grad_norm": 0.29206162691116333, "learning_rate": 9.146055734163783e-06, "loss": 0.0157, "step": 63930 }, { "epoch": 0.5399083827658272, "grad_norm": 0.7073372602462769, "learning_rate": 9.145643819720414e-06, "loss": 0.0141, "step": 63940 }, { "epoch": 0.5399928226129911, "grad_norm": 0.6629050374031067, "learning_rate": 9.145231815234512e-06, "loss": 0.0157, "step": 63950 }, { "epoch": 0.540077262460155, "grad_norm": 0.19892320036888123, "learning_rate": 9.144819720715023e-06, "loss": 0.0074, "step": 63960 }, { "epoch": 0.5401617023073189, "grad_norm": 0.12998078763484955, "learning_rate": 9.144407536170901e-06, "loss": 0.0103, "step": 63970 }, { "epoch": 0.5402461421544827, "grad_norm": 0.2757522463798523, "learning_rate": 9.143995261611093e-06, "loss": 0.0109, "step": 63980 }, { "epoch": 0.5403305820016466, "grad_norm": 0.3223608136177063, "learning_rate": 9.14358289704456e-06, "loss": 0.0069, "step": 63990 }, { "epoch": 0.5404150218488104, "grad_norm": 0.28584912419319153, "learning_rate": 9.143170442480256e-06, "loss": 0.0114, "step": 64000 }, { "epoch": 0.5404994616959743, "grad_norm": 0.6930012702941895, "learning_rate": 9.142757897927138e-06, "loss": 0.0143, "step": 64010 }, { "epoch": 0.5405839015431382, "grad_norm": 0.5216724276542664, "learning_rate": 9.142345263394169e-06, "loss": 0.0121, "step": 64020 }, { "epoch": 0.5406683413903021, "grad_norm": 0.6825242042541504, "learning_rate": 9.141932538890307e-06, "loss": 0.0164, "step": 64030 }, { "epoch": 0.540752781237466, "grad_norm": 0.17198841273784637, "learning_rate": 9.14151972442452e-06, "loss": 0.0088, "step": 64040 }, { "epoch": 0.5408372210846298, "grad_norm": 0.12830927968025208, "learning_rate": 9.141106820005774e-06, "loss": 0.0313, "step": 64050 }, { "epoch": 0.5409216609317937, "grad_norm": 0.5954777002334595, "learning_rate": 9.140693825643036e-06, "loss": 0.0144, "step": 64060 }, { "epoch": 0.5410061007789576, "grad_norm": 0.4585641920566559, "learning_rate": 9.140280741345275e-06, "loss": 0.0111, "step": 64070 }, { "epoch": 0.5410905406261215, "grad_norm": 0.24346967041492462, "learning_rate": 9.139867567121467e-06, "loss": 0.0117, "step": 64080 }, { "epoch": 0.5411749804732854, "grad_norm": 0.34992092847824097, "learning_rate": 9.139454302980584e-06, "loss": 0.0237, "step": 64090 }, { "epoch": 0.5412594203204493, "grad_norm": 0.1806667596101761, "learning_rate": 9.1390409489316e-06, "loss": 0.0089, "step": 64100 }, { "epoch": 0.541343860167613, "grad_norm": 0.7712329030036926, "learning_rate": 9.138627504983496e-06, "loss": 0.0088, "step": 64110 }, { "epoch": 0.5414283000147769, "grad_norm": 0.2543802857398987, "learning_rate": 9.13821397114525e-06, "loss": 0.0103, "step": 64120 }, { "epoch": 0.5415127398619408, "grad_norm": 0.3334650993347168, "learning_rate": 9.137800347425845e-06, "loss": 0.0103, "step": 64130 }, { "epoch": 0.5415971797091047, "grad_norm": 0.6091602444648743, "learning_rate": 9.137386633834265e-06, "loss": 0.0115, "step": 64140 }, { "epoch": 0.5416816195562686, "grad_norm": 0.6091734766960144, "learning_rate": 9.136972830379494e-06, "loss": 0.0203, "step": 64150 }, { "epoch": 0.5417660594034325, "grad_norm": 0.2445969432592392, "learning_rate": 9.136558937070522e-06, "loss": 0.0106, "step": 64160 }, { "epoch": 0.5418504992505964, "grad_norm": 0.3908928334712982, "learning_rate": 9.136144953916334e-06, "loss": 0.0181, "step": 64170 }, { "epoch": 0.5419349390977602, "grad_norm": 0.8457827568054199, "learning_rate": 9.135730880925929e-06, "loss": 0.0196, "step": 64180 }, { "epoch": 0.5420193789449241, "grad_norm": 0.23905399441719055, "learning_rate": 9.135316718108296e-06, "loss": 0.0128, "step": 64190 }, { "epoch": 0.542103818792088, "grad_norm": 0.35600462555885315, "learning_rate": 9.134902465472431e-06, "loss": 0.0099, "step": 64200 }, { "epoch": 0.5421882586392519, "grad_norm": 0.7133082747459412, "learning_rate": 9.134488123027332e-06, "loss": 0.0162, "step": 64210 }, { "epoch": 0.5422726984864158, "grad_norm": 0.3663591742515564, "learning_rate": 9.134073690781996e-06, "loss": 0.0125, "step": 64220 }, { "epoch": 0.5423571383335796, "grad_norm": 0.3011837303638458, "learning_rate": 9.133659168745431e-06, "loss": 0.0177, "step": 64230 }, { "epoch": 0.5424415781807435, "grad_norm": 0.5254948139190674, "learning_rate": 9.133244556926632e-06, "loss": 0.0141, "step": 64240 }, { "epoch": 0.5425260180279073, "grad_norm": 0.3829164505004883, "learning_rate": 9.13282985533461e-06, "loss": 0.0105, "step": 64250 }, { "epoch": 0.5426104578750712, "grad_norm": 0.44772660732269287, "learning_rate": 9.13241506397837e-06, "loss": 0.0125, "step": 64260 }, { "epoch": 0.5426948977222351, "grad_norm": 0.41873031854629517, "learning_rate": 9.132000182866922e-06, "loss": 0.02, "step": 64270 }, { "epoch": 0.542779337569399, "grad_norm": 0.613778829574585, "learning_rate": 9.131585212009277e-06, "loss": 0.0172, "step": 64280 }, { "epoch": 0.5428637774165629, "grad_norm": 0.4880652129650116, "learning_rate": 9.131170151414447e-06, "loss": 0.012, "step": 64290 }, { "epoch": 0.5429482172637268, "grad_norm": 0.4825736880302429, "learning_rate": 9.130755001091449e-06, "loss": 0.0187, "step": 64300 }, { "epoch": 0.5430326571108907, "grad_norm": 0.107016921043396, "learning_rate": 9.130339761049296e-06, "loss": 0.0086, "step": 64310 }, { "epoch": 0.5431170969580545, "grad_norm": 0.28228864073753357, "learning_rate": 9.129924431297013e-06, "loss": 0.0138, "step": 64320 }, { "epoch": 0.5432015368052184, "grad_norm": 0.28893086314201355, "learning_rate": 9.129509011843615e-06, "loss": 0.0081, "step": 64330 }, { "epoch": 0.5432859766523822, "grad_norm": 0.38101840019226074, "learning_rate": 9.12909350269813e-06, "loss": 0.0155, "step": 64340 }, { "epoch": 0.5433704164995461, "grad_norm": 0.511762261390686, "learning_rate": 9.12867790386958e-06, "loss": 0.0131, "step": 64350 }, { "epoch": 0.54345485634671, "grad_norm": 0.31401729583740234, "learning_rate": 9.12826221536699e-06, "loss": 0.0108, "step": 64360 }, { "epoch": 0.5435392961938739, "grad_norm": 0.6957026720046997, "learning_rate": 9.127846437199393e-06, "loss": 0.0144, "step": 64370 }, { "epoch": 0.5436237360410378, "grad_norm": 0.3154365122318268, "learning_rate": 9.127430569375815e-06, "loss": 0.0105, "step": 64380 }, { "epoch": 0.5437081758882016, "grad_norm": 0.31389984488487244, "learning_rate": 9.12701461190529e-06, "loss": 0.0145, "step": 64390 }, { "epoch": 0.5437926157353655, "grad_norm": 0.1036781370639801, "learning_rate": 9.126598564796858e-06, "loss": 0.0124, "step": 64400 }, { "epoch": 0.5438770555825294, "grad_norm": 0.7587053179740906, "learning_rate": 9.126182428059547e-06, "loss": 0.0111, "step": 64410 }, { "epoch": 0.5439614954296933, "grad_norm": 0.4467964470386505, "learning_rate": 9.125766201702401e-06, "loss": 0.0178, "step": 64420 }, { "epoch": 0.5440459352768572, "grad_norm": 0.43602901697158813, "learning_rate": 9.125349885734458e-06, "loss": 0.0184, "step": 64430 }, { "epoch": 0.5441303751240211, "grad_norm": 0.575322151184082, "learning_rate": 9.12493348016476e-06, "loss": 0.0272, "step": 64440 }, { "epoch": 0.544214814971185, "grad_norm": 0.6237276196479797, "learning_rate": 9.124516985002352e-06, "loss": 0.0117, "step": 64450 }, { "epoch": 0.5442992548183487, "grad_norm": 0.38419944047927856, "learning_rate": 9.124100400256281e-06, "loss": 0.0128, "step": 64460 }, { "epoch": 0.5443836946655126, "grad_norm": 0.3688862919807434, "learning_rate": 9.123683725935595e-06, "loss": 0.0085, "step": 64470 }, { "epoch": 0.5444681345126765, "grad_norm": 0.2920226454734802, "learning_rate": 9.123266962049344e-06, "loss": 0.0113, "step": 64480 }, { "epoch": 0.5445525743598404, "grad_norm": 0.4926471412181854, "learning_rate": 9.122850108606578e-06, "loss": 0.0114, "step": 64490 }, { "epoch": 0.5446370142070043, "grad_norm": 0.41770172119140625, "learning_rate": 9.122433165616352e-06, "loss": 0.0119, "step": 64500 }, { "epoch": 0.5447214540541682, "grad_norm": 0.5011540055274963, "learning_rate": 9.122016133087723e-06, "loss": 0.0315, "step": 64510 }, { "epoch": 0.544805893901332, "grad_norm": 0.1951143741607666, "learning_rate": 9.12159901102975e-06, "loss": 0.0141, "step": 64520 }, { "epoch": 0.5448903337484959, "grad_norm": 0.3680180609226227, "learning_rate": 9.121181799451488e-06, "loss": 0.0145, "step": 64530 }, { "epoch": 0.5449747735956598, "grad_norm": 0.4690200388431549, "learning_rate": 9.120764498362005e-06, "loss": 0.0138, "step": 64540 }, { "epoch": 0.5450592134428237, "grad_norm": 0.5948374271392822, "learning_rate": 9.12034710777036e-06, "loss": 0.0134, "step": 64550 }, { "epoch": 0.5451436532899876, "grad_norm": 0.6755700707435608, "learning_rate": 9.11992962768562e-06, "loss": 0.0069, "step": 64560 }, { "epoch": 0.5452280931371514, "grad_norm": 0.1520814299583435, "learning_rate": 9.119512058116852e-06, "loss": 0.0133, "step": 64570 }, { "epoch": 0.5453125329843153, "grad_norm": 0.4309890568256378, "learning_rate": 9.119094399073128e-06, "loss": 0.0095, "step": 64580 }, { "epoch": 0.5453969728314791, "grad_norm": 0.4032444655895233, "learning_rate": 9.118676650563517e-06, "loss": 0.0123, "step": 64590 }, { "epoch": 0.545481412678643, "grad_norm": 0.3451565206050873, "learning_rate": 9.118258812597093e-06, "loss": 0.0163, "step": 64600 }, { "epoch": 0.5455658525258069, "grad_norm": 0.44932296872138977, "learning_rate": 9.117840885182932e-06, "loss": 0.0115, "step": 64610 }, { "epoch": 0.5456502923729708, "grad_norm": 0.38880518078804016, "learning_rate": 9.117422868330109e-06, "loss": 0.0157, "step": 64620 }, { "epoch": 0.5457347322201347, "grad_norm": 0.29696890711784363, "learning_rate": 9.117004762047708e-06, "loss": 0.0095, "step": 64630 }, { "epoch": 0.5458191720672986, "grad_norm": 0.3177453279495239, "learning_rate": 9.116586566344806e-06, "loss": 0.0126, "step": 64640 }, { "epoch": 0.5459036119144625, "grad_norm": 0.40982550382614136, "learning_rate": 9.116168281230487e-06, "loss": 0.0135, "step": 64650 }, { "epoch": 0.5459880517616263, "grad_norm": 0.9625276923179626, "learning_rate": 9.115749906713836e-06, "loss": 0.0149, "step": 64660 }, { "epoch": 0.5460724916087902, "grad_norm": 0.5532652735710144, "learning_rate": 9.11533144280394e-06, "loss": 0.0192, "step": 64670 }, { "epoch": 0.5461569314559541, "grad_norm": 0.387339323759079, "learning_rate": 9.114912889509889e-06, "loss": 0.0246, "step": 64680 }, { "epoch": 0.5462413713031179, "grad_norm": 0.20254361629486084, "learning_rate": 9.114494246840774e-06, "loss": 0.0266, "step": 64690 }, { "epoch": 0.5463258111502818, "grad_norm": 0.6476821899414062, "learning_rate": 9.114075514805686e-06, "loss": 0.0128, "step": 64700 }, { "epoch": 0.5464102509974457, "grad_norm": 0.29361632466316223, "learning_rate": 9.113656693413722e-06, "loss": 0.0129, "step": 64710 }, { "epoch": 0.5464946908446096, "grad_norm": 0.32193097472190857, "learning_rate": 9.113237782673977e-06, "loss": 0.0117, "step": 64720 }, { "epoch": 0.5465791306917734, "grad_norm": 0.04569639638066292, "learning_rate": 9.112818782595549e-06, "loss": 0.0144, "step": 64730 }, { "epoch": 0.5466635705389373, "grad_norm": 0.530100405216217, "learning_rate": 9.11239969318754e-06, "loss": 0.0121, "step": 64740 }, { "epoch": 0.5467480103861012, "grad_norm": 0.11934149265289307, "learning_rate": 9.111980514459056e-06, "loss": 0.0079, "step": 64750 }, { "epoch": 0.5468324502332651, "grad_norm": 0.5991396307945251, "learning_rate": 9.111561246419194e-06, "loss": 0.0144, "step": 64760 }, { "epoch": 0.546916890080429, "grad_norm": 0.4280768036842346, "learning_rate": 9.111141889077066e-06, "loss": 0.0208, "step": 64770 }, { "epoch": 0.5470013299275929, "grad_norm": 0.26833733916282654, "learning_rate": 9.110722442441779e-06, "loss": 0.0183, "step": 64780 }, { "epoch": 0.5470857697747568, "grad_norm": 0.3707754611968994, "learning_rate": 9.110302906522441e-06, "loss": 0.018, "step": 64790 }, { "epoch": 0.5471702096219205, "grad_norm": 0.43709155917167664, "learning_rate": 9.109883281328167e-06, "loss": 0.011, "step": 64800 }, { "epoch": 0.5472546494690844, "grad_norm": 0.31694456934928894, "learning_rate": 9.10946356686807e-06, "loss": 0.0071, "step": 64810 }, { "epoch": 0.5473390893162483, "grad_norm": 0.15624402463436127, "learning_rate": 9.109043763151269e-06, "loss": 0.0128, "step": 64820 }, { "epoch": 0.5474235291634122, "grad_norm": 0.34139376878738403, "learning_rate": 9.108623870186877e-06, "loss": 0.0094, "step": 64830 }, { "epoch": 0.5475079690105761, "grad_norm": 0.2582133114337921, "learning_rate": 9.108203887984017e-06, "loss": 0.0121, "step": 64840 }, { "epoch": 0.54759240885774, "grad_norm": 0.5194640755653381, "learning_rate": 9.107783816551809e-06, "loss": 0.015, "step": 64850 }, { "epoch": 0.5476768487049039, "grad_norm": 0.19389165937900543, "learning_rate": 9.10736365589938e-06, "loss": 0.02, "step": 64860 }, { "epoch": 0.5477612885520677, "grad_norm": 0.3663148880004883, "learning_rate": 9.106943406035855e-06, "loss": 0.0137, "step": 64870 }, { "epoch": 0.5478457283992316, "grad_norm": 0.19231343269348145, "learning_rate": 9.106523066970358e-06, "loss": 0.0131, "step": 64880 }, { "epoch": 0.5479301682463955, "grad_norm": 0.41100016236305237, "learning_rate": 9.106102638712024e-06, "loss": 0.0167, "step": 64890 }, { "epoch": 0.5480146080935594, "grad_norm": 1.1568504571914673, "learning_rate": 9.10568212126998e-06, "loss": 0.013, "step": 64900 }, { "epoch": 0.5480990479407232, "grad_norm": 0.296192467212677, "learning_rate": 9.105261514653364e-06, "loss": 0.0051, "step": 64910 }, { "epoch": 0.5481834877878871, "grad_norm": 0.4029102921485901, "learning_rate": 9.104840818871308e-06, "loss": 0.0102, "step": 64920 }, { "epoch": 0.548267927635051, "grad_norm": 0.10218963772058487, "learning_rate": 9.10442003393295e-06, "loss": 0.0061, "step": 64930 }, { "epoch": 0.5483523674822148, "grad_norm": 0.30809837579727173, "learning_rate": 9.103999159847431e-06, "loss": 0.0101, "step": 64940 }, { "epoch": 0.5484368073293787, "grad_norm": 0.3472250998020172, "learning_rate": 9.103578196623891e-06, "loss": 0.0145, "step": 64950 }, { "epoch": 0.5485212471765426, "grad_norm": 0.673815906047821, "learning_rate": 9.103157144271473e-06, "loss": 0.0169, "step": 64960 }, { "epoch": 0.5486056870237065, "grad_norm": 1.0325651168823242, "learning_rate": 9.102736002799324e-06, "loss": 0.0203, "step": 64970 }, { "epoch": 0.5486901268708704, "grad_norm": 0.5270192623138428, "learning_rate": 9.102314772216588e-06, "loss": 0.0085, "step": 64980 }, { "epoch": 0.5487745667180343, "grad_norm": 0.3748774826526642, "learning_rate": 9.101893452532415e-06, "loss": 0.0109, "step": 64990 }, { "epoch": 0.5488590065651981, "grad_norm": 0.15754196047782898, "learning_rate": 9.10147204375596e-06, "loss": 0.0107, "step": 65000 }, { "epoch": 0.548943446412362, "grad_norm": 0.3123035728931427, "learning_rate": 9.10105054589637e-06, "loss": 0.0119, "step": 65010 }, { "epoch": 0.5490278862595259, "grad_norm": 0.18679176270961761, "learning_rate": 9.100628958962802e-06, "loss": 0.0088, "step": 65020 }, { "epoch": 0.5491123261066897, "grad_norm": 0.33563926815986633, "learning_rate": 9.100207282964416e-06, "loss": 0.0134, "step": 65030 }, { "epoch": 0.5491967659538536, "grad_norm": 0.33087605237960815, "learning_rate": 9.099785517910367e-06, "loss": 0.0125, "step": 65040 }, { "epoch": 0.5492812058010175, "grad_norm": 0.34754908084869385, "learning_rate": 9.099363663809816e-06, "loss": 0.0158, "step": 65050 }, { "epoch": 0.5493656456481814, "grad_norm": 0.47885605692863464, "learning_rate": 9.098941720671926e-06, "loss": 0.0154, "step": 65060 }, { "epoch": 0.5494500854953452, "grad_norm": 0.50501549243927, "learning_rate": 9.098519688505862e-06, "loss": 0.0134, "step": 65070 }, { "epoch": 0.5495345253425091, "grad_norm": 0.28176289796829224, "learning_rate": 9.098097567320791e-06, "loss": 0.0137, "step": 65080 }, { "epoch": 0.549618965189673, "grad_norm": 1.1037729978561401, "learning_rate": 9.097675357125881e-06, "loss": 0.0185, "step": 65090 }, { "epoch": 0.5497034050368369, "grad_norm": 0.2249028980731964, "learning_rate": 9.097253057930301e-06, "loss": 0.0131, "step": 65100 }, { "epoch": 0.5497878448840008, "grad_norm": 0.45507094264030457, "learning_rate": 9.096830669743225e-06, "loss": 0.0075, "step": 65110 }, { "epoch": 0.5498722847311647, "grad_norm": 0.275541752576828, "learning_rate": 9.096408192573825e-06, "loss": 0.0119, "step": 65120 }, { "epoch": 0.5499567245783286, "grad_norm": 0.9045043587684631, "learning_rate": 9.09598562643128e-06, "loss": 0.0105, "step": 65130 }, { "epoch": 0.5500411644254923, "grad_norm": 0.20862126350402832, "learning_rate": 9.095562971324765e-06, "loss": 0.0143, "step": 65140 }, { "epoch": 0.5501256042726562, "grad_norm": 0.5194298624992371, "learning_rate": 9.095140227263463e-06, "loss": 0.0118, "step": 65150 }, { "epoch": 0.5502100441198201, "grad_norm": 0.2196069210767746, "learning_rate": 9.094717394256553e-06, "loss": 0.0118, "step": 65160 }, { "epoch": 0.550294483966984, "grad_norm": 0.6536879539489746, "learning_rate": 9.09429447231322e-06, "loss": 0.0181, "step": 65170 }, { "epoch": 0.5503789238141479, "grad_norm": 0.40542924404144287, "learning_rate": 9.093871461442653e-06, "loss": 0.0112, "step": 65180 }, { "epoch": 0.5504633636613118, "grad_norm": 0.2037128061056137, "learning_rate": 9.093448361654034e-06, "loss": 0.0091, "step": 65190 }, { "epoch": 0.5505478035084757, "grad_norm": 0.7978342771530151, "learning_rate": 9.093025172956556e-06, "loss": 0.0213, "step": 65200 }, { "epoch": 0.5506322433556395, "grad_norm": 0.15040801465511322, "learning_rate": 9.09260189535941e-06, "loss": 0.0119, "step": 65210 }, { "epoch": 0.5507166832028034, "grad_norm": 0.44806182384490967, "learning_rate": 9.092178528871791e-06, "loss": 0.0125, "step": 65220 }, { "epoch": 0.5508011230499673, "grad_norm": 0.24598157405853271, "learning_rate": 9.091755073502892e-06, "loss": 0.0112, "step": 65230 }, { "epoch": 0.5508855628971312, "grad_norm": 0.30274316668510437, "learning_rate": 9.09133152926191e-06, "loss": 0.0117, "step": 65240 }, { "epoch": 0.5509700027442951, "grad_norm": 0.07394642382860184, "learning_rate": 9.090907896158047e-06, "loss": 0.01, "step": 65250 }, { "epoch": 0.5510544425914589, "grad_norm": 0.3071865141391754, "learning_rate": 9.090484174200503e-06, "loss": 0.0136, "step": 65260 }, { "epoch": 0.5511388824386227, "grad_norm": 0.30125099420547485, "learning_rate": 9.090060363398478e-06, "loss": 0.0149, "step": 65270 }, { "epoch": 0.5512233222857866, "grad_norm": 0.3541013300418854, "learning_rate": 9.089636463761184e-06, "loss": 0.0108, "step": 65280 }, { "epoch": 0.5513077621329505, "grad_norm": 0.31493863463401794, "learning_rate": 9.089212475297822e-06, "loss": 0.0122, "step": 65290 }, { "epoch": 0.5513922019801144, "grad_norm": 0.3873831331729889, "learning_rate": 9.088788398017604e-06, "loss": 0.0071, "step": 65300 }, { "epoch": 0.5514766418272783, "grad_norm": 0.2586505115032196, "learning_rate": 9.088364231929738e-06, "loss": 0.019, "step": 65310 }, { "epoch": 0.5515610816744422, "grad_norm": 0.17917032539844513, "learning_rate": 9.087939977043441e-06, "loss": 0.0158, "step": 65320 }, { "epoch": 0.5516455215216061, "grad_norm": 0.28913500905036926, "learning_rate": 9.087515633367924e-06, "loss": 0.0151, "step": 65330 }, { "epoch": 0.55172996136877, "grad_norm": 0.46301040053367615, "learning_rate": 9.087091200912405e-06, "loss": 0.0096, "step": 65340 }, { "epoch": 0.5518144012159338, "grad_norm": 0.49810829758644104, "learning_rate": 9.086666679686104e-06, "loss": 0.0152, "step": 65350 }, { "epoch": 0.5518988410630977, "grad_norm": 0.35402190685272217, "learning_rate": 9.08624206969824e-06, "loss": 0.0169, "step": 65360 }, { "epoch": 0.5519832809102615, "grad_norm": 0.339927077293396, "learning_rate": 9.085817370958034e-06, "loss": 0.016, "step": 65370 }, { "epoch": 0.5520677207574254, "grad_norm": 0.11283606290817261, "learning_rate": 9.085392583474713e-06, "loss": 0.0053, "step": 65380 }, { "epoch": 0.5521521606045893, "grad_norm": 0.7102558016777039, "learning_rate": 9.084967707257502e-06, "loss": 0.0163, "step": 65390 }, { "epoch": 0.5522366004517532, "grad_norm": 1.1202833652496338, "learning_rate": 9.08454274231563e-06, "loss": 0.0269, "step": 65400 }, { "epoch": 0.552321040298917, "grad_norm": 0.32815060019493103, "learning_rate": 9.084117688658325e-06, "loss": 0.0154, "step": 65410 }, { "epoch": 0.5524054801460809, "grad_norm": 0.13082648813724518, "learning_rate": 9.083692546294821e-06, "loss": 0.0144, "step": 65420 }, { "epoch": 0.5524899199932448, "grad_norm": 0.9646917581558228, "learning_rate": 9.083267315234353e-06, "loss": 0.0181, "step": 65430 }, { "epoch": 0.5525743598404087, "grad_norm": 0.491868257522583, "learning_rate": 9.082841995486155e-06, "loss": 0.0116, "step": 65440 }, { "epoch": 0.5526587996875726, "grad_norm": 0.4620809257030487, "learning_rate": 9.082416587059466e-06, "loss": 0.0161, "step": 65450 }, { "epoch": 0.5527432395347365, "grad_norm": 0.5068920850753784, "learning_rate": 9.081991089963526e-06, "loss": 0.0136, "step": 65460 }, { "epoch": 0.5528276793819004, "grad_norm": 0.1244635060429573, "learning_rate": 9.081565504207574e-06, "loss": 0.0171, "step": 65470 }, { "epoch": 0.5529121192290642, "grad_norm": 0.622941255569458, "learning_rate": 9.081139829800857e-06, "loss": 0.0201, "step": 65480 }, { "epoch": 0.552996559076228, "grad_norm": 0.8818827271461487, "learning_rate": 9.080714066752618e-06, "loss": 0.0179, "step": 65490 }, { "epoch": 0.5530809989233919, "grad_norm": 0.4797087609767914, "learning_rate": 9.080288215072106e-06, "loss": 0.0143, "step": 65500 }, { "epoch": 0.5531654387705558, "grad_norm": 0.24926860630512238, "learning_rate": 9.07986227476857e-06, "loss": 0.011, "step": 65510 }, { "epoch": 0.5532498786177197, "grad_norm": 0.6834853887557983, "learning_rate": 9.079436245851264e-06, "loss": 0.0151, "step": 65520 }, { "epoch": 0.5533343184648836, "grad_norm": 0.3433392643928528, "learning_rate": 9.079010128329437e-06, "loss": 0.018, "step": 65530 }, { "epoch": 0.5534187583120475, "grad_norm": 1.2718660831451416, "learning_rate": 9.078583922212346e-06, "loss": 0.013, "step": 65540 }, { "epoch": 0.5535031981592113, "grad_norm": 0.6732326149940491, "learning_rate": 9.078157627509248e-06, "loss": 0.008, "step": 65550 }, { "epoch": 0.5535876380063752, "grad_norm": 0.1546405702829361, "learning_rate": 9.0777312442294e-06, "loss": 0.016, "step": 65560 }, { "epoch": 0.5536720778535391, "grad_norm": 0.11034277826547623, "learning_rate": 9.077304772382068e-06, "loss": 0.0095, "step": 65570 }, { "epoch": 0.553756517700703, "grad_norm": 0.8009501695632935, "learning_rate": 9.07687821197651e-06, "loss": 0.0237, "step": 65580 }, { "epoch": 0.5538409575478669, "grad_norm": 0.10408446192741394, "learning_rate": 9.076451563021993e-06, "loss": 0.0114, "step": 65590 }, { "epoch": 0.5539253973950307, "grad_norm": 0.3511178493499756, "learning_rate": 9.076024825527783e-06, "loss": 0.0147, "step": 65600 }, { "epoch": 0.5540098372421945, "grad_norm": 0.22928473353385925, "learning_rate": 9.07559799950315e-06, "loss": 0.0183, "step": 65610 }, { "epoch": 0.5540942770893584, "grad_norm": 0.44354748725891113, "learning_rate": 9.075171084957364e-06, "loss": 0.0109, "step": 65620 }, { "epoch": 0.5541787169365223, "grad_norm": 0.7877190113067627, "learning_rate": 9.074744081899695e-06, "loss": 0.0096, "step": 65630 }, { "epoch": 0.5542631567836862, "grad_norm": 0.6611826419830322, "learning_rate": 9.074316990339422e-06, "loss": 0.0154, "step": 65640 }, { "epoch": 0.5543475966308501, "grad_norm": 0.22449210286140442, "learning_rate": 9.073889810285818e-06, "loss": 0.0132, "step": 65650 }, { "epoch": 0.554432036478014, "grad_norm": 0.23882852494716644, "learning_rate": 9.073462541748161e-06, "loss": 0.01, "step": 65660 }, { "epoch": 0.5545164763251779, "grad_norm": 0.2988041639328003, "learning_rate": 9.073035184735734e-06, "loss": 0.009, "step": 65670 }, { "epoch": 0.5546009161723418, "grad_norm": 0.27789387106895447, "learning_rate": 9.072607739257816e-06, "loss": 0.0139, "step": 65680 }, { "epoch": 0.5546853560195056, "grad_norm": 0.3293345868587494, "learning_rate": 9.072180205323693e-06, "loss": 0.0069, "step": 65690 }, { "epoch": 0.5547697958666695, "grad_norm": 0.5144878029823303, "learning_rate": 9.07175258294265e-06, "loss": 0.0155, "step": 65700 }, { "epoch": 0.5548542357138334, "grad_norm": 0.5842637419700623, "learning_rate": 9.071324872123977e-06, "loss": 0.0188, "step": 65710 }, { "epoch": 0.5549386755609972, "grad_norm": 0.010918493382632732, "learning_rate": 9.070897072876961e-06, "loss": 0.0204, "step": 65720 }, { "epoch": 0.5550231154081611, "grad_norm": 0.26022103428840637, "learning_rate": 9.070469185210895e-06, "loss": 0.0114, "step": 65730 }, { "epoch": 0.555107555255325, "grad_norm": 0.2314460277557373, "learning_rate": 9.070041209135072e-06, "loss": 0.0092, "step": 65740 }, { "epoch": 0.5551919951024888, "grad_norm": 0.3015086352825165, "learning_rate": 9.069613144658788e-06, "loss": 0.0117, "step": 65750 }, { "epoch": 0.5552764349496527, "grad_norm": 0.4132625162601471, "learning_rate": 9.06918499179134e-06, "loss": 0.0094, "step": 65760 }, { "epoch": 0.5553608747968166, "grad_norm": 0.19571079313755035, "learning_rate": 9.068756750542029e-06, "loss": 0.0072, "step": 65770 }, { "epoch": 0.5554453146439805, "grad_norm": 0.18132953345775604, "learning_rate": 9.068328420920154e-06, "loss": 0.0075, "step": 65780 }, { "epoch": 0.5555297544911444, "grad_norm": 0.3569013476371765, "learning_rate": 9.06790000293502e-06, "loss": 0.01, "step": 65790 }, { "epoch": 0.5556141943383083, "grad_norm": 0.14831849932670593, "learning_rate": 9.067471496595932e-06, "loss": 0.0139, "step": 65800 }, { "epoch": 0.5556986341854722, "grad_norm": 0.24492394924163818, "learning_rate": 9.067042901912195e-06, "loss": 0.0108, "step": 65810 }, { "epoch": 0.555783074032636, "grad_norm": 0.2537079155445099, "learning_rate": 9.066614218893121e-06, "loss": 0.0156, "step": 65820 }, { "epoch": 0.5558675138797998, "grad_norm": 0.10998101532459259, "learning_rate": 9.06618544754802e-06, "loss": 0.0161, "step": 65830 }, { "epoch": 0.5559519537269637, "grad_norm": 1.0542535781860352, "learning_rate": 9.065756587886204e-06, "loss": 0.0203, "step": 65840 }, { "epoch": 0.5560363935741276, "grad_norm": 0.7419146299362183, "learning_rate": 9.065327639916985e-06, "loss": 0.0151, "step": 65850 }, { "epoch": 0.5561208334212915, "grad_norm": 0.26052001118659973, "learning_rate": 9.064898603649685e-06, "loss": 0.0207, "step": 65860 }, { "epoch": 0.5562052732684554, "grad_norm": 0.44049492478370667, "learning_rate": 9.064469479093622e-06, "loss": 0.0112, "step": 65870 }, { "epoch": 0.5562897131156193, "grad_norm": 0.35395050048828125, "learning_rate": 9.064040266258113e-06, "loss": 0.0129, "step": 65880 }, { "epoch": 0.5563741529627831, "grad_norm": 0.1597486138343811, "learning_rate": 9.063610965152483e-06, "loss": 0.0063, "step": 65890 }, { "epoch": 0.556458592809947, "grad_norm": 0.566561758518219, "learning_rate": 9.063181575786055e-06, "loss": 0.0166, "step": 65900 }, { "epoch": 0.5565430326571109, "grad_norm": 0.47848036885261536, "learning_rate": 9.062752098168155e-06, "loss": 0.0162, "step": 65910 }, { "epoch": 0.5566274725042748, "grad_norm": 0.37393277883529663, "learning_rate": 9.062322532308113e-06, "loss": 0.0109, "step": 65920 }, { "epoch": 0.5567119123514387, "grad_norm": 0.1685924530029297, "learning_rate": 9.061892878215258e-06, "loss": 0.0089, "step": 65930 }, { "epoch": 0.5567963521986026, "grad_norm": 0.2839011251926422, "learning_rate": 9.061463135898923e-06, "loss": 0.0126, "step": 65940 }, { "epoch": 0.5568807920457663, "grad_norm": 0.3752729594707489, "learning_rate": 9.06103330536844e-06, "loss": 0.0139, "step": 65950 }, { "epoch": 0.5569652318929302, "grad_norm": 0.26537853479385376, "learning_rate": 9.060603386633145e-06, "loss": 0.0151, "step": 65960 }, { "epoch": 0.5570496717400941, "grad_norm": 0.41374799609184265, "learning_rate": 9.060173379702378e-06, "loss": 0.012, "step": 65970 }, { "epoch": 0.557134111587258, "grad_norm": 0.18738460540771484, "learning_rate": 9.059743284585476e-06, "loss": 0.0096, "step": 65980 }, { "epoch": 0.5572185514344219, "grad_norm": 0.7300302386283875, "learning_rate": 9.059313101291781e-06, "loss": 0.0117, "step": 65990 }, { "epoch": 0.5573029912815858, "grad_norm": 0.456112802028656, "learning_rate": 9.058882829830638e-06, "loss": 0.0155, "step": 66000 }, { "epoch": 0.5573874311287497, "grad_norm": 0.33045345544815063, "learning_rate": 9.058452470211393e-06, "loss": 0.0128, "step": 66010 }, { "epoch": 0.5574718709759136, "grad_norm": 0.6961272358894348, "learning_rate": 9.058022022443392e-06, "loss": 0.018, "step": 66020 }, { "epoch": 0.5575563108230774, "grad_norm": 0.4574987590312958, "learning_rate": 9.057591486535982e-06, "loss": 0.0079, "step": 66030 }, { "epoch": 0.5576407506702413, "grad_norm": 0.7060009837150574, "learning_rate": 9.057160862498518e-06, "loss": 0.0107, "step": 66040 }, { "epoch": 0.5577251905174052, "grad_norm": 0.3956716060638428, "learning_rate": 9.056730150340352e-06, "loss": 0.0112, "step": 66050 }, { "epoch": 0.557809630364569, "grad_norm": 0.1915370672941208, "learning_rate": 9.056299350070837e-06, "loss": 0.0106, "step": 66060 }, { "epoch": 0.5578940702117329, "grad_norm": 0.1153283640742302, "learning_rate": 9.055868461699331e-06, "loss": 0.006, "step": 66070 }, { "epoch": 0.5579785100588968, "grad_norm": 0.4321998655796051, "learning_rate": 9.055437485235195e-06, "loss": 0.0166, "step": 66080 }, { "epoch": 0.5580629499060606, "grad_norm": 0.5629144310951233, "learning_rate": 9.055006420687787e-06, "loss": 0.0132, "step": 66090 }, { "epoch": 0.5581473897532245, "grad_norm": 0.07227829098701477, "learning_rate": 9.05457526806647e-06, "loss": 0.0081, "step": 66100 }, { "epoch": 0.5582318296003884, "grad_norm": 0.18467360734939575, "learning_rate": 9.05414402738061e-06, "loss": 0.0115, "step": 66110 }, { "epoch": 0.5583162694475523, "grad_norm": 0.3304799497127533, "learning_rate": 9.053712698639572e-06, "loss": 0.0123, "step": 66120 }, { "epoch": 0.5584007092947162, "grad_norm": 0.21952852606773376, "learning_rate": 9.053281281852725e-06, "loss": 0.0182, "step": 66130 }, { "epoch": 0.5584851491418801, "grad_norm": 0.24458417296409607, "learning_rate": 9.05284977702944e-06, "loss": 0.0098, "step": 66140 }, { "epoch": 0.558569588989044, "grad_norm": 0.613335132598877, "learning_rate": 9.052418184179085e-06, "loss": 0.022, "step": 66150 }, { "epoch": 0.5586540288362078, "grad_norm": 0.720363438129425, "learning_rate": 9.05198650331104e-06, "loss": 0.0139, "step": 66160 }, { "epoch": 0.5587384686833717, "grad_norm": 0.19774697721004486, "learning_rate": 9.05155473443468e-06, "loss": 0.0064, "step": 66170 }, { "epoch": 0.5588229085305355, "grad_norm": 0.5128251910209656, "learning_rate": 9.05112287755938e-06, "loss": 0.0153, "step": 66180 }, { "epoch": 0.5589073483776994, "grad_norm": 0.01830815151333809, "learning_rate": 9.05069093269452e-06, "loss": 0.0091, "step": 66190 }, { "epoch": 0.5589917882248633, "grad_norm": 0.19156527519226074, "learning_rate": 9.050258899849484e-06, "loss": 0.0121, "step": 66200 }, { "epoch": 0.5590762280720272, "grad_norm": 0.28847676515579224, "learning_rate": 9.049826779033654e-06, "loss": 0.0151, "step": 66210 }, { "epoch": 0.5591606679191911, "grad_norm": 0.4097009003162384, "learning_rate": 9.049394570256418e-06, "loss": 0.017, "step": 66220 }, { "epoch": 0.5592451077663549, "grad_norm": 0.3685203790664673, "learning_rate": 9.048962273527161e-06, "loss": 0.0202, "step": 66230 }, { "epoch": 0.5593295476135188, "grad_norm": 0.3474558889865875, "learning_rate": 9.048529888855273e-06, "loss": 0.007, "step": 66240 }, { "epoch": 0.5594139874606827, "grad_norm": 1.0354056358337402, "learning_rate": 9.048097416250145e-06, "loss": 0.0202, "step": 66250 }, { "epoch": 0.5594984273078466, "grad_norm": 0.2760932743549347, "learning_rate": 9.04766485572117e-06, "loss": 0.0112, "step": 66260 }, { "epoch": 0.5595828671550105, "grad_norm": 0.1673547774553299, "learning_rate": 9.047232207277746e-06, "loss": 0.0053, "step": 66270 }, { "epoch": 0.5596673070021744, "grad_norm": 0.38956496119499207, "learning_rate": 9.046799470929267e-06, "loss": 0.0085, "step": 66280 }, { "epoch": 0.5597517468493381, "grad_norm": 0.28897807002067566, "learning_rate": 9.046366646685133e-06, "loss": 0.0175, "step": 66290 }, { "epoch": 0.559836186696502, "grad_norm": 0.2310643494129181, "learning_rate": 9.045933734554742e-06, "loss": 0.0151, "step": 66300 }, { "epoch": 0.5599206265436659, "grad_norm": 0.30442705750465393, "learning_rate": 9.0455007345475e-06, "loss": 0.0196, "step": 66310 }, { "epoch": 0.5600050663908298, "grad_norm": 0.4994693696498871, "learning_rate": 9.045067646672813e-06, "loss": 0.0125, "step": 66320 }, { "epoch": 0.5600895062379937, "grad_norm": 0.23653219640254974, "learning_rate": 9.044634470940086e-06, "loss": 0.0098, "step": 66330 }, { "epoch": 0.5601739460851576, "grad_norm": 0.18925052881240845, "learning_rate": 9.044201207358726e-06, "loss": 0.0086, "step": 66340 }, { "epoch": 0.5602583859323215, "grad_norm": 0.4510553181171417, "learning_rate": 9.043767855938143e-06, "loss": 0.0101, "step": 66350 }, { "epoch": 0.5603428257794854, "grad_norm": 0.6898196935653687, "learning_rate": 9.043334416687753e-06, "loss": 0.017, "step": 66360 }, { "epoch": 0.5604272656266492, "grad_norm": 0.17815092206001282, "learning_rate": 9.042900889616967e-06, "loss": 0.015, "step": 66370 }, { "epoch": 0.5605117054738131, "grad_norm": 0.009907763451337814, "learning_rate": 9.042467274735202e-06, "loss": 0.0209, "step": 66380 }, { "epoch": 0.560596145320977, "grad_norm": 0.5373110771179199, "learning_rate": 9.042033572051878e-06, "loss": 0.0089, "step": 66390 }, { "epoch": 0.5606805851681409, "grad_norm": 0.3408973813056946, "learning_rate": 9.041599781576412e-06, "loss": 0.0097, "step": 66400 }, { "epoch": 0.5607650250153047, "grad_norm": 0.15708717703819275, "learning_rate": 9.041165903318226e-06, "loss": 0.017, "step": 66410 }, { "epoch": 0.5608494648624686, "grad_norm": 0.8843386173248291, "learning_rate": 9.040731937286747e-06, "loss": 0.0219, "step": 66420 }, { "epoch": 0.5609339047096324, "grad_norm": 3.2959752082824707, "learning_rate": 9.040297883491396e-06, "loss": 0.0144, "step": 66430 }, { "epoch": 0.5610183445567963, "grad_norm": 0.7927817702293396, "learning_rate": 9.039863741941603e-06, "loss": 0.0206, "step": 66440 }, { "epoch": 0.5611027844039602, "grad_norm": 0.4363563358783722, "learning_rate": 9.039429512646797e-06, "loss": 0.0123, "step": 66450 }, { "epoch": 0.5611872242511241, "grad_norm": 0.600379467010498, "learning_rate": 9.038995195616412e-06, "loss": 0.0142, "step": 66460 }, { "epoch": 0.561271664098288, "grad_norm": 0.10245559364557266, "learning_rate": 9.038560790859878e-06, "loss": 0.0191, "step": 66470 }, { "epoch": 0.5613561039454519, "grad_norm": 0.5854907035827637, "learning_rate": 9.038126298386631e-06, "loss": 0.0168, "step": 66480 }, { "epoch": 0.5614405437926158, "grad_norm": 0.9866177439689636, "learning_rate": 9.037691718206109e-06, "loss": 0.0129, "step": 66490 }, { "epoch": 0.5615249836397797, "grad_norm": 0.40365397930145264, "learning_rate": 9.03725705032775e-06, "loss": 0.0188, "step": 66500 }, { "epoch": 0.5616094234869435, "grad_norm": 0.019279416650533676, "learning_rate": 9.036822294760994e-06, "loss": 0.0191, "step": 66510 }, { "epoch": 0.5616938633341073, "grad_norm": 0.10410759598016739, "learning_rate": 9.036387451515285e-06, "loss": 0.0151, "step": 66520 }, { "epoch": 0.5617783031812712, "grad_norm": 0.5550744533538818, "learning_rate": 9.035952520600068e-06, "loss": 0.0131, "step": 66530 }, { "epoch": 0.5618627430284351, "grad_norm": 0.23213867843151093, "learning_rate": 9.035517502024791e-06, "loss": 0.0141, "step": 66540 }, { "epoch": 0.561947182875599, "grad_norm": 0.26850301027297974, "learning_rate": 9.0350823957989e-06, "loss": 0.0077, "step": 66550 }, { "epoch": 0.5620316227227629, "grad_norm": 0.01642903871834278, "learning_rate": 9.034647201931845e-06, "loss": 0.0123, "step": 66560 }, { "epoch": 0.5621160625699267, "grad_norm": 0.4899750351905823, "learning_rate": 9.034211920433081e-06, "loss": 0.0112, "step": 66570 }, { "epoch": 0.5622005024170906, "grad_norm": 0.2773109972476959, "learning_rate": 9.033776551312059e-06, "loss": 0.0186, "step": 66580 }, { "epoch": 0.5622849422642545, "grad_norm": 0.7171167731285095, "learning_rate": 9.033341094578238e-06, "loss": 0.0116, "step": 66590 }, { "epoch": 0.5623693821114184, "grad_norm": 0.24775999784469604, "learning_rate": 9.032905550241076e-06, "loss": 0.0192, "step": 66600 }, { "epoch": 0.5624538219585823, "grad_norm": 0.348276823759079, "learning_rate": 9.03246991831003e-06, "loss": 0.0111, "step": 66610 }, { "epoch": 0.5625382618057462, "grad_norm": 0.6712862849235535, "learning_rate": 9.032034198794564e-06, "loss": 0.0139, "step": 66620 }, { "epoch": 0.5626227016529101, "grad_norm": 0.15855590999126434, "learning_rate": 9.031598391704143e-06, "loss": 0.0143, "step": 66630 }, { "epoch": 0.5627071415000738, "grad_norm": 0.21390335261821747, "learning_rate": 9.031162497048229e-06, "loss": 0.0171, "step": 66640 }, { "epoch": 0.5627915813472377, "grad_norm": 0.3846571743488312, "learning_rate": 9.030726514836294e-06, "loss": 0.0173, "step": 66650 }, { "epoch": 0.5628760211944016, "grad_norm": 0.26372671127319336, "learning_rate": 9.030290445077802e-06, "loss": 0.0113, "step": 66660 }, { "epoch": 0.5629604610415655, "grad_norm": 0.45555341243743896, "learning_rate": 9.029854287782231e-06, "loss": 0.0141, "step": 66670 }, { "epoch": 0.5630449008887294, "grad_norm": 0.4878903925418854, "learning_rate": 9.029418042959049e-06, "loss": 0.0225, "step": 66680 }, { "epoch": 0.5631293407358933, "grad_norm": 0.07626639306545258, "learning_rate": 9.028981710617733e-06, "loss": 0.01, "step": 66690 }, { "epoch": 0.5632137805830572, "grad_norm": 0.4378528296947479, "learning_rate": 9.028545290767759e-06, "loss": 0.0144, "step": 66700 }, { "epoch": 0.563298220430221, "grad_norm": 0.14245474338531494, "learning_rate": 9.028108783418607e-06, "loss": 0.013, "step": 66710 }, { "epoch": 0.5633826602773849, "grad_norm": 0.09900471568107605, "learning_rate": 9.027672188579758e-06, "loss": 0.0123, "step": 66720 }, { "epoch": 0.5634671001245488, "grad_norm": 0.14419035613536835, "learning_rate": 9.027235506260694e-06, "loss": 0.0162, "step": 66730 }, { "epoch": 0.5635515399717127, "grad_norm": 0.3258810043334961, "learning_rate": 9.026798736470901e-06, "loss": 0.0135, "step": 66740 }, { "epoch": 0.5636359798188765, "grad_norm": 0.7410369515419006, "learning_rate": 9.026361879219864e-06, "loss": 0.0124, "step": 66750 }, { "epoch": 0.5637204196660404, "grad_norm": 0.4929810166358948, "learning_rate": 9.025924934517072e-06, "loss": 0.0183, "step": 66760 }, { "epoch": 0.5638048595132042, "grad_norm": 0.17504006624221802, "learning_rate": 9.025487902372017e-06, "loss": 0.0104, "step": 66770 }, { "epoch": 0.5638892993603681, "grad_norm": 0.4928210973739624, "learning_rate": 9.025050782794186e-06, "loss": 0.0137, "step": 66780 }, { "epoch": 0.563973739207532, "grad_norm": 0.2544787526130676, "learning_rate": 9.02461357579308e-06, "loss": 0.0136, "step": 66790 }, { "epoch": 0.5640581790546959, "grad_norm": 0.20809967815876007, "learning_rate": 9.02417628137819e-06, "loss": 0.0114, "step": 66800 }, { "epoch": 0.5641426189018598, "grad_norm": 0.3584812879562378, "learning_rate": 9.023738899559017e-06, "loss": 0.0146, "step": 66810 }, { "epoch": 0.5642270587490237, "grad_norm": 0.40007853507995605, "learning_rate": 9.02330143034506e-06, "loss": 0.0176, "step": 66820 }, { "epoch": 0.5643114985961876, "grad_norm": 0.3937901556491852, "learning_rate": 9.02286387374582e-06, "loss": 0.0138, "step": 66830 }, { "epoch": 0.5643959384433515, "grad_norm": 0.16202788054943085, "learning_rate": 9.0224262297708e-06, "loss": 0.0071, "step": 66840 }, { "epoch": 0.5644803782905153, "grad_norm": 0.18743130564689636, "learning_rate": 9.021988498429507e-06, "loss": 0.0113, "step": 66850 }, { "epoch": 0.5645648181376792, "grad_norm": 0.6504120230674744, "learning_rate": 9.021550679731448e-06, "loss": 0.0124, "step": 66860 }, { "epoch": 0.564649257984843, "grad_norm": 0.326418936252594, "learning_rate": 9.021112773686134e-06, "loss": 0.0102, "step": 66870 }, { "epoch": 0.5647336978320069, "grad_norm": 0.3842107653617859, "learning_rate": 9.020674780303072e-06, "loss": 0.0136, "step": 66880 }, { "epoch": 0.5648181376791708, "grad_norm": 0.547809898853302, "learning_rate": 9.02023669959178e-06, "loss": 0.0114, "step": 66890 }, { "epoch": 0.5649025775263347, "grad_norm": 0.10825365036725998, "learning_rate": 9.01979853156177e-06, "loss": 0.0133, "step": 66900 }, { "epoch": 0.5649870173734985, "grad_norm": 0.23252981901168823, "learning_rate": 9.019360276222558e-06, "loss": 0.0087, "step": 66910 }, { "epoch": 0.5650714572206624, "grad_norm": 0.6304786801338196, "learning_rate": 9.018921933583667e-06, "loss": 0.0179, "step": 66920 }, { "epoch": 0.5651558970678263, "grad_norm": 0.2888677716255188, "learning_rate": 9.018483503654612e-06, "loss": 0.0117, "step": 66930 }, { "epoch": 0.5652403369149902, "grad_norm": 0.40507447719573975, "learning_rate": 9.018044986444922e-06, "loss": 0.016, "step": 66940 }, { "epoch": 0.5653247767621541, "grad_norm": 0.1418730467557907, "learning_rate": 9.017606381964117e-06, "loss": 0.0152, "step": 66950 }, { "epoch": 0.565409216609318, "grad_norm": 0.6508456468582153, "learning_rate": 9.017167690221725e-06, "loss": 0.0114, "step": 66960 }, { "epoch": 0.5654936564564819, "grad_norm": 0.4638577997684479, "learning_rate": 9.016728911227273e-06, "loss": 0.0081, "step": 66970 }, { "epoch": 0.5655780963036456, "grad_norm": 0.6935641169548035, "learning_rate": 9.016290044990292e-06, "loss": 0.0156, "step": 66980 }, { "epoch": 0.5656625361508095, "grad_norm": 0.21593891084194183, "learning_rate": 9.015851091520315e-06, "loss": 0.0116, "step": 66990 }, { "epoch": 0.5657469759979734, "grad_norm": 1.1997385025024414, "learning_rate": 9.015412050826876e-06, "loss": 0.0188, "step": 67000 }, { "epoch": 0.5658314158451373, "grad_norm": 0.21638761460781097, "learning_rate": 9.014972922919508e-06, "loss": 0.0105, "step": 67010 }, { "epoch": 0.5659158556923012, "grad_norm": 0.22437234222888947, "learning_rate": 9.014533707807753e-06, "loss": 0.0059, "step": 67020 }, { "epoch": 0.5660002955394651, "grad_norm": 0.5178572535514832, "learning_rate": 9.014094405501148e-06, "loss": 0.0149, "step": 67030 }, { "epoch": 0.566084735386629, "grad_norm": 0.3977401852607727, "learning_rate": 9.013655016009233e-06, "loss": 0.0145, "step": 67040 }, { "epoch": 0.5661691752337928, "grad_norm": 0.517189621925354, "learning_rate": 9.013215539341556e-06, "loss": 0.009, "step": 67050 }, { "epoch": 0.5662536150809567, "grad_norm": 0.5305629968643188, "learning_rate": 9.01277597550766e-06, "loss": 0.019, "step": 67060 }, { "epoch": 0.5663380549281206, "grad_norm": 0.8301684856414795, "learning_rate": 9.012336324517091e-06, "loss": 0.02, "step": 67070 }, { "epoch": 0.5664224947752845, "grad_norm": 0.36429768800735474, "learning_rate": 9.011896586379398e-06, "loss": 0.0177, "step": 67080 }, { "epoch": 0.5665069346224484, "grad_norm": 0.5641628503799438, "learning_rate": 9.011456761104134e-06, "loss": 0.0134, "step": 67090 }, { "epoch": 0.5665913744696122, "grad_norm": 0.39964622259140015, "learning_rate": 9.011016848700853e-06, "loss": 0.026, "step": 67100 }, { "epoch": 0.566675814316776, "grad_norm": 0.4241262674331665, "learning_rate": 9.010576849179108e-06, "loss": 0.0095, "step": 67110 }, { "epoch": 0.5667602541639399, "grad_norm": 0.14601990580558777, "learning_rate": 9.010136762548453e-06, "loss": 0.0137, "step": 67120 }, { "epoch": 0.5668446940111038, "grad_norm": 0.5845010876655579, "learning_rate": 9.00969658881845e-06, "loss": 0.0156, "step": 67130 }, { "epoch": 0.5669291338582677, "grad_norm": 0.3199373185634613, "learning_rate": 9.009256327998662e-06, "loss": 0.0097, "step": 67140 }, { "epoch": 0.5670135737054316, "grad_norm": 0.4465177655220032, "learning_rate": 9.008815980098644e-06, "loss": 0.0134, "step": 67150 }, { "epoch": 0.5670980135525955, "grad_norm": 0.4368881285190582, "learning_rate": 9.008375545127965e-06, "loss": 0.0195, "step": 67160 }, { "epoch": 0.5671824533997594, "grad_norm": 0.22966456413269043, "learning_rate": 9.007935023096193e-06, "loss": 0.0098, "step": 67170 }, { "epoch": 0.5672668932469233, "grad_norm": 0.39172324538230896, "learning_rate": 9.007494414012891e-06, "loss": 0.0217, "step": 67180 }, { "epoch": 0.5673513330940871, "grad_norm": 0.30195263028144836, "learning_rate": 9.007053717887633e-06, "loss": 0.0154, "step": 67190 }, { "epoch": 0.567435772941251, "grad_norm": 0.18184325098991394, "learning_rate": 9.00661293472999e-06, "loss": 0.0123, "step": 67200 }, { "epoch": 0.5675202127884148, "grad_norm": 0.09728843718767166, "learning_rate": 9.006172064549534e-06, "loss": 0.0135, "step": 67210 }, { "epoch": 0.5676046526355787, "grad_norm": 0.4319981336593628, "learning_rate": 9.005731107355841e-06, "loss": 0.0138, "step": 67220 }, { "epoch": 0.5676890924827426, "grad_norm": 0.1850743591785431, "learning_rate": 9.005290063158491e-06, "loss": 0.0126, "step": 67230 }, { "epoch": 0.5677735323299065, "grad_norm": 0.28529128432273865, "learning_rate": 9.00484893196706e-06, "loss": 0.022, "step": 67240 }, { "epoch": 0.5678579721770703, "grad_norm": 0.3651488423347473, "learning_rate": 9.004407713791133e-06, "loss": 0.0156, "step": 67250 }, { "epoch": 0.5679424120242342, "grad_norm": 0.3588219881057739, "learning_rate": 9.003966408640289e-06, "loss": 0.0209, "step": 67260 }, { "epoch": 0.5680268518713981, "grad_norm": 0.2662939131259918, "learning_rate": 9.003525016524115e-06, "loss": 0.0132, "step": 67270 }, { "epoch": 0.568111291718562, "grad_norm": 0.28049135208129883, "learning_rate": 9.003083537452199e-06, "loss": 0.0171, "step": 67280 }, { "epoch": 0.5681957315657259, "grad_norm": 0.346170574426651, "learning_rate": 9.00264197143413e-06, "loss": 0.0132, "step": 67290 }, { "epoch": 0.5682801714128898, "grad_norm": 0.18172068893909454, "learning_rate": 9.002200318479494e-06, "loss": 0.0075, "step": 67300 }, { "epoch": 0.5683646112600537, "grad_norm": 0.16706398129463196, "learning_rate": 9.00175857859789e-06, "loss": 0.0132, "step": 67310 }, { "epoch": 0.5684490511072176, "grad_norm": 0.39711493253707886, "learning_rate": 9.00131675179891e-06, "loss": 0.0099, "step": 67320 }, { "epoch": 0.5685334909543813, "grad_norm": 0.3504403531551361, "learning_rate": 9.000874838092148e-06, "loss": 0.0083, "step": 67330 }, { "epoch": 0.5686179308015452, "grad_norm": 0.5126628875732422, "learning_rate": 9.000432837487206e-06, "loss": 0.0121, "step": 67340 }, { "epoch": 0.5687023706487091, "grad_norm": 0.3806069493293762, "learning_rate": 8.999990749993683e-06, "loss": 0.0145, "step": 67350 }, { "epoch": 0.568786810495873, "grad_norm": 0.4287334382534027, "learning_rate": 8.99954857562118e-06, "loss": 0.0083, "step": 67360 }, { "epoch": 0.5688712503430369, "grad_norm": 0.16678041219711304, "learning_rate": 8.9991063143793e-06, "loss": 0.013, "step": 67370 }, { "epoch": 0.5689556901902008, "grad_norm": 0.2947548031806946, "learning_rate": 8.998663966277652e-06, "loss": 0.0112, "step": 67380 }, { "epoch": 0.5690401300373646, "grad_norm": 0.5345296859741211, "learning_rate": 8.998221531325842e-06, "loss": 0.0086, "step": 67390 }, { "epoch": 0.5691245698845285, "grad_norm": 0.15751487016677856, "learning_rate": 8.99777900953348e-06, "loss": 0.01, "step": 67400 }, { "epoch": 0.5692090097316924, "grad_norm": 0.6114255785942078, "learning_rate": 8.997336400910177e-06, "loss": 0.0105, "step": 67410 }, { "epoch": 0.5692934495788563, "grad_norm": 0.3148003816604614, "learning_rate": 8.996893705465545e-06, "loss": 0.0098, "step": 67420 }, { "epoch": 0.5693778894260202, "grad_norm": 0.5820783376693726, "learning_rate": 8.996450923209201e-06, "loss": 0.0108, "step": 67430 }, { "epoch": 0.569462329273184, "grad_norm": 0.8273856043815613, "learning_rate": 8.996008054150764e-06, "loss": 0.0091, "step": 67440 }, { "epoch": 0.5695467691203479, "grad_norm": 0.7889817357063293, "learning_rate": 8.995565098299849e-06, "loss": 0.0076, "step": 67450 }, { "epoch": 0.5696312089675117, "grad_norm": 0.3480105698108673, "learning_rate": 8.995122055666078e-06, "loss": 0.0133, "step": 67460 }, { "epoch": 0.5697156488146756, "grad_norm": 0.3968314528465271, "learning_rate": 8.994678926259077e-06, "loss": 0.0077, "step": 67470 }, { "epoch": 0.5698000886618395, "grad_norm": 0.3436771035194397, "learning_rate": 8.994235710088469e-06, "loss": 0.0152, "step": 67480 }, { "epoch": 0.5698845285090034, "grad_norm": 0.42886117100715637, "learning_rate": 8.993792407163877e-06, "loss": 0.0138, "step": 67490 }, { "epoch": 0.5699689683561673, "grad_norm": 0.4252214729785919, "learning_rate": 8.993349017494933e-06, "loss": 0.0081, "step": 67500 }, { "epoch": 0.5700534082033312, "grad_norm": 0.437409907579422, "learning_rate": 8.992905541091267e-06, "loss": 0.0194, "step": 67510 }, { "epoch": 0.570137848050495, "grad_norm": 0.08013731986284256, "learning_rate": 8.992461977962512e-06, "loss": 0.0127, "step": 67520 }, { "epoch": 0.5702222878976589, "grad_norm": 0.3601813316345215, "learning_rate": 8.9920183281183e-06, "loss": 0.0125, "step": 67530 }, { "epoch": 0.5703067277448228, "grad_norm": 0.34093043208122253, "learning_rate": 8.991574591568267e-06, "loss": 0.0121, "step": 67540 }, { "epoch": 0.5703911675919866, "grad_norm": 0.04245582967996597, "learning_rate": 8.991130768322053e-06, "loss": 0.0115, "step": 67550 }, { "epoch": 0.5704756074391505, "grad_norm": 1.0018776655197144, "learning_rate": 8.990686858389298e-06, "loss": 0.0147, "step": 67560 }, { "epoch": 0.5705600472863144, "grad_norm": 0.2271893173456192, "learning_rate": 8.990242861779639e-06, "loss": 0.0211, "step": 67570 }, { "epoch": 0.5706444871334783, "grad_norm": 0.1053728386759758, "learning_rate": 8.989798778502725e-06, "loss": 0.0125, "step": 67580 }, { "epoch": 0.5707289269806421, "grad_norm": 0.4565689265727997, "learning_rate": 8.989354608568197e-06, "loss": 0.0107, "step": 67590 }, { "epoch": 0.570813366827806, "grad_norm": 0.14818529784679413, "learning_rate": 8.988910351985707e-06, "loss": 0.0073, "step": 67600 }, { "epoch": 0.5708978066749699, "grad_norm": 0.3547571301460266, "learning_rate": 8.9884660087649e-06, "loss": 0.0123, "step": 67610 }, { "epoch": 0.5709822465221338, "grad_norm": 0.49520960450172424, "learning_rate": 8.988021578915429e-06, "loss": 0.0169, "step": 67620 }, { "epoch": 0.5710666863692977, "grad_norm": 0.13938553631305695, "learning_rate": 8.987577062446946e-06, "loss": 0.014, "step": 67630 }, { "epoch": 0.5711511262164616, "grad_norm": 0.28586751222610474, "learning_rate": 8.987132459369108e-06, "loss": 0.0088, "step": 67640 }, { "epoch": 0.5712355660636255, "grad_norm": 0.37075281143188477, "learning_rate": 8.98668776969157e-06, "loss": 0.0126, "step": 67650 }, { "epoch": 0.5713200059107894, "grad_norm": 0.32475578784942627, "learning_rate": 8.986242993423988e-06, "loss": 0.0121, "step": 67660 }, { "epoch": 0.5714044457579531, "grad_norm": 0.10115736722946167, "learning_rate": 8.985798130576027e-06, "loss": 0.0117, "step": 67670 }, { "epoch": 0.571488885605117, "grad_norm": 0.14798009395599365, "learning_rate": 8.985353181157346e-06, "loss": 0.0145, "step": 67680 }, { "epoch": 0.5715733254522809, "grad_norm": 0.5862881541252136, "learning_rate": 8.984908145177611e-06, "loss": 0.0091, "step": 67690 }, { "epoch": 0.5716577652994448, "grad_norm": 0.2515488862991333, "learning_rate": 8.984463022646489e-06, "loss": 0.0119, "step": 67700 }, { "epoch": 0.5717422051466087, "grad_norm": 0.19412410259246826, "learning_rate": 8.984017813573644e-06, "loss": 0.01, "step": 67710 }, { "epoch": 0.5718266449937726, "grad_norm": 0.40758267045021057, "learning_rate": 8.98357251796875e-06, "loss": 0.0108, "step": 67720 }, { "epoch": 0.5719110848409364, "grad_norm": 0.4360542297363281, "learning_rate": 8.983127135841478e-06, "loss": 0.0109, "step": 67730 }, { "epoch": 0.5719955246881003, "grad_norm": 0.4753037393093109, "learning_rate": 8.9826816672015e-06, "loss": 0.0124, "step": 67740 }, { "epoch": 0.5720799645352642, "grad_norm": 0.38920673727989197, "learning_rate": 8.982236112058493e-06, "loss": 0.015, "step": 67750 }, { "epoch": 0.5721644043824281, "grad_norm": 0.4133022427558899, "learning_rate": 8.981790470422133e-06, "loss": 0.0094, "step": 67760 }, { "epoch": 0.572248844229592, "grad_norm": 0.5536079406738281, "learning_rate": 8.9813447423021e-06, "loss": 0.0171, "step": 67770 }, { "epoch": 0.5723332840767558, "grad_norm": 0.694441556930542, "learning_rate": 8.980898927708077e-06, "loss": 0.014, "step": 67780 }, { "epoch": 0.5724177239239197, "grad_norm": 0.23787426948547363, "learning_rate": 8.980453026649743e-06, "loss": 0.0112, "step": 67790 }, { "epoch": 0.5725021637710835, "grad_norm": 0.31467342376708984, "learning_rate": 8.980007039136784e-06, "loss": 0.0117, "step": 67800 }, { "epoch": 0.5725866036182474, "grad_norm": 0.4110095500946045, "learning_rate": 8.97956096517889e-06, "loss": 0.0123, "step": 67810 }, { "epoch": 0.5726710434654113, "grad_norm": 0.17500969767570496, "learning_rate": 8.979114804785748e-06, "loss": 0.011, "step": 67820 }, { "epoch": 0.5727554833125752, "grad_norm": 0.4241478145122528, "learning_rate": 8.978668557967045e-06, "loss": 0.0104, "step": 67830 }, { "epoch": 0.5728399231597391, "grad_norm": 0.19530563056468964, "learning_rate": 8.978222224732478e-06, "loss": 0.0149, "step": 67840 }, { "epoch": 0.572924363006903, "grad_norm": 0.376844197511673, "learning_rate": 8.977775805091737e-06, "loss": 0.015, "step": 67850 }, { "epoch": 0.5730088028540669, "grad_norm": 0.14636319875717163, "learning_rate": 8.977329299054522e-06, "loss": 0.0131, "step": 67860 }, { "epoch": 0.5730932427012307, "grad_norm": 0.3023606836795807, "learning_rate": 8.976882706630532e-06, "loss": 0.0127, "step": 67870 }, { "epoch": 0.5731776825483946, "grad_norm": 0.36972886323928833, "learning_rate": 8.976436027829462e-06, "loss": 0.0121, "step": 67880 }, { "epoch": 0.5732621223955585, "grad_norm": 0.36406683921813965, "learning_rate": 8.975989262661017e-06, "loss": 0.0126, "step": 67890 }, { "epoch": 0.5733465622427223, "grad_norm": 0.47949472069740295, "learning_rate": 8.9755424111349e-06, "loss": 0.0126, "step": 67900 }, { "epoch": 0.5734310020898862, "grad_norm": 0.18316040933132172, "learning_rate": 8.975095473260816e-06, "loss": 0.0122, "step": 67910 }, { "epoch": 0.5735154419370501, "grad_norm": 0.40592673420906067, "learning_rate": 8.974648449048473e-06, "loss": 0.0165, "step": 67920 }, { "epoch": 0.573599881784214, "grad_norm": 0.26317355036735535, "learning_rate": 8.974201338507582e-06, "loss": 0.0124, "step": 67930 }, { "epoch": 0.5736843216313778, "grad_norm": 0.3567676842212677, "learning_rate": 8.97375414164785e-06, "loss": 0.0147, "step": 67940 }, { "epoch": 0.5737687614785417, "grad_norm": 0.09229348599910736, "learning_rate": 8.973306858478994e-06, "loss": 0.0099, "step": 67950 }, { "epoch": 0.5738532013257056, "grad_norm": 0.20442375540733337, "learning_rate": 8.972859489010726e-06, "loss": 0.0201, "step": 67960 }, { "epoch": 0.5739376411728695, "grad_norm": 0.18040980398654938, "learning_rate": 8.972412033252764e-06, "loss": 0.0105, "step": 67970 }, { "epoch": 0.5740220810200334, "grad_norm": 0.08264195173978806, "learning_rate": 8.971964491214828e-06, "loss": 0.0106, "step": 67980 }, { "epoch": 0.5741065208671973, "grad_norm": 0.20782022178173065, "learning_rate": 8.971516862906635e-06, "loss": 0.0174, "step": 67990 }, { "epoch": 0.5741909607143612, "grad_norm": 0.601421058177948, "learning_rate": 8.971069148337912e-06, "loss": 0.0117, "step": 68000 }, { "epoch": 0.5742754005615249, "grad_norm": 0.8579553961753845, "learning_rate": 8.97062134751838e-06, "loss": 0.0075, "step": 68010 }, { "epoch": 0.5743598404086888, "grad_norm": 0.32357946038246155, "learning_rate": 8.970173460457765e-06, "loss": 0.013, "step": 68020 }, { "epoch": 0.5744442802558527, "grad_norm": 0.49047014117240906, "learning_rate": 8.969725487165797e-06, "loss": 0.0174, "step": 68030 }, { "epoch": 0.5745287201030166, "grad_norm": 0.3454868793487549, "learning_rate": 8.969277427652205e-06, "loss": 0.0108, "step": 68040 }, { "epoch": 0.5746131599501805, "grad_norm": 0.2334107756614685, "learning_rate": 8.96882928192672e-06, "loss": 0.0191, "step": 68050 }, { "epoch": 0.5746975997973444, "grad_norm": 0.587937593460083, "learning_rate": 8.968381049999076e-06, "loss": 0.0108, "step": 68060 }, { "epoch": 0.5747820396445082, "grad_norm": 0.308710515499115, "learning_rate": 8.96793273187901e-06, "loss": 0.0096, "step": 68070 }, { "epoch": 0.5748664794916721, "grad_norm": 0.18056872487068176, "learning_rate": 8.967484327576257e-06, "loss": 0.0125, "step": 68080 }, { "epoch": 0.574950919338836, "grad_norm": 0.2304760217666626, "learning_rate": 8.967035837100559e-06, "loss": 0.0139, "step": 68090 }, { "epoch": 0.5750353591859999, "grad_norm": 0.6523309350013733, "learning_rate": 8.966587260461653e-06, "loss": 0.0234, "step": 68100 }, { "epoch": 0.5751197990331638, "grad_norm": 0.5030391216278076, "learning_rate": 8.966138597669288e-06, "loss": 0.0101, "step": 68110 }, { "epoch": 0.5752042388803277, "grad_norm": 0.35202857851982117, "learning_rate": 8.965689848733203e-06, "loss": 0.0186, "step": 68120 }, { "epoch": 0.5752886787274915, "grad_norm": 0.45240518450737, "learning_rate": 8.965241013663147e-06, "loss": 0.0094, "step": 68130 }, { "epoch": 0.5753731185746553, "grad_norm": 0.14605224132537842, "learning_rate": 8.96479209246887e-06, "loss": 0.0105, "step": 68140 }, { "epoch": 0.5754575584218192, "grad_norm": 0.0038733428809791803, "learning_rate": 8.964343085160119e-06, "loss": 0.0138, "step": 68150 }, { "epoch": 0.5755419982689831, "grad_norm": 0.40266549587249756, "learning_rate": 8.96389399174665e-06, "loss": 0.023, "step": 68160 }, { "epoch": 0.575626438116147, "grad_norm": 0.4292200803756714, "learning_rate": 8.963444812238216e-06, "loss": 0.014, "step": 68170 }, { "epoch": 0.5757108779633109, "grad_norm": 0.474984347820282, "learning_rate": 8.962995546644573e-06, "loss": 0.0166, "step": 68180 }, { "epoch": 0.5757953178104748, "grad_norm": 0.3575166165828705, "learning_rate": 8.962546194975479e-06, "loss": 0.0163, "step": 68190 }, { "epoch": 0.5758797576576387, "grad_norm": 0.2861292362213135, "learning_rate": 8.962096757240693e-06, "loss": 0.0184, "step": 68200 }, { "epoch": 0.5759641975048025, "grad_norm": 0.5741934776306152, "learning_rate": 8.961647233449977e-06, "loss": 0.0122, "step": 68210 }, { "epoch": 0.5760486373519664, "grad_norm": 0.19351759552955627, "learning_rate": 8.961197623613096e-06, "loss": 0.0209, "step": 68220 }, { "epoch": 0.5761330771991303, "grad_norm": 0.40422913432121277, "learning_rate": 8.960747927739814e-06, "loss": 0.0096, "step": 68230 }, { "epoch": 0.5762175170462941, "grad_norm": 0.21865640580654144, "learning_rate": 8.9602981458399e-06, "loss": 0.008, "step": 68240 }, { "epoch": 0.576301956893458, "grad_norm": 0.38097402453422546, "learning_rate": 8.959848277923118e-06, "loss": 0.0122, "step": 68250 }, { "epoch": 0.5763863967406219, "grad_norm": 0.30890461802482605, "learning_rate": 8.959398323999246e-06, "loss": 0.0107, "step": 68260 }, { "epoch": 0.5764708365877858, "grad_norm": 0.1271958351135254, "learning_rate": 8.958948284078053e-06, "loss": 0.015, "step": 68270 }, { "epoch": 0.5765552764349496, "grad_norm": 0.11123800277709961, "learning_rate": 8.958498158169314e-06, "loss": 0.0115, "step": 68280 }, { "epoch": 0.5766397162821135, "grad_norm": 0.10268524289131165, "learning_rate": 8.958047946282806e-06, "loss": 0.0167, "step": 68290 }, { "epoch": 0.5767241561292774, "grad_norm": 0.23841431736946106, "learning_rate": 8.95759764842831e-06, "loss": 0.0081, "step": 68300 }, { "epoch": 0.5768085959764413, "grad_norm": 0.44164717197418213, "learning_rate": 8.9571472646156e-06, "loss": 0.0057, "step": 68310 }, { "epoch": 0.5768930358236052, "grad_norm": 0.30218306183815, "learning_rate": 8.956696794854465e-06, "loss": 0.0086, "step": 68320 }, { "epoch": 0.5769774756707691, "grad_norm": 0.7170488834381104, "learning_rate": 8.956246239154687e-06, "loss": 0.0202, "step": 68330 }, { "epoch": 0.577061915517933, "grad_norm": 0.45987892150878906, "learning_rate": 8.95579559752605e-06, "loss": 0.0135, "step": 68340 }, { "epoch": 0.5771463553650968, "grad_norm": 0.5389148592948914, "learning_rate": 8.955344869978344e-06, "loss": 0.0167, "step": 68350 }, { "epoch": 0.5772307952122606, "grad_norm": 0.1672113984823227, "learning_rate": 8.954894056521356e-06, "loss": 0.0109, "step": 68360 }, { "epoch": 0.5773152350594245, "grad_norm": 0.2643323242664337, "learning_rate": 8.954443157164883e-06, "loss": 0.0136, "step": 68370 }, { "epoch": 0.5773996749065884, "grad_norm": 0.7297821640968323, "learning_rate": 8.953992171918712e-06, "loss": 0.0106, "step": 68380 }, { "epoch": 0.5774841147537523, "grad_norm": 0.3863312005996704, "learning_rate": 8.953541100792642e-06, "loss": 0.0142, "step": 68390 }, { "epoch": 0.5775685546009162, "grad_norm": 0.6060550212860107, "learning_rate": 8.95308994379647e-06, "loss": 0.0177, "step": 68400 }, { "epoch": 0.57765299444808, "grad_norm": 0.08819695562124252, "learning_rate": 8.952638700939995e-06, "loss": 0.01, "step": 68410 }, { "epoch": 0.5777374342952439, "grad_norm": 0.12246309220790863, "learning_rate": 8.952187372233016e-06, "loss": 0.011, "step": 68420 }, { "epoch": 0.5778218741424078, "grad_norm": 0.19825685024261475, "learning_rate": 8.951735957685339e-06, "loss": 0.0133, "step": 68430 }, { "epoch": 0.5779063139895717, "grad_norm": 0.23844070732593536, "learning_rate": 8.951284457306766e-06, "loss": 0.0135, "step": 68440 }, { "epoch": 0.5779907538367356, "grad_norm": 0.5236238837242126, "learning_rate": 8.950832871107102e-06, "loss": 0.0215, "step": 68450 }, { "epoch": 0.5780751936838995, "grad_norm": 0.4554641544818878, "learning_rate": 8.950381199096161e-06, "loss": 0.0126, "step": 68460 }, { "epoch": 0.5781596335310633, "grad_norm": 0.18457862734794617, "learning_rate": 8.949929441283748e-06, "loss": 0.0112, "step": 68470 }, { "epoch": 0.5782440733782271, "grad_norm": 0.11932563036680222, "learning_rate": 8.949477597679678e-06, "loss": 0.013, "step": 68480 }, { "epoch": 0.578328513225391, "grad_norm": 0.8790026903152466, "learning_rate": 8.949025668293763e-06, "loss": 0.0221, "step": 68490 }, { "epoch": 0.5784129530725549, "grad_norm": 0.8433776497840881, "learning_rate": 8.94857365313582e-06, "loss": 0.0203, "step": 68500 }, { "epoch": 0.5784973929197188, "grad_norm": 0.308607280254364, "learning_rate": 8.948121552215667e-06, "loss": 0.0119, "step": 68510 }, { "epoch": 0.5785818327668827, "grad_norm": 0.3182603418827057, "learning_rate": 8.94766936554312e-06, "loss": 0.0142, "step": 68520 }, { "epoch": 0.5786662726140466, "grad_norm": 0.7293050289154053, "learning_rate": 8.947217093128006e-06, "loss": 0.0159, "step": 68530 }, { "epoch": 0.5787507124612105, "grad_norm": 0.5721470713615417, "learning_rate": 8.946764734980147e-06, "loss": 0.0121, "step": 68540 }, { "epoch": 0.5788351523083743, "grad_norm": 0.17578601837158203, "learning_rate": 8.946312291109365e-06, "loss": 0.009, "step": 68550 }, { "epoch": 0.5789195921555382, "grad_norm": 0.25267598032951355, "learning_rate": 8.945859761525488e-06, "loss": 0.0079, "step": 68560 }, { "epoch": 0.5790040320027021, "grad_norm": 0.5405188798904419, "learning_rate": 8.945407146238348e-06, "loss": 0.0157, "step": 68570 }, { "epoch": 0.579088471849866, "grad_norm": 0.8363542556762695, "learning_rate": 8.94495444525777e-06, "loss": 0.0174, "step": 68580 }, { "epoch": 0.5791729116970298, "grad_norm": 0.39066261053085327, "learning_rate": 8.944501658593592e-06, "loss": 0.0199, "step": 68590 }, { "epoch": 0.5792573515441937, "grad_norm": 0.31976139545440674, "learning_rate": 8.944048786255646e-06, "loss": 0.0113, "step": 68600 }, { "epoch": 0.5793417913913576, "grad_norm": 0.6071191430091858, "learning_rate": 8.943595828253768e-06, "loss": 0.0125, "step": 68610 }, { "epoch": 0.5794262312385214, "grad_norm": 0.58148193359375, "learning_rate": 8.943142784597797e-06, "loss": 0.0168, "step": 68620 }, { "epoch": 0.5795106710856853, "grad_norm": 0.5742999911308289, "learning_rate": 8.942689655297571e-06, "loss": 0.0246, "step": 68630 }, { "epoch": 0.5795951109328492, "grad_norm": 0.23374661803245544, "learning_rate": 8.942236440362936e-06, "loss": 0.0118, "step": 68640 }, { "epoch": 0.5796795507800131, "grad_norm": 0.2979874312877655, "learning_rate": 8.941783139803733e-06, "loss": 0.0115, "step": 68650 }, { "epoch": 0.579763990627177, "grad_norm": 0.11363018304109573, "learning_rate": 8.941329753629809e-06, "loss": 0.0134, "step": 68660 }, { "epoch": 0.5798484304743409, "grad_norm": 0.7334505915641785, "learning_rate": 8.940876281851008e-06, "loss": 0.0115, "step": 68670 }, { "epoch": 0.5799328703215048, "grad_norm": 0.28502389788627625, "learning_rate": 8.940422724477182e-06, "loss": 0.0064, "step": 68680 }, { "epoch": 0.5800173101686686, "grad_norm": 0.15111985802650452, "learning_rate": 8.939969081518184e-06, "loss": 0.0084, "step": 68690 }, { "epoch": 0.5801017500158324, "grad_norm": 0.08349450677633286, "learning_rate": 8.939515352983863e-06, "loss": 0.0149, "step": 68700 }, { "epoch": 0.5801861898629963, "grad_norm": 0.4961977005004883, "learning_rate": 8.939061538884078e-06, "loss": 0.0166, "step": 68710 }, { "epoch": 0.5802706297101602, "grad_norm": 0.6637746691703796, "learning_rate": 8.93860763922868e-06, "loss": 0.0091, "step": 68720 }, { "epoch": 0.5803550695573241, "grad_norm": 0.17678172886371613, "learning_rate": 8.938153654027534e-06, "loss": 0.0141, "step": 68730 }, { "epoch": 0.580439509404488, "grad_norm": 0.19768303632736206, "learning_rate": 8.937699583290497e-06, "loss": 0.0096, "step": 68740 }, { "epoch": 0.5805239492516518, "grad_norm": 0.6142870187759399, "learning_rate": 8.937245427027431e-06, "loss": 0.0164, "step": 68750 }, { "epoch": 0.5806083890988157, "grad_norm": 0.3514058589935303, "learning_rate": 8.936791185248202e-06, "loss": 0.0166, "step": 68760 }, { "epoch": 0.5806928289459796, "grad_norm": 0.34911635518074036, "learning_rate": 8.936336857962675e-06, "loss": 0.0078, "step": 68770 }, { "epoch": 0.5807772687931435, "grad_norm": 0.4077041447162628, "learning_rate": 8.93588244518072e-06, "loss": 0.0132, "step": 68780 }, { "epoch": 0.5808617086403074, "grad_norm": 0.11243734508752823, "learning_rate": 8.935427946912202e-06, "loss": 0.0098, "step": 68790 }, { "epoch": 0.5809461484874713, "grad_norm": 0.35544607043266296, "learning_rate": 8.934973363166997e-06, "loss": 0.0087, "step": 68800 }, { "epoch": 0.5810305883346352, "grad_norm": 0.8457003831863403, "learning_rate": 8.934518693954976e-06, "loss": 0.0097, "step": 68810 }, { "epoch": 0.5811150281817989, "grad_norm": 0.3809278905391693, "learning_rate": 8.934063939286015e-06, "loss": 0.0116, "step": 68820 }, { "epoch": 0.5811994680289628, "grad_norm": 0.28609228134155273, "learning_rate": 8.933609099169992e-06, "loss": 0.0207, "step": 68830 }, { "epoch": 0.5812839078761267, "grad_norm": 0.1511373370885849, "learning_rate": 8.933154173616786e-06, "loss": 0.0121, "step": 68840 }, { "epoch": 0.5813683477232906, "grad_norm": 1.0643290281295776, "learning_rate": 8.932699162636276e-06, "loss": 0.027, "step": 68850 }, { "epoch": 0.5814527875704545, "grad_norm": 0.08055354654788971, "learning_rate": 8.932244066238347e-06, "loss": 0.0144, "step": 68860 }, { "epoch": 0.5815372274176184, "grad_norm": 0.13107836246490479, "learning_rate": 8.931788884432883e-06, "loss": 0.007, "step": 68870 }, { "epoch": 0.5816216672647823, "grad_norm": 1.2162368297576904, "learning_rate": 8.931333617229769e-06, "loss": 0.0177, "step": 68880 }, { "epoch": 0.5817061071119461, "grad_norm": 0.61400306224823, "learning_rate": 8.930878264638896e-06, "loss": 0.01, "step": 68890 }, { "epoch": 0.58179054695911, "grad_norm": 0.598949134349823, "learning_rate": 8.93042282667015e-06, "loss": 0.0183, "step": 68900 }, { "epoch": 0.5818749868062739, "grad_norm": 0.34468182921409607, "learning_rate": 8.929967303333429e-06, "loss": 0.0082, "step": 68910 }, { "epoch": 0.5819594266534378, "grad_norm": 0.4228936731815338, "learning_rate": 8.929511694638621e-06, "loss": 0.015, "step": 68920 }, { "epoch": 0.5820438665006016, "grad_norm": 0.5708571076393127, "learning_rate": 8.929056000595625e-06, "loss": 0.0133, "step": 68930 }, { "epoch": 0.5821283063477655, "grad_norm": 0.281272292137146, "learning_rate": 8.92860022121434e-06, "loss": 0.0118, "step": 68940 }, { "epoch": 0.5822127461949294, "grad_norm": 0.29537999629974365, "learning_rate": 8.92814435650466e-06, "loss": 0.0096, "step": 68950 }, { "epoch": 0.5822971860420932, "grad_norm": 0.35128313302993774, "learning_rate": 8.92768840647649e-06, "loss": 0.0091, "step": 68960 }, { "epoch": 0.5823816258892571, "grad_norm": 0.34873488545417786, "learning_rate": 8.927232371139733e-06, "loss": 0.0119, "step": 68970 }, { "epoch": 0.582466065736421, "grad_norm": 0.6934722661972046, "learning_rate": 8.926776250504294e-06, "loss": 0.0111, "step": 68980 }, { "epoch": 0.5825505055835849, "grad_norm": 0.5695963501930237, "learning_rate": 8.926320044580079e-06, "loss": 0.0142, "step": 68990 }, { "epoch": 0.5826349454307488, "grad_norm": 0.4051755964756012, "learning_rate": 8.925863753376998e-06, "loss": 0.015, "step": 69000 }, { "epoch": 0.5827193852779127, "grad_norm": 0.36156243085861206, "learning_rate": 8.92540737690496e-06, "loss": 0.0214, "step": 69010 }, { "epoch": 0.5828038251250766, "grad_norm": 0.2793925106525421, "learning_rate": 8.924950915173878e-06, "loss": 0.011, "step": 69020 }, { "epoch": 0.5828882649722404, "grad_norm": 0.05694412440061569, "learning_rate": 8.924494368193668e-06, "loss": 0.013, "step": 69030 }, { "epoch": 0.5829727048194043, "grad_norm": 0.22947970032691956, "learning_rate": 8.924037735974244e-06, "loss": 0.0112, "step": 69040 }, { "epoch": 0.5830571446665681, "grad_norm": 0.32601433992385864, "learning_rate": 8.923581018525524e-06, "loss": 0.0136, "step": 69050 }, { "epoch": 0.583141584513732, "grad_norm": 0.6178840398788452, "learning_rate": 8.923124215857429e-06, "loss": 0.0139, "step": 69060 }, { "epoch": 0.5832260243608959, "grad_norm": 0.6860058903694153, "learning_rate": 8.92266732797988e-06, "loss": 0.0088, "step": 69070 }, { "epoch": 0.5833104642080598, "grad_norm": 0.06353504210710526, "learning_rate": 8.922210354902798e-06, "loss": 0.0142, "step": 69080 }, { "epoch": 0.5833949040552237, "grad_norm": 0.18365657329559326, "learning_rate": 8.921753296636113e-06, "loss": 0.0093, "step": 69090 }, { "epoch": 0.5834793439023875, "grad_norm": 0.4142357409000397, "learning_rate": 8.921296153189748e-06, "loss": 0.007, "step": 69100 }, { "epoch": 0.5835637837495514, "grad_norm": 0.18468278646469116, "learning_rate": 8.920838924573636e-06, "loss": 0.0175, "step": 69110 }, { "epoch": 0.5836482235967153, "grad_norm": 0.05638458579778671, "learning_rate": 8.920381610797706e-06, "loss": 0.012, "step": 69120 }, { "epoch": 0.5837326634438792, "grad_norm": 0.4633188545703888, "learning_rate": 8.919924211871891e-06, "loss": 0.0178, "step": 69130 }, { "epoch": 0.5838171032910431, "grad_norm": 0.36328473687171936, "learning_rate": 8.919466727806125e-06, "loss": 0.0094, "step": 69140 }, { "epoch": 0.583901543138207, "grad_norm": 0.11991588771343231, "learning_rate": 8.919009158610345e-06, "loss": 0.0085, "step": 69150 }, { "epoch": 0.5839859829853707, "grad_norm": 0.7190538644790649, "learning_rate": 8.918551504294488e-06, "loss": 0.0145, "step": 69160 }, { "epoch": 0.5840704228325346, "grad_norm": 0.39930468797683716, "learning_rate": 8.918093764868496e-06, "loss": 0.0137, "step": 69170 }, { "epoch": 0.5841548626796985, "grad_norm": 0.25291773676872253, "learning_rate": 8.91763594034231e-06, "loss": 0.0104, "step": 69180 }, { "epoch": 0.5842393025268624, "grad_norm": 0.5792745351791382, "learning_rate": 8.917178030725878e-06, "loss": 0.0115, "step": 69190 }, { "epoch": 0.5843237423740263, "grad_norm": 0.19233554601669312, "learning_rate": 8.916720036029136e-06, "loss": 0.0106, "step": 69200 }, { "epoch": 0.5844081822211902, "grad_norm": 0.4645201861858368, "learning_rate": 8.91626195626204e-06, "loss": 0.0093, "step": 69210 }, { "epoch": 0.5844926220683541, "grad_norm": 0.46989306807518005, "learning_rate": 8.915803791434538e-06, "loss": 0.0202, "step": 69220 }, { "epoch": 0.584577061915518, "grad_norm": 0.40945637226104736, "learning_rate": 8.915345541556579e-06, "loss": 0.0144, "step": 69230 }, { "epoch": 0.5846615017626818, "grad_norm": 0.3400558531284332, "learning_rate": 8.914887206638117e-06, "loss": 0.0098, "step": 69240 }, { "epoch": 0.5847459416098457, "grad_norm": 0.23747055232524872, "learning_rate": 8.914428786689106e-06, "loss": 0.0112, "step": 69250 }, { "epoch": 0.5848303814570096, "grad_norm": 0.6104056239128113, "learning_rate": 8.913970281719507e-06, "loss": 0.0072, "step": 69260 }, { "epoch": 0.5849148213041735, "grad_norm": 0.28859224915504456, "learning_rate": 8.913511691739272e-06, "loss": 0.0112, "step": 69270 }, { "epoch": 0.5849992611513373, "grad_norm": 0.3207074701786041, "learning_rate": 8.913053016758368e-06, "loss": 0.0159, "step": 69280 }, { "epoch": 0.5850837009985012, "grad_norm": 0.4251619875431061, "learning_rate": 8.912594256786753e-06, "loss": 0.0113, "step": 69290 }, { "epoch": 0.585168140845665, "grad_norm": 0.10844141989946365, "learning_rate": 8.912135411834391e-06, "loss": 0.0127, "step": 69300 }, { "epoch": 0.5852525806928289, "grad_norm": 0.39700597524642944, "learning_rate": 8.911676481911251e-06, "loss": 0.0076, "step": 69310 }, { "epoch": 0.5853370205399928, "grad_norm": 0.15847867727279663, "learning_rate": 8.9112174670273e-06, "loss": 0.0152, "step": 69320 }, { "epoch": 0.5854214603871567, "grad_norm": 0.6685972213745117, "learning_rate": 8.910758367192507e-06, "loss": 0.014, "step": 69330 }, { "epoch": 0.5855059002343206, "grad_norm": 0.38701295852661133, "learning_rate": 8.910299182416843e-06, "loss": 0.0131, "step": 69340 }, { "epoch": 0.5855903400814845, "grad_norm": 0.20038330554962158, "learning_rate": 8.909839912710282e-06, "loss": 0.0074, "step": 69350 }, { "epoch": 0.5856747799286484, "grad_norm": 0.2978380024433136, "learning_rate": 8.909380558082799e-06, "loss": 0.0137, "step": 69360 }, { "epoch": 0.5857592197758122, "grad_norm": 0.1137075200676918, "learning_rate": 8.908921118544373e-06, "loss": 0.014, "step": 69370 }, { "epoch": 0.5858436596229761, "grad_norm": 0.20110757648944855, "learning_rate": 8.90846159410498e-06, "loss": 0.0094, "step": 69380 }, { "epoch": 0.5859280994701399, "grad_norm": 0.36375299096107483, "learning_rate": 8.908001984774602e-06, "loss": 0.0203, "step": 69390 }, { "epoch": 0.5860125393173038, "grad_norm": 0.652651846408844, "learning_rate": 8.907542290563223e-06, "loss": 0.0076, "step": 69400 }, { "epoch": 0.5860969791644677, "grad_norm": 0.16702674329280853, "learning_rate": 8.907082511480824e-06, "loss": 0.0119, "step": 69410 }, { "epoch": 0.5861814190116316, "grad_norm": 0.13768936693668365, "learning_rate": 8.906622647537395e-06, "loss": 0.0108, "step": 69420 }, { "epoch": 0.5862658588587955, "grad_norm": 0.45743444561958313, "learning_rate": 8.906162698742922e-06, "loss": 0.0154, "step": 69430 }, { "epoch": 0.5863502987059593, "grad_norm": 0.09898483008146286, "learning_rate": 8.905702665107396e-06, "loss": 0.0146, "step": 69440 }, { "epoch": 0.5864347385531232, "grad_norm": 0.1426260769367218, "learning_rate": 8.905242546640809e-06, "loss": 0.0128, "step": 69450 }, { "epoch": 0.5865191784002871, "grad_norm": 0.1634397953748703, "learning_rate": 8.904782343353152e-06, "loss": 0.0131, "step": 69460 }, { "epoch": 0.586603618247451, "grad_norm": 0.7564654350280762, "learning_rate": 8.904322055254425e-06, "loss": 0.0179, "step": 69470 }, { "epoch": 0.5866880580946149, "grad_norm": 0.24617309868335724, "learning_rate": 8.903861682354622e-06, "loss": 0.0077, "step": 69480 }, { "epoch": 0.5867724979417788, "grad_norm": 0.44149500131607056, "learning_rate": 8.903401224663744e-06, "loss": 0.0106, "step": 69490 }, { "epoch": 0.5868569377889427, "grad_norm": 0.46187159419059753, "learning_rate": 8.902940682191791e-06, "loss": 0.0082, "step": 69500 }, { "epoch": 0.5869413776361064, "grad_norm": 0.42825114727020264, "learning_rate": 8.902480054948766e-06, "loss": 0.0086, "step": 69510 }, { "epoch": 0.5870258174832703, "grad_norm": 0.25646886229515076, "learning_rate": 8.902019342944674e-06, "loss": 0.0134, "step": 69520 }, { "epoch": 0.5871102573304342, "grad_norm": 0.13679519295692444, "learning_rate": 8.901558546189521e-06, "loss": 0.0171, "step": 69530 }, { "epoch": 0.5871946971775981, "grad_norm": 0.28740739822387695, "learning_rate": 8.901097664693317e-06, "loss": 0.0114, "step": 69540 }, { "epoch": 0.587279137024762, "grad_norm": 0.1253427267074585, "learning_rate": 8.90063669846607e-06, "loss": 0.0154, "step": 69550 }, { "epoch": 0.5873635768719259, "grad_norm": 0.020239917561411858, "learning_rate": 8.900175647517792e-06, "loss": 0.0063, "step": 69560 }, { "epoch": 0.5874480167190897, "grad_norm": 0.4098885953426361, "learning_rate": 8.899714511858502e-06, "loss": 0.0124, "step": 69570 }, { "epoch": 0.5875324565662536, "grad_norm": 0.07836548984050751, "learning_rate": 8.89925329149821e-06, "loss": 0.0072, "step": 69580 }, { "epoch": 0.5876168964134175, "grad_norm": 0.4411822259426117, "learning_rate": 8.898791986446932e-06, "loss": 0.0156, "step": 69590 }, { "epoch": 0.5877013362605814, "grad_norm": 0.5563254356384277, "learning_rate": 8.898330596714696e-06, "loss": 0.0136, "step": 69600 }, { "epoch": 0.5877857761077453, "grad_norm": 0.5478739738464355, "learning_rate": 8.897869122311517e-06, "loss": 0.0111, "step": 69610 }, { "epoch": 0.5878702159549091, "grad_norm": 0.668694794178009, "learning_rate": 8.897407563247418e-06, "loss": 0.0158, "step": 69620 }, { "epoch": 0.587954655802073, "grad_norm": 0.27750012278556824, "learning_rate": 8.896945919532428e-06, "loss": 0.0286, "step": 69630 }, { "epoch": 0.5880390956492368, "grad_norm": 0.3063112199306488, "learning_rate": 8.89648419117657e-06, "loss": 0.0097, "step": 69640 }, { "epoch": 0.5881235354964007, "grad_norm": 0.5563737750053406, "learning_rate": 8.896022378189871e-06, "loss": 0.0184, "step": 69650 }, { "epoch": 0.5882079753435646, "grad_norm": 0.7330586910247803, "learning_rate": 8.895560480582368e-06, "loss": 0.014, "step": 69660 }, { "epoch": 0.5882924151907285, "grad_norm": 0.3373667597770691, "learning_rate": 8.895098498364088e-06, "loss": 0.0137, "step": 69670 }, { "epoch": 0.5883768550378924, "grad_norm": 0.6196162104606628, "learning_rate": 8.894636431545067e-06, "loss": 0.0141, "step": 69680 }, { "epoch": 0.5884612948850563, "grad_norm": 0.8487862348556519, "learning_rate": 8.89417428013534e-06, "loss": 0.0224, "step": 69690 }, { "epoch": 0.5885457347322202, "grad_norm": 0.32352519035339355, "learning_rate": 8.893712044144947e-06, "loss": 0.0087, "step": 69700 }, { "epoch": 0.588630174579384, "grad_norm": 0.3172866106033325, "learning_rate": 8.893249723583925e-06, "loss": 0.0103, "step": 69710 }, { "epoch": 0.5887146144265479, "grad_norm": 0.6399529576301575, "learning_rate": 8.892787318462318e-06, "loss": 0.0177, "step": 69720 }, { "epoch": 0.5887990542737118, "grad_norm": 0.6396499872207642, "learning_rate": 8.892324828790168e-06, "loss": 0.0131, "step": 69730 }, { "epoch": 0.5888834941208756, "grad_norm": 0.28977170586586, "learning_rate": 8.89186225457752e-06, "loss": 0.0094, "step": 69740 }, { "epoch": 0.5889679339680395, "grad_norm": 0.35406044125556946, "learning_rate": 8.891399595834422e-06, "loss": 0.0068, "step": 69750 }, { "epoch": 0.5890523738152034, "grad_norm": 0.6849130392074585, "learning_rate": 8.890936852570922e-06, "loss": 0.0183, "step": 69760 }, { "epoch": 0.5891368136623673, "grad_norm": 0.5660622715950012, "learning_rate": 8.890474024797069e-06, "loss": 0.0153, "step": 69770 }, { "epoch": 0.5892212535095311, "grad_norm": 0.39057016372680664, "learning_rate": 8.89001111252292e-06, "loss": 0.0158, "step": 69780 }, { "epoch": 0.589305693356695, "grad_norm": 0.22032766044139862, "learning_rate": 8.889548115758524e-06, "loss": 0.0132, "step": 69790 }, { "epoch": 0.5893901332038589, "grad_norm": 0.2882458567619324, "learning_rate": 8.889085034513943e-06, "loss": 0.0094, "step": 69800 }, { "epoch": 0.5894745730510228, "grad_norm": 0.09786548465490341, "learning_rate": 8.88862186879923e-06, "loss": 0.0076, "step": 69810 }, { "epoch": 0.5895590128981867, "grad_norm": 0.35665252804756165, "learning_rate": 8.888158618624447e-06, "loss": 0.0156, "step": 69820 }, { "epoch": 0.5896434527453506, "grad_norm": 0.2526116371154785, "learning_rate": 8.887695283999657e-06, "loss": 0.0121, "step": 69830 }, { "epoch": 0.5897278925925145, "grad_norm": 0.6754565238952637, "learning_rate": 8.887231864934923e-06, "loss": 0.0167, "step": 69840 }, { "epoch": 0.5898123324396782, "grad_norm": 2.2482314109802246, "learning_rate": 8.886768361440306e-06, "loss": 0.0233, "step": 69850 }, { "epoch": 0.5898967722868421, "grad_norm": 0.5777921080589294, "learning_rate": 8.886304773525877e-06, "loss": 0.0156, "step": 69860 }, { "epoch": 0.589981212134006, "grad_norm": 0.3042369782924652, "learning_rate": 8.885841101201707e-06, "loss": 0.0159, "step": 69870 }, { "epoch": 0.5900656519811699, "grad_norm": 0.11793076246976852, "learning_rate": 8.885377344477864e-06, "loss": 0.0087, "step": 69880 }, { "epoch": 0.5901500918283338, "grad_norm": 0.40319064259529114, "learning_rate": 8.88491350336442e-06, "loss": 0.0134, "step": 69890 }, { "epoch": 0.5902345316754977, "grad_norm": 0.38022711873054504, "learning_rate": 8.884449577871452e-06, "loss": 0.0117, "step": 69900 }, { "epoch": 0.5903189715226616, "grad_norm": 0.39882829785346985, "learning_rate": 8.883985568009035e-06, "loss": 0.0108, "step": 69910 }, { "epoch": 0.5904034113698254, "grad_norm": 0.28879523277282715, "learning_rate": 8.883521473787247e-06, "loss": 0.0209, "step": 69920 }, { "epoch": 0.5904878512169893, "grad_norm": 0.011019829660654068, "learning_rate": 8.883057295216168e-06, "loss": 0.0121, "step": 69930 }, { "epoch": 0.5905722910641532, "grad_norm": 0.6197896003723145, "learning_rate": 8.882593032305882e-06, "loss": 0.0197, "step": 69940 }, { "epoch": 0.5906567309113171, "grad_norm": 0.31996721029281616, "learning_rate": 8.882128685066469e-06, "loss": 0.0094, "step": 69950 }, { "epoch": 0.590741170758481, "grad_norm": 0.6485353708267212, "learning_rate": 8.881664253508018e-06, "loss": 0.0191, "step": 69960 }, { "epoch": 0.5908256106056448, "grad_norm": 0.6151976585388184, "learning_rate": 8.881199737640614e-06, "loss": 0.017, "step": 69970 }, { "epoch": 0.5909100504528086, "grad_norm": 0.8238551616668701, "learning_rate": 8.880735137474347e-06, "loss": 0.0138, "step": 69980 }, { "epoch": 0.5909944902999725, "grad_norm": 0.2715838849544525, "learning_rate": 8.880270453019309e-06, "loss": 0.0082, "step": 69990 }, { "epoch": 0.5910789301471364, "grad_norm": 0.5440129041671753, "learning_rate": 8.87980568428559e-06, "loss": 0.01, "step": 70000 }, { "epoch": 0.5911633699943003, "grad_norm": 0.46337512135505676, "learning_rate": 8.879340831283289e-06, "loss": 0.014, "step": 70010 }, { "epoch": 0.5912478098414642, "grad_norm": 0.22635965049266815, "learning_rate": 8.878875894022499e-06, "loss": 0.0092, "step": 70020 }, { "epoch": 0.5913322496886281, "grad_norm": 0.16571052372455597, "learning_rate": 8.878410872513318e-06, "loss": 0.0126, "step": 70030 }, { "epoch": 0.591416689535792, "grad_norm": 0.34875717759132385, "learning_rate": 8.877945766765849e-06, "loss": 0.0176, "step": 70040 }, { "epoch": 0.5915011293829558, "grad_norm": 0.3335011303424835, "learning_rate": 8.877480576790192e-06, "loss": 0.0144, "step": 70050 }, { "epoch": 0.5915855692301197, "grad_norm": 0.478818416595459, "learning_rate": 8.877015302596451e-06, "loss": 0.0121, "step": 70060 }, { "epoch": 0.5916700090772836, "grad_norm": 0.24483942985534668, "learning_rate": 8.876549944194734e-06, "loss": 0.0131, "step": 70070 }, { "epoch": 0.5917544489244474, "grad_norm": 0.1474718451499939, "learning_rate": 8.876084501595147e-06, "loss": 0.0106, "step": 70080 }, { "epoch": 0.5918388887716113, "grad_norm": 0.27632448077201843, "learning_rate": 8.875618974807796e-06, "loss": 0.007, "step": 70090 }, { "epoch": 0.5919233286187752, "grad_norm": 0.1978674829006195, "learning_rate": 8.875153363842797e-06, "loss": 0.0117, "step": 70100 }, { "epoch": 0.592007768465939, "grad_norm": 0.10243522375822067, "learning_rate": 8.87468766871026e-06, "loss": 0.0075, "step": 70110 }, { "epoch": 0.5920922083131029, "grad_norm": 0.1369297355413437, "learning_rate": 8.874221889420302e-06, "loss": 0.0158, "step": 70120 }, { "epoch": 0.5921766481602668, "grad_norm": 0.3593575656414032, "learning_rate": 8.873756025983038e-06, "loss": 0.0155, "step": 70130 }, { "epoch": 0.5922610880074307, "grad_norm": 0.648257315158844, "learning_rate": 8.873290078408588e-06, "loss": 0.0101, "step": 70140 }, { "epoch": 0.5923455278545946, "grad_norm": 0.3164742588996887, "learning_rate": 8.872824046707069e-06, "loss": 0.0096, "step": 70150 }, { "epoch": 0.5924299677017585, "grad_norm": 0.513972282409668, "learning_rate": 8.872357930888607e-06, "loss": 0.0207, "step": 70160 }, { "epoch": 0.5925144075489224, "grad_norm": 0.510304868221283, "learning_rate": 8.871891730963322e-06, "loss": 0.0215, "step": 70170 }, { "epoch": 0.5925988473960863, "grad_norm": 0.4804021716117859, "learning_rate": 8.871425446941344e-06, "loss": 0.0113, "step": 70180 }, { "epoch": 0.59268328724325, "grad_norm": 0.07737987488508224, "learning_rate": 8.870959078832799e-06, "loss": 0.0118, "step": 70190 }, { "epoch": 0.5927677270904139, "grad_norm": 0.515069305896759, "learning_rate": 8.870492626647816e-06, "loss": 0.0099, "step": 70200 }, { "epoch": 0.5928521669375778, "grad_norm": 0.7841353416442871, "learning_rate": 8.870026090396524e-06, "loss": 0.0129, "step": 70210 }, { "epoch": 0.5929366067847417, "grad_norm": 0.8215082883834839, "learning_rate": 8.869559470089059e-06, "loss": 0.0263, "step": 70220 }, { "epoch": 0.5930210466319056, "grad_norm": 0.8976551294326782, "learning_rate": 8.869092765735558e-06, "loss": 0.0169, "step": 70230 }, { "epoch": 0.5931054864790695, "grad_norm": 0.41021728515625, "learning_rate": 8.868625977346152e-06, "loss": 0.0146, "step": 70240 }, { "epoch": 0.5931899263262334, "grad_norm": 0.20253391563892365, "learning_rate": 8.868159104930984e-06, "loss": 0.0118, "step": 70250 }, { "epoch": 0.5932743661733972, "grad_norm": 0.35004040598869324, "learning_rate": 8.867692148500193e-06, "loss": 0.0112, "step": 70260 }, { "epoch": 0.5933588060205611, "grad_norm": 0.2237175554037094, "learning_rate": 8.867225108063922e-06, "loss": 0.0101, "step": 70270 }, { "epoch": 0.593443245867725, "grad_norm": 0.7259570360183716, "learning_rate": 8.866757983632313e-06, "loss": 0.0133, "step": 70280 }, { "epoch": 0.5935276857148889, "grad_norm": 0.3702254891395569, "learning_rate": 8.866290775215512e-06, "loss": 0.0116, "step": 70290 }, { "epoch": 0.5936121255620528, "grad_norm": 0.29590025544166565, "learning_rate": 8.865823482823668e-06, "loss": 0.0127, "step": 70300 }, { "epoch": 0.5936965654092166, "grad_norm": 0.8369847536087036, "learning_rate": 8.865356106466932e-06, "loss": 0.0095, "step": 70310 }, { "epoch": 0.5937810052563804, "grad_norm": 0.44620567560195923, "learning_rate": 8.864888646155451e-06, "loss": 0.0105, "step": 70320 }, { "epoch": 0.5938654451035443, "grad_norm": 0.01895669661462307, "learning_rate": 8.864421101899382e-06, "loss": 0.005, "step": 70330 }, { "epoch": 0.5939498849507082, "grad_norm": 0.627112627029419, "learning_rate": 8.863953473708878e-06, "loss": 0.0131, "step": 70340 }, { "epoch": 0.5940343247978721, "grad_norm": 0.2512213885784149, "learning_rate": 8.863485761594096e-06, "loss": 0.0105, "step": 70350 }, { "epoch": 0.594118764645036, "grad_norm": 0.259895920753479, "learning_rate": 8.863017965565194e-06, "loss": 0.0162, "step": 70360 }, { "epoch": 0.5942032044921999, "grad_norm": 0.4820202589035034, "learning_rate": 8.862550085632338e-06, "loss": 0.0306, "step": 70370 }, { "epoch": 0.5942876443393638, "grad_norm": 0.4938257336616516, "learning_rate": 8.862082121805683e-06, "loss": 0.0116, "step": 70380 }, { "epoch": 0.5943720841865276, "grad_norm": 0.21439824998378754, "learning_rate": 8.861614074095395e-06, "loss": 0.0175, "step": 70390 }, { "epoch": 0.5944565240336915, "grad_norm": 0.2059684544801712, "learning_rate": 8.86114594251164e-06, "loss": 0.0125, "step": 70400 }, { "epoch": 0.5945409638808554, "grad_norm": 0.31139692664146423, "learning_rate": 8.860677727064586e-06, "loss": 0.0113, "step": 70410 }, { "epoch": 0.5946254037280192, "grad_norm": 0.38032791018486023, "learning_rate": 8.860209427764405e-06, "loss": 0.0162, "step": 70420 }, { "epoch": 0.5947098435751831, "grad_norm": 0.09228182584047318, "learning_rate": 8.859741044621265e-06, "loss": 0.0095, "step": 70430 }, { "epoch": 0.594794283422347, "grad_norm": 0.0691014900803566, "learning_rate": 8.859272577645342e-06, "loss": 0.0067, "step": 70440 }, { "epoch": 0.5948787232695109, "grad_norm": 0.3594301640987396, "learning_rate": 8.858804026846807e-06, "loss": 0.0087, "step": 70450 }, { "epoch": 0.5949631631166747, "grad_norm": 0.444294810295105, "learning_rate": 8.858335392235842e-06, "loss": 0.0118, "step": 70460 }, { "epoch": 0.5950476029638386, "grad_norm": 0.17479699850082397, "learning_rate": 8.857866673822622e-06, "loss": 0.0156, "step": 70470 }, { "epoch": 0.5951320428110025, "grad_norm": 0.3700791895389557, "learning_rate": 8.857397871617326e-06, "loss": 0.0186, "step": 70480 }, { "epoch": 0.5952164826581664, "grad_norm": 0.356251984834671, "learning_rate": 8.856928985630143e-06, "loss": 0.0129, "step": 70490 }, { "epoch": 0.5953009225053303, "grad_norm": 0.7102128267288208, "learning_rate": 8.856460015871249e-06, "loss": 0.0114, "step": 70500 }, { "epoch": 0.5953853623524942, "grad_norm": 0.5381515622138977, "learning_rate": 8.855990962350836e-06, "loss": 0.0109, "step": 70510 }, { "epoch": 0.5954698021996581, "grad_norm": 0.612126886844635, "learning_rate": 8.855521825079088e-06, "loss": 0.011, "step": 70520 }, { "epoch": 0.595554242046822, "grad_norm": 0.1064838171005249, "learning_rate": 8.855052604066196e-06, "loss": 0.0106, "step": 70530 }, { "epoch": 0.5956386818939857, "grad_norm": 0.42360907793045044, "learning_rate": 8.854583299322353e-06, "loss": 0.0152, "step": 70540 }, { "epoch": 0.5957231217411496, "grad_norm": 0.30847612023353577, "learning_rate": 8.854113910857748e-06, "loss": 0.0191, "step": 70550 }, { "epoch": 0.5958075615883135, "grad_norm": 0.14122743904590607, "learning_rate": 8.853644438682581e-06, "loss": 0.0224, "step": 70560 }, { "epoch": 0.5958920014354774, "grad_norm": 0.414156049489975, "learning_rate": 8.853174882807046e-06, "loss": 0.0123, "step": 70570 }, { "epoch": 0.5959764412826413, "grad_norm": 0.8752584457397461, "learning_rate": 8.852705243241341e-06, "loss": 0.0122, "step": 70580 }, { "epoch": 0.5960608811298052, "grad_norm": 0.3705607056617737, "learning_rate": 8.852235519995668e-06, "loss": 0.0109, "step": 70590 }, { "epoch": 0.596145320976969, "grad_norm": 0.5128137469291687, "learning_rate": 8.851765713080231e-06, "loss": 0.0111, "step": 70600 }, { "epoch": 0.5962297608241329, "grad_norm": 0.5638371109962463, "learning_rate": 8.85129582250523e-06, "loss": 0.0128, "step": 70610 }, { "epoch": 0.5963142006712968, "grad_norm": 0.7687093615531921, "learning_rate": 8.850825848280874e-06, "loss": 0.0107, "step": 70620 }, { "epoch": 0.5963986405184607, "grad_norm": 0.42891639471054077, "learning_rate": 8.85035579041737e-06, "loss": 0.0121, "step": 70630 }, { "epoch": 0.5964830803656246, "grad_norm": 0.2657772898674011, "learning_rate": 8.849885648924925e-06, "loss": 0.0154, "step": 70640 }, { "epoch": 0.5965675202127884, "grad_norm": 0.1460592895746231, "learning_rate": 8.849415423813755e-06, "loss": 0.0111, "step": 70650 }, { "epoch": 0.5966519600599522, "grad_norm": 0.46426117420196533, "learning_rate": 8.84894511509407e-06, "loss": 0.0134, "step": 70660 }, { "epoch": 0.5967363999071161, "grad_norm": 0.13299287855625153, "learning_rate": 8.848474722776084e-06, "loss": 0.0212, "step": 70670 }, { "epoch": 0.59682083975428, "grad_norm": 0.3370959162712097, "learning_rate": 8.848004246870017e-06, "loss": 0.0085, "step": 70680 }, { "epoch": 0.5969052796014439, "grad_norm": 0.5475896596908569, "learning_rate": 8.847533687386088e-06, "loss": 0.014, "step": 70690 }, { "epoch": 0.5969897194486078, "grad_norm": 0.14947186410427094, "learning_rate": 8.847063044334513e-06, "loss": 0.0086, "step": 70700 }, { "epoch": 0.5970741592957717, "grad_norm": 0.026186302304267883, "learning_rate": 8.846592317725519e-06, "loss": 0.0151, "step": 70710 }, { "epoch": 0.5971585991429356, "grad_norm": 0.22451293468475342, "learning_rate": 8.846121507569327e-06, "loss": 0.0097, "step": 70720 }, { "epoch": 0.5972430389900995, "grad_norm": 0.7020491361618042, "learning_rate": 8.845650613876165e-06, "loss": 0.0145, "step": 70730 }, { "epoch": 0.5973274788372633, "grad_norm": 0.31338170170783997, "learning_rate": 8.845179636656259e-06, "loss": 0.0155, "step": 70740 }, { "epoch": 0.5974119186844272, "grad_norm": 0.7127974033355713, "learning_rate": 8.844708575919838e-06, "loss": 0.0152, "step": 70750 }, { "epoch": 0.5974963585315911, "grad_norm": 0.25579363107681274, "learning_rate": 8.844237431677137e-06, "loss": 0.011, "step": 70760 }, { "epoch": 0.5975807983787549, "grad_norm": 1.1495673656463623, "learning_rate": 8.843766203938385e-06, "loss": 0.0178, "step": 70770 }, { "epoch": 0.5976652382259188, "grad_norm": 0.2845216393470764, "learning_rate": 8.843294892713821e-06, "loss": 0.0109, "step": 70780 }, { "epoch": 0.5977496780730827, "grad_norm": 0.3308914601802826, "learning_rate": 8.842823498013678e-06, "loss": 0.0138, "step": 70790 }, { "epoch": 0.5978341179202465, "grad_norm": 0.3036424517631531, "learning_rate": 8.842352019848195e-06, "loss": 0.0147, "step": 70800 }, { "epoch": 0.5979185577674104, "grad_norm": 0.3288794457912445, "learning_rate": 8.841880458227614e-06, "loss": 0.0145, "step": 70810 }, { "epoch": 0.5980029976145743, "grad_norm": 0.33557912707328796, "learning_rate": 8.841408813162178e-06, "loss": 0.0169, "step": 70820 }, { "epoch": 0.5980874374617382, "grad_norm": 0.30709031224250793, "learning_rate": 8.840937084662129e-06, "loss": 0.0072, "step": 70830 }, { "epoch": 0.5981718773089021, "grad_norm": 0.2727930247783661, "learning_rate": 8.840465272737713e-06, "loss": 0.012, "step": 70840 }, { "epoch": 0.598256317156066, "grad_norm": 0.37765949964523315, "learning_rate": 8.839993377399178e-06, "loss": 0.0064, "step": 70850 }, { "epoch": 0.5983407570032299, "grad_norm": 0.32407405972480774, "learning_rate": 8.839521398656776e-06, "loss": 0.0167, "step": 70860 }, { "epoch": 0.5984251968503937, "grad_norm": 0.6500611901283264, "learning_rate": 8.839049336520752e-06, "loss": 0.0109, "step": 70870 }, { "epoch": 0.5985096366975575, "grad_norm": 0.5177162885665894, "learning_rate": 8.838577191001367e-06, "loss": 0.0091, "step": 70880 }, { "epoch": 0.5985940765447214, "grad_norm": 0.39451614022254944, "learning_rate": 8.838104962108872e-06, "loss": 0.0122, "step": 70890 }, { "epoch": 0.5986785163918853, "grad_norm": 0.3419186472892761, "learning_rate": 8.837632649853522e-06, "loss": 0.0111, "step": 70900 }, { "epoch": 0.5987629562390492, "grad_norm": 0.3551696836948395, "learning_rate": 8.837160254245578e-06, "loss": 0.0091, "step": 70910 }, { "epoch": 0.5988473960862131, "grad_norm": 0.33231669664382935, "learning_rate": 8.836687775295298e-06, "loss": 0.015, "step": 70920 }, { "epoch": 0.598931835933377, "grad_norm": 0.6536023616790771, "learning_rate": 8.836215213012948e-06, "loss": 0.0125, "step": 70930 }, { "epoch": 0.5990162757805408, "grad_norm": 0.48002541065216064, "learning_rate": 8.835742567408788e-06, "loss": 0.0127, "step": 70940 }, { "epoch": 0.5991007156277047, "grad_norm": 0.6746527552604675, "learning_rate": 8.835269838493085e-06, "loss": 0.0075, "step": 70950 }, { "epoch": 0.5991851554748686, "grad_norm": 0.3516559302806854, "learning_rate": 8.83479702627611e-06, "loss": 0.0158, "step": 70960 }, { "epoch": 0.5992695953220325, "grad_norm": 0.3773139715194702, "learning_rate": 8.834324130768128e-06, "loss": 0.0074, "step": 70970 }, { "epoch": 0.5993540351691964, "grad_norm": 0.48567846417427063, "learning_rate": 8.833851151979411e-06, "loss": 0.0166, "step": 70980 }, { "epoch": 0.5994384750163603, "grad_norm": 0.171866312623024, "learning_rate": 8.833378089920232e-06, "loss": 0.0108, "step": 70990 }, { "epoch": 0.599522914863524, "grad_norm": 0.1342158168554306, "learning_rate": 8.832904944600867e-06, "loss": 0.0101, "step": 71000 }, { "epoch": 0.5996073547106879, "grad_norm": 0.3735840916633606, "learning_rate": 8.832431716031592e-06, "loss": 0.0145, "step": 71010 }, { "epoch": 0.5996917945578518, "grad_norm": 0.6552032232284546, "learning_rate": 8.831958404222687e-06, "loss": 0.0167, "step": 71020 }, { "epoch": 0.5997762344050157, "grad_norm": 0.5095136165618896, "learning_rate": 8.831485009184428e-06, "loss": 0.0145, "step": 71030 }, { "epoch": 0.5998606742521796, "grad_norm": 0.24676381051540375, "learning_rate": 8.831011530927103e-06, "loss": 0.0111, "step": 71040 }, { "epoch": 0.5999451140993435, "grad_norm": 0.19583845138549805, "learning_rate": 8.83053796946099e-06, "loss": 0.0108, "step": 71050 }, { "epoch": 0.6000295539465074, "grad_norm": 0.2239667773246765, "learning_rate": 8.830064324796376e-06, "loss": 0.0127, "step": 71060 }, { "epoch": 0.6001139937936713, "grad_norm": 0.5610796809196472, "learning_rate": 8.829590596943554e-06, "loss": 0.0135, "step": 71070 }, { "epoch": 0.6001984336408351, "grad_norm": 0.5659559965133667, "learning_rate": 8.829116785912805e-06, "loss": 0.0166, "step": 71080 }, { "epoch": 0.600282873487999, "grad_norm": 0.02823498658835888, "learning_rate": 8.828642891714427e-06, "loss": 0.0084, "step": 71090 }, { "epoch": 0.6003673133351629, "grad_norm": 0.27568915486335754, "learning_rate": 8.828168914358708e-06, "loss": 0.0169, "step": 71100 }, { "epoch": 0.6004517531823267, "grad_norm": 0.398044228553772, "learning_rate": 8.827694853855945e-06, "loss": 0.0155, "step": 71110 }, { "epoch": 0.6005361930294906, "grad_norm": 0.2092505544424057, "learning_rate": 8.827220710216435e-06, "loss": 0.0119, "step": 71120 }, { "epoch": 0.6006206328766545, "grad_norm": 0.2561799883842468, "learning_rate": 8.826746483450474e-06, "loss": 0.0122, "step": 71130 }, { "epoch": 0.6007050727238183, "grad_norm": 0.28263959288597107, "learning_rate": 8.826272173568366e-06, "loss": 0.0219, "step": 71140 }, { "epoch": 0.6007895125709822, "grad_norm": 0.47196438908576965, "learning_rate": 8.82579778058041e-06, "loss": 0.0144, "step": 71150 }, { "epoch": 0.6008739524181461, "grad_norm": 0.21337798237800598, "learning_rate": 8.82532330449691e-06, "loss": 0.013, "step": 71160 }, { "epoch": 0.60095839226531, "grad_norm": 0.34427157044410706, "learning_rate": 8.82484874532817e-06, "loss": 0.0125, "step": 71170 }, { "epoch": 0.6010428321124739, "grad_norm": 0.18952420353889465, "learning_rate": 8.8243741030845e-06, "loss": 0.0086, "step": 71180 }, { "epoch": 0.6011272719596378, "grad_norm": 0.467586487531662, "learning_rate": 8.823899377776208e-06, "loss": 0.0171, "step": 71190 }, { "epoch": 0.6012117118068017, "grad_norm": 0.6868593692779541, "learning_rate": 8.823424569413604e-06, "loss": 0.0127, "step": 71200 }, { "epoch": 0.6012961516539655, "grad_norm": 0.380393922328949, "learning_rate": 8.822949678007003e-06, "loss": 0.0203, "step": 71210 }, { "epoch": 0.6013805915011294, "grad_norm": 0.2739667296409607, "learning_rate": 8.82247470356672e-06, "loss": 0.0159, "step": 71220 }, { "epoch": 0.6014650313482932, "grad_norm": 0.3786265552043915, "learning_rate": 8.821999646103068e-06, "loss": 0.0146, "step": 71230 }, { "epoch": 0.6015494711954571, "grad_norm": 0.3250437378883362, "learning_rate": 8.821524505626366e-06, "loss": 0.0151, "step": 71240 }, { "epoch": 0.601633911042621, "grad_norm": 0.3882814049720764, "learning_rate": 8.821049282146935e-06, "loss": 0.0133, "step": 71250 }, { "epoch": 0.6017183508897849, "grad_norm": 0.3882240653038025, "learning_rate": 8.820573975675098e-06, "loss": 0.0144, "step": 71260 }, { "epoch": 0.6018027907369488, "grad_norm": 0.44182059168815613, "learning_rate": 8.820098586221177e-06, "loss": 0.0136, "step": 71270 }, { "epoch": 0.6018872305841126, "grad_norm": 0.47621333599090576, "learning_rate": 8.819623113795497e-06, "loss": 0.0158, "step": 71280 }, { "epoch": 0.6019716704312765, "grad_norm": 0.37362781167030334, "learning_rate": 8.819147558408387e-06, "loss": 0.016, "step": 71290 }, { "epoch": 0.6020561102784404, "grad_norm": 0.19515936076641083, "learning_rate": 8.818671920070172e-06, "loss": 0.0075, "step": 71300 }, { "epoch": 0.6021405501256043, "grad_norm": 0.1510612666606903, "learning_rate": 8.818196198791188e-06, "loss": 0.0196, "step": 71310 }, { "epoch": 0.6022249899727682, "grad_norm": 0.31075286865234375, "learning_rate": 8.817720394581764e-06, "loss": 0.007, "step": 71320 }, { "epoch": 0.6023094298199321, "grad_norm": 0.37188199162483215, "learning_rate": 8.817244507452236e-06, "loss": 0.0066, "step": 71330 }, { "epoch": 0.6023938696670958, "grad_norm": 0.8564198017120361, "learning_rate": 8.81676853741294e-06, "loss": 0.0123, "step": 71340 }, { "epoch": 0.6024783095142597, "grad_norm": 0.7655377984046936, "learning_rate": 8.81629248447421e-06, "loss": 0.0155, "step": 71350 }, { "epoch": 0.6025627493614236, "grad_norm": 0.41390281915664673, "learning_rate": 8.815816348646391e-06, "loss": 0.0115, "step": 71360 }, { "epoch": 0.6026471892085875, "grad_norm": 0.3057597875595093, "learning_rate": 8.815340129939824e-06, "loss": 0.0093, "step": 71370 }, { "epoch": 0.6027316290557514, "grad_norm": 0.1612236052751541, "learning_rate": 8.81486382836485e-06, "loss": 0.0122, "step": 71380 }, { "epoch": 0.6028160689029153, "grad_norm": 0.15481162071228027, "learning_rate": 8.814387443931819e-06, "loss": 0.0166, "step": 71390 }, { "epoch": 0.6029005087500792, "grad_norm": 0.26799657940864563, "learning_rate": 8.81391097665107e-06, "loss": 0.0055, "step": 71400 }, { "epoch": 0.602984948597243, "grad_norm": 0.3891451358795166, "learning_rate": 8.813434426532957e-06, "loss": 0.0127, "step": 71410 }, { "epoch": 0.6030693884444069, "grad_norm": 0.4096590280532837, "learning_rate": 8.81295779358783e-06, "loss": 0.0101, "step": 71420 }, { "epoch": 0.6031538282915708, "grad_norm": 0.8299562335014343, "learning_rate": 8.812481077826042e-06, "loss": 0.0143, "step": 71430 }, { "epoch": 0.6032382681387347, "grad_norm": 0.26150912046432495, "learning_rate": 8.812004279257946e-06, "loss": 0.0112, "step": 71440 }, { "epoch": 0.6033227079858986, "grad_norm": 0.7832672595977783, "learning_rate": 8.811527397893897e-06, "loss": 0.0082, "step": 71450 }, { "epoch": 0.6034071478330624, "grad_norm": 0.3827352523803711, "learning_rate": 8.811050433744255e-06, "loss": 0.0107, "step": 71460 }, { "epoch": 0.6034915876802263, "grad_norm": 0.509005606174469, "learning_rate": 8.810573386819377e-06, "loss": 0.0201, "step": 71470 }, { "epoch": 0.6035760275273901, "grad_norm": 0.34844985604286194, "learning_rate": 8.810096257129627e-06, "loss": 0.0192, "step": 71480 }, { "epoch": 0.603660467374554, "grad_norm": 0.14597655832767487, "learning_rate": 8.809619044685369e-06, "loss": 0.0118, "step": 71490 }, { "epoch": 0.6037449072217179, "grad_norm": 0.44953402876853943, "learning_rate": 8.809141749496963e-06, "loss": 0.0143, "step": 71500 }, { "epoch": 0.6038293470688818, "grad_norm": 0.9426469206809998, "learning_rate": 8.80866437157478e-06, "loss": 0.0122, "step": 71510 }, { "epoch": 0.6039137869160457, "grad_norm": 0.24763460457324982, "learning_rate": 8.808186910929187e-06, "loss": 0.0105, "step": 71520 }, { "epoch": 0.6039982267632096, "grad_norm": 0.19644491374492645, "learning_rate": 8.807709367570556e-06, "loss": 0.0082, "step": 71530 }, { "epoch": 0.6040826666103735, "grad_norm": 1.243801236152649, "learning_rate": 8.807231741509257e-06, "loss": 0.0134, "step": 71540 }, { "epoch": 0.6041671064575374, "grad_norm": 1.0566061735153198, "learning_rate": 8.806754032755665e-06, "loss": 0.03, "step": 71550 }, { "epoch": 0.6042515463047012, "grad_norm": 0.47361546754837036, "learning_rate": 8.806276241320154e-06, "loss": 0.0139, "step": 71560 }, { "epoch": 0.604335986151865, "grad_norm": 0.6308233141899109, "learning_rate": 8.805798367213103e-06, "loss": 0.0102, "step": 71570 }, { "epoch": 0.6044204259990289, "grad_norm": 0.4572001099586487, "learning_rate": 8.805320410444893e-06, "loss": 0.0158, "step": 71580 }, { "epoch": 0.6045048658461928, "grad_norm": 0.28327447175979614, "learning_rate": 8.804842371025904e-06, "loss": 0.0069, "step": 71590 }, { "epoch": 0.6045893056933567, "grad_norm": 0.3484283983707428, "learning_rate": 8.804364248966517e-06, "loss": 0.0184, "step": 71600 }, { "epoch": 0.6046737455405206, "grad_norm": 0.10168599337339401, "learning_rate": 8.803886044277118e-06, "loss": 0.0081, "step": 71610 }, { "epoch": 0.6047581853876844, "grad_norm": 0.7610624432563782, "learning_rate": 8.803407756968093e-06, "loss": 0.012, "step": 71620 }, { "epoch": 0.6048426252348483, "grad_norm": 0.332505464553833, "learning_rate": 8.802929387049833e-06, "loss": 0.0133, "step": 71630 }, { "epoch": 0.6049270650820122, "grad_norm": 0.19805917143821716, "learning_rate": 8.802450934532725e-06, "loss": 0.0097, "step": 71640 }, { "epoch": 0.6050115049291761, "grad_norm": 0.16126087307929993, "learning_rate": 8.801972399427161e-06, "loss": 0.0145, "step": 71650 }, { "epoch": 0.60509594477634, "grad_norm": 0.28816843032836914, "learning_rate": 8.801493781743537e-06, "loss": 0.0116, "step": 71660 }, { "epoch": 0.6051803846235039, "grad_norm": 0.37443235516548157, "learning_rate": 8.801015081492246e-06, "loss": 0.0156, "step": 71670 }, { "epoch": 0.6052648244706678, "grad_norm": 0.35697099566459656, "learning_rate": 8.800536298683687e-06, "loss": 0.0085, "step": 71680 }, { "epoch": 0.6053492643178315, "grad_norm": 0.3039175271987915, "learning_rate": 8.800057433328257e-06, "loss": 0.0152, "step": 71690 }, { "epoch": 0.6054337041649954, "grad_norm": 0.5227802991867065, "learning_rate": 8.79957848543636e-06, "loss": 0.0222, "step": 71700 }, { "epoch": 0.6055181440121593, "grad_norm": 0.19152705371379852, "learning_rate": 8.799099455018395e-06, "loss": 0.0124, "step": 71710 }, { "epoch": 0.6056025838593232, "grad_norm": 0.2135104537010193, "learning_rate": 8.798620342084771e-06, "loss": 0.0213, "step": 71720 }, { "epoch": 0.6056870237064871, "grad_norm": 1.4005683660507202, "learning_rate": 8.798141146645891e-06, "loss": 0.0195, "step": 71730 }, { "epoch": 0.605771463553651, "grad_norm": 0.27249038219451904, "learning_rate": 8.797661868712163e-06, "loss": 0.0143, "step": 71740 }, { "epoch": 0.6058559034008149, "grad_norm": 0.10760775953531265, "learning_rate": 8.797182508293997e-06, "loss": 0.0184, "step": 71750 }, { "epoch": 0.6059403432479787, "grad_norm": 0.3850250244140625, "learning_rate": 8.796703065401804e-06, "loss": 0.0128, "step": 71760 }, { "epoch": 0.6060247830951426, "grad_norm": 0.16324734687805176, "learning_rate": 8.796223540046e-06, "loss": 0.01, "step": 71770 }, { "epoch": 0.6061092229423065, "grad_norm": 0.3478115200996399, "learning_rate": 8.795743932237e-06, "loss": 0.01, "step": 71780 }, { "epoch": 0.6061936627894704, "grad_norm": 0.5890544652938843, "learning_rate": 8.795264241985218e-06, "loss": 0.0085, "step": 71790 }, { "epoch": 0.6062781026366342, "grad_norm": 0.5156133770942688, "learning_rate": 8.794784469301075e-06, "loss": 0.0071, "step": 71800 }, { "epoch": 0.6063625424837981, "grad_norm": 0.4676365554332733, "learning_rate": 8.794304614194988e-06, "loss": 0.0144, "step": 71810 }, { "epoch": 0.606446982330962, "grad_norm": 0.7836660742759705, "learning_rate": 8.793824676677386e-06, "loss": 0.026, "step": 71820 }, { "epoch": 0.6065314221781258, "grad_norm": 0.4317767024040222, "learning_rate": 8.793344656758686e-06, "loss": 0.0087, "step": 71830 }, { "epoch": 0.6066158620252897, "grad_norm": 0.3314960300922394, "learning_rate": 8.79286455444932e-06, "loss": 0.0117, "step": 71840 }, { "epoch": 0.6067003018724536, "grad_norm": 0.20108206570148468, "learning_rate": 8.792384369759714e-06, "loss": 0.0112, "step": 71850 }, { "epoch": 0.6067847417196175, "grad_norm": 0.4155370593070984, "learning_rate": 8.791904102700294e-06, "loss": 0.017, "step": 71860 }, { "epoch": 0.6068691815667814, "grad_norm": 0.20751915872097015, "learning_rate": 8.791423753281496e-06, "loss": 0.0092, "step": 71870 }, { "epoch": 0.6069536214139453, "grad_norm": 0.3516866862773895, "learning_rate": 8.790943321513749e-06, "loss": 0.0088, "step": 71880 }, { "epoch": 0.6070380612611092, "grad_norm": 0.045428674668073654, "learning_rate": 8.790462807407492e-06, "loss": 0.0096, "step": 71890 }, { "epoch": 0.607122501108273, "grad_norm": 0.22478310763835907, "learning_rate": 8.789982210973159e-06, "loss": 0.0104, "step": 71900 }, { "epoch": 0.6072069409554369, "grad_norm": 0.4726668894290924, "learning_rate": 8.789501532221188e-06, "loss": 0.0121, "step": 71910 }, { "epoch": 0.6072913808026007, "grad_norm": 0.8553647994995117, "learning_rate": 8.78902077116202e-06, "loss": 0.0168, "step": 71920 }, { "epoch": 0.6073758206497646, "grad_norm": 1.1585795879364014, "learning_rate": 8.7885399278061e-06, "loss": 0.0214, "step": 71930 }, { "epoch": 0.6074602604969285, "grad_norm": 0.4378274083137512, "learning_rate": 8.788059002163867e-06, "loss": 0.0129, "step": 71940 }, { "epoch": 0.6075447003440924, "grad_norm": 0.4348190426826477, "learning_rate": 8.787577994245768e-06, "loss": 0.0204, "step": 71950 }, { "epoch": 0.6076291401912562, "grad_norm": 0.21779826283454895, "learning_rate": 8.787096904062253e-06, "loss": 0.0184, "step": 71960 }, { "epoch": 0.6077135800384201, "grad_norm": 0.4167310893535614, "learning_rate": 8.786615731623769e-06, "loss": 0.0106, "step": 71970 }, { "epoch": 0.607798019885584, "grad_norm": 0.5780656337738037, "learning_rate": 8.786134476940766e-06, "loss": 0.0143, "step": 71980 }, { "epoch": 0.6078824597327479, "grad_norm": 0.6496355533599854, "learning_rate": 8.785653140023699e-06, "loss": 0.0102, "step": 71990 }, { "epoch": 0.6079668995799118, "grad_norm": 0.22413960099220276, "learning_rate": 8.785171720883022e-06, "loss": 0.0135, "step": 72000 }, { "epoch": 0.6080513394270757, "grad_norm": 0.2304420918226242, "learning_rate": 8.784690219529188e-06, "loss": 0.0137, "step": 72010 }, { "epoch": 0.6081357792742396, "grad_norm": 0.2586575150489807, "learning_rate": 8.784208635972662e-06, "loss": 0.0107, "step": 72020 }, { "epoch": 0.6082202191214033, "grad_norm": 0.5302172303199768, "learning_rate": 8.783726970223898e-06, "loss": 0.0068, "step": 72030 }, { "epoch": 0.6083046589685672, "grad_norm": 0.2897048890590668, "learning_rate": 8.783245222293359e-06, "loss": 0.0095, "step": 72040 }, { "epoch": 0.6083890988157311, "grad_norm": 0.33619701862335205, "learning_rate": 8.782763392191508e-06, "loss": 0.0152, "step": 72050 }, { "epoch": 0.608473538662895, "grad_norm": 0.9232521057128906, "learning_rate": 8.782281479928814e-06, "loss": 0.0147, "step": 72060 }, { "epoch": 0.6085579785100589, "grad_norm": 0.2561909556388855, "learning_rate": 8.781799485515741e-06, "loss": 0.0083, "step": 72070 }, { "epoch": 0.6086424183572228, "grad_norm": 0.5291321277618408, "learning_rate": 8.781317408962757e-06, "loss": 0.0106, "step": 72080 }, { "epoch": 0.6087268582043867, "grad_norm": 0.7348375916481018, "learning_rate": 8.780835250280334e-06, "loss": 0.0106, "step": 72090 }, { "epoch": 0.6088112980515505, "grad_norm": 0.38368338346481323, "learning_rate": 8.780353009478944e-06, "loss": 0.0125, "step": 72100 }, { "epoch": 0.6088957378987144, "grad_norm": 0.2455018311738968, "learning_rate": 8.77987068656906e-06, "loss": 0.011, "step": 72110 }, { "epoch": 0.6089801777458783, "grad_norm": 0.16280579566955566, "learning_rate": 8.779388281561163e-06, "loss": 0.0167, "step": 72120 }, { "epoch": 0.6090646175930422, "grad_norm": 0.42047011852264404, "learning_rate": 8.778905794465724e-06, "loss": 0.0111, "step": 72130 }, { "epoch": 0.6091490574402061, "grad_norm": 0.19668997824192047, "learning_rate": 8.778423225293227e-06, "loss": 0.0112, "step": 72140 }, { "epoch": 0.6092334972873699, "grad_norm": 0.07206647098064423, "learning_rate": 8.777940574054151e-06, "loss": 0.0112, "step": 72150 }, { "epoch": 0.6093179371345337, "grad_norm": 0.21961794793605804, "learning_rate": 8.77745784075898e-06, "loss": 0.0066, "step": 72160 }, { "epoch": 0.6094023769816976, "grad_norm": 0.1540725827217102, "learning_rate": 8.776975025418197e-06, "loss": 0.0104, "step": 72170 }, { "epoch": 0.6094868168288615, "grad_norm": 1.059878945350647, "learning_rate": 8.776492128042294e-06, "loss": 0.0096, "step": 72180 }, { "epoch": 0.6095712566760254, "grad_norm": 0.19852778315544128, "learning_rate": 8.776009148641753e-06, "loss": 0.0178, "step": 72190 }, { "epoch": 0.6096556965231893, "grad_norm": 0.5268886089324951, "learning_rate": 8.775526087227069e-06, "loss": 0.0133, "step": 72200 }, { "epoch": 0.6097401363703532, "grad_norm": 0.17258372902870178, "learning_rate": 8.775042943808731e-06, "loss": 0.0097, "step": 72210 }, { "epoch": 0.6098245762175171, "grad_norm": 0.4177778363227844, "learning_rate": 8.774559718397233e-06, "loss": 0.0261, "step": 72220 }, { "epoch": 0.609909016064681, "grad_norm": 0.17533892393112183, "learning_rate": 8.774076411003074e-06, "loss": 0.014, "step": 72230 }, { "epoch": 0.6099934559118448, "grad_norm": 0.28354352712631226, "learning_rate": 8.773593021636746e-06, "loss": 0.0114, "step": 72240 }, { "epoch": 0.6100778957590087, "grad_norm": 0.5042291879653931, "learning_rate": 8.773109550308751e-06, "loss": 0.0116, "step": 72250 }, { "epoch": 0.6101623356061725, "grad_norm": 0.2729102373123169, "learning_rate": 8.772625997029592e-06, "loss": 0.0112, "step": 72260 }, { "epoch": 0.6102467754533364, "grad_norm": 0.4226902425289154, "learning_rate": 8.772142361809768e-06, "loss": 0.0133, "step": 72270 }, { "epoch": 0.6103312153005003, "grad_norm": 0.18240125477313995, "learning_rate": 8.771658644659786e-06, "loss": 0.0116, "step": 72280 }, { "epoch": 0.6104156551476642, "grad_norm": 0.1910984367132187, "learning_rate": 8.771174845590149e-06, "loss": 0.0157, "step": 72290 }, { "epoch": 0.610500094994828, "grad_norm": 0.20923133194446564, "learning_rate": 8.770690964611368e-06, "loss": 0.0078, "step": 72300 }, { "epoch": 0.6105845348419919, "grad_norm": 0.40686458349227905, "learning_rate": 8.770207001733953e-06, "loss": 0.0174, "step": 72310 }, { "epoch": 0.6106689746891558, "grad_norm": 0.26616355776786804, "learning_rate": 8.769722956968414e-06, "loss": 0.0154, "step": 72320 }, { "epoch": 0.6107534145363197, "grad_norm": 0.4712711274623871, "learning_rate": 8.769238830325264e-06, "loss": 0.0088, "step": 72330 }, { "epoch": 0.6108378543834836, "grad_norm": 0.13993647694587708, "learning_rate": 8.76875462181502e-06, "loss": 0.0105, "step": 72340 }, { "epoch": 0.6109222942306475, "grad_norm": 0.5229104161262512, "learning_rate": 8.768270331448197e-06, "loss": 0.0145, "step": 72350 }, { "epoch": 0.6110067340778114, "grad_norm": 0.5390434861183167, "learning_rate": 8.767785959235316e-06, "loss": 0.005, "step": 72360 }, { "epoch": 0.6110911739249753, "grad_norm": 0.49994179606437683, "learning_rate": 8.767301505186895e-06, "loss": 0.0112, "step": 72370 }, { "epoch": 0.611175613772139, "grad_norm": 0.4107465147972107, "learning_rate": 8.766816969313457e-06, "loss": 0.0134, "step": 72380 }, { "epoch": 0.6112600536193029, "grad_norm": 0.48679760098457336, "learning_rate": 8.766332351625528e-06, "loss": 0.0142, "step": 72390 }, { "epoch": 0.6113444934664668, "grad_norm": 0.7418422698974609, "learning_rate": 8.76584765213363e-06, "loss": 0.0145, "step": 72400 }, { "epoch": 0.6114289333136307, "grad_norm": 0.41078153252601624, "learning_rate": 8.765362870848294e-06, "loss": 0.0183, "step": 72410 }, { "epoch": 0.6115133731607946, "grad_norm": 0.2879903018474579, "learning_rate": 8.764878007780048e-06, "loss": 0.0103, "step": 72420 }, { "epoch": 0.6115978130079585, "grad_norm": 0.07010147720575333, "learning_rate": 8.764393062939423e-06, "loss": 0.0094, "step": 72430 }, { "epoch": 0.6116822528551223, "grad_norm": 0.2974597215652466, "learning_rate": 8.763908036336952e-06, "loss": 0.0144, "step": 72440 }, { "epoch": 0.6117666927022862, "grad_norm": 0.28298959136009216, "learning_rate": 8.76342292798317e-06, "loss": 0.0109, "step": 72450 }, { "epoch": 0.6118511325494501, "grad_norm": 0.3605673015117645, "learning_rate": 8.762937737888613e-06, "loss": 0.009, "step": 72460 }, { "epoch": 0.611935572396614, "grad_norm": 0.42230141162872314, "learning_rate": 8.762452466063818e-06, "loss": 0.0165, "step": 72470 }, { "epoch": 0.6120200122437779, "grad_norm": 0.7946943044662476, "learning_rate": 8.761967112519328e-06, "loss": 0.0207, "step": 72480 }, { "epoch": 0.6121044520909417, "grad_norm": 0.4439505338668823, "learning_rate": 8.761481677265683e-06, "loss": 0.0148, "step": 72490 }, { "epoch": 0.6121888919381056, "grad_norm": 0.6328783631324768, "learning_rate": 8.760996160313427e-06, "loss": 0.0075, "step": 72500 }, { "epoch": 0.6122733317852694, "grad_norm": 0.3213738799095154, "learning_rate": 8.760510561673105e-06, "loss": 0.0122, "step": 72510 }, { "epoch": 0.6123577716324333, "grad_norm": 0.20461103320121765, "learning_rate": 8.760024881355264e-06, "loss": 0.023, "step": 72520 }, { "epoch": 0.6124422114795972, "grad_norm": 0.2952219545841217, "learning_rate": 8.759539119370452e-06, "loss": 0.0107, "step": 72530 }, { "epoch": 0.6125266513267611, "grad_norm": 0.5956734418869019, "learning_rate": 8.759053275729223e-06, "loss": 0.0071, "step": 72540 }, { "epoch": 0.612611091173925, "grad_norm": 0.5812730193138123, "learning_rate": 8.758567350442126e-06, "loss": 0.0143, "step": 72550 }, { "epoch": 0.6126955310210889, "grad_norm": 0.9339331388473511, "learning_rate": 8.758081343519716e-06, "loss": 0.018, "step": 72560 }, { "epoch": 0.6127799708682528, "grad_norm": 0.2972323000431061, "learning_rate": 8.75759525497255e-06, "loss": 0.0183, "step": 72570 }, { "epoch": 0.6128644107154166, "grad_norm": 0.5512222647666931, "learning_rate": 8.757109084811184e-06, "loss": 0.0092, "step": 72580 }, { "epoch": 0.6129488505625805, "grad_norm": 0.41146984696388245, "learning_rate": 8.75662283304618e-06, "loss": 0.009, "step": 72590 }, { "epoch": 0.6130332904097444, "grad_norm": 0.19377771019935608, "learning_rate": 8.756136499688098e-06, "loss": 0.0078, "step": 72600 }, { "epoch": 0.6131177302569082, "grad_norm": 0.4142472445964813, "learning_rate": 8.7556500847475e-06, "loss": 0.0146, "step": 72610 }, { "epoch": 0.6132021701040721, "grad_norm": 0.3801015019416809, "learning_rate": 8.75516358823495e-06, "loss": 0.0173, "step": 72620 }, { "epoch": 0.613286609951236, "grad_norm": 0.08292713761329651, "learning_rate": 8.754677010161019e-06, "loss": 0.0181, "step": 72630 }, { "epoch": 0.6133710497983998, "grad_norm": 0.30889853835105896, "learning_rate": 8.754190350536272e-06, "loss": 0.0099, "step": 72640 }, { "epoch": 0.6134554896455637, "grad_norm": 0.25174105167388916, "learning_rate": 8.75370360937128e-06, "loss": 0.0098, "step": 72650 }, { "epoch": 0.6135399294927276, "grad_norm": 0.8421201705932617, "learning_rate": 8.753216786676615e-06, "loss": 0.0172, "step": 72660 }, { "epoch": 0.6136243693398915, "grad_norm": 0.2054138481616974, "learning_rate": 8.752729882462849e-06, "loss": 0.0158, "step": 72670 }, { "epoch": 0.6137088091870554, "grad_norm": 0.2938646674156189, "learning_rate": 8.752242896740561e-06, "loss": 0.0129, "step": 72680 }, { "epoch": 0.6137932490342193, "grad_norm": 0.3171009421348572, "learning_rate": 8.751755829520326e-06, "loss": 0.0125, "step": 72690 }, { "epoch": 0.6138776888813832, "grad_norm": 0.13934066891670227, "learning_rate": 8.75126868081272e-06, "loss": 0.011, "step": 72700 }, { "epoch": 0.613962128728547, "grad_norm": 0.5218513607978821, "learning_rate": 8.750781450628332e-06, "loss": 0.0213, "step": 72710 }, { "epoch": 0.6140465685757108, "grad_norm": 0.25432559847831726, "learning_rate": 8.750294138977736e-06, "loss": 0.0112, "step": 72720 }, { "epoch": 0.6141310084228747, "grad_norm": 0.23029038310050964, "learning_rate": 8.749806745871521e-06, "loss": 0.0118, "step": 72730 }, { "epoch": 0.6142154482700386, "grad_norm": 0.21575528383255005, "learning_rate": 8.749319271320272e-06, "loss": 0.0075, "step": 72740 }, { "epoch": 0.6142998881172025, "grad_norm": 0.18855980038642883, "learning_rate": 8.748831715334575e-06, "loss": 0.0153, "step": 72750 }, { "epoch": 0.6143843279643664, "grad_norm": 0.27402812242507935, "learning_rate": 8.748344077925025e-06, "loss": 0.0074, "step": 72760 }, { "epoch": 0.6144687678115303, "grad_norm": 0.6369491219520569, "learning_rate": 8.747856359102208e-06, "loss": 0.0102, "step": 72770 }, { "epoch": 0.6145532076586941, "grad_norm": 0.609487771987915, "learning_rate": 8.747368558876717e-06, "loss": 0.0131, "step": 72780 }, { "epoch": 0.614637647505858, "grad_norm": 0.2712751030921936, "learning_rate": 8.746880677259152e-06, "loss": 0.0119, "step": 72790 }, { "epoch": 0.6147220873530219, "grad_norm": 0.335033118724823, "learning_rate": 8.746392714260104e-06, "loss": 0.0123, "step": 72800 }, { "epoch": 0.6148065272001858, "grad_norm": 0.44886818528175354, "learning_rate": 8.745904669890173e-06, "loss": 0.0114, "step": 72810 }, { "epoch": 0.6148909670473497, "grad_norm": 0.5052224397659302, "learning_rate": 8.745416544159962e-06, "loss": 0.025, "step": 72820 }, { "epoch": 0.6149754068945135, "grad_norm": 0.37379688024520874, "learning_rate": 8.744928337080071e-06, "loss": 0.0082, "step": 72830 }, { "epoch": 0.6150598467416774, "grad_norm": 0.7846968173980713, "learning_rate": 8.744440048661104e-06, "loss": 0.0099, "step": 72840 }, { "epoch": 0.6151442865888412, "grad_norm": 0.2124669998884201, "learning_rate": 8.743951678913667e-06, "loss": 0.0128, "step": 72850 }, { "epoch": 0.6152287264360051, "grad_norm": 0.44169798493385315, "learning_rate": 8.743463227848365e-06, "loss": 0.0166, "step": 72860 }, { "epoch": 0.615313166283169, "grad_norm": 0.6653974056243896, "learning_rate": 8.742974695475812e-06, "loss": 0.0154, "step": 72870 }, { "epoch": 0.6153976061303329, "grad_norm": 0.5349587202072144, "learning_rate": 8.742486081806613e-06, "loss": 0.0139, "step": 72880 }, { "epoch": 0.6154820459774968, "grad_norm": 0.43833982944488525, "learning_rate": 8.741997386851382e-06, "loss": 0.0119, "step": 72890 }, { "epoch": 0.6155664858246607, "grad_norm": 0.179874986410141, "learning_rate": 8.741508610620737e-06, "loss": 0.0077, "step": 72900 }, { "epoch": 0.6156509256718246, "grad_norm": 0.31260696053504944, "learning_rate": 8.741019753125291e-06, "loss": 0.0119, "step": 72910 }, { "epoch": 0.6157353655189884, "grad_norm": 0.4637148082256317, "learning_rate": 8.740530814375662e-06, "loss": 0.0079, "step": 72920 }, { "epoch": 0.6158198053661523, "grad_norm": 0.47606730461120605, "learning_rate": 8.740041794382472e-06, "loss": 0.0206, "step": 72930 }, { "epoch": 0.6159042452133162, "grad_norm": 0.4925644099712372, "learning_rate": 8.73955269315634e-06, "loss": 0.0164, "step": 72940 }, { "epoch": 0.61598868506048, "grad_norm": 0.41644352674484253, "learning_rate": 8.739063510707888e-06, "loss": 0.0126, "step": 72950 }, { "epoch": 0.6160731249076439, "grad_norm": 0.24278023838996887, "learning_rate": 8.738574247047744e-06, "loss": 0.0137, "step": 72960 }, { "epoch": 0.6161575647548078, "grad_norm": 0.4213253855705261, "learning_rate": 8.738084902186535e-06, "loss": 0.016, "step": 72970 }, { "epoch": 0.6162420046019716, "grad_norm": 0.06441536545753479, "learning_rate": 8.737595476134886e-06, "loss": 0.016, "step": 72980 }, { "epoch": 0.6163264444491355, "grad_norm": 0.39156630635261536, "learning_rate": 8.73710596890343e-06, "loss": 0.0175, "step": 72990 }, { "epoch": 0.6164108842962994, "grad_norm": 0.1613440364599228, "learning_rate": 8.736616380502796e-06, "loss": 0.0132, "step": 73000 }, { "epoch": 0.6164953241434633, "grad_norm": 0.19340354204177856, "learning_rate": 8.736126710943622e-06, "loss": 0.0067, "step": 73010 }, { "epoch": 0.6165797639906272, "grad_norm": 0.774422287940979, "learning_rate": 8.735636960236543e-06, "loss": 0.0206, "step": 73020 }, { "epoch": 0.6166642038377911, "grad_norm": 0.16082602739334106, "learning_rate": 8.735147128392193e-06, "loss": 0.0074, "step": 73030 }, { "epoch": 0.616748643684955, "grad_norm": 0.28458499908447266, "learning_rate": 8.734657215421212e-06, "loss": 0.0076, "step": 73040 }, { "epoch": 0.6168330835321189, "grad_norm": 0.4672326147556305, "learning_rate": 8.734167221334242e-06, "loss": 0.0132, "step": 73050 }, { "epoch": 0.6169175233792826, "grad_norm": 0.35279810428619385, "learning_rate": 8.733677146141926e-06, "loss": 0.0118, "step": 73060 }, { "epoch": 0.6170019632264465, "grad_norm": 0.4865543246269226, "learning_rate": 8.733186989854904e-06, "loss": 0.0108, "step": 73070 }, { "epoch": 0.6170864030736104, "grad_norm": 0.10668929666280746, "learning_rate": 8.73269675248383e-06, "loss": 0.0103, "step": 73080 }, { "epoch": 0.6171708429207743, "grad_norm": 0.8540059328079224, "learning_rate": 8.732206434039346e-06, "loss": 0.0111, "step": 73090 }, { "epoch": 0.6172552827679382, "grad_norm": 0.18950846791267395, "learning_rate": 8.731716034532102e-06, "loss": 0.0103, "step": 73100 }, { "epoch": 0.6173397226151021, "grad_norm": 0.5959983468055725, "learning_rate": 8.731225553972753e-06, "loss": 0.0144, "step": 73110 }, { "epoch": 0.617424162462266, "grad_norm": 0.5787569880485535, "learning_rate": 8.730734992371948e-06, "loss": 0.0131, "step": 73120 }, { "epoch": 0.6175086023094298, "grad_norm": 0.49501582980155945, "learning_rate": 8.730244349740343e-06, "loss": 0.013, "step": 73130 }, { "epoch": 0.6175930421565937, "grad_norm": 0.25823983550071716, "learning_rate": 8.729753626088595e-06, "loss": 0.0126, "step": 73140 }, { "epoch": 0.6176774820037576, "grad_norm": 0.24864111840724945, "learning_rate": 8.729262821427364e-06, "loss": 0.0189, "step": 73150 }, { "epoch": 0.6177619218509215, "grad_norm": 0.46195855736732483, "learning_rate": 8.728771935767307e-06, "loss": 0.0144, "step": 73160 }, { "epoch": 0.6178463616980854, "grad_norm": 0.3852398991584778, "learning_rate": 8.728280969119088e-06, "loss": 0.0149, "step": 73170 }, { "epoch": 0.6179308015452492, "grad_norm": 0.10279974341392517, "learning_rate": 8.72778992149337e-06, "loss": 0.0222, "step": 73180 }, { "epoch": 0.618015241392413, "grad_norm": 0.4720799922943115, "learning_rate": 8.72729879290082e-06, "loss": 0.0094, "step": 73190 }, { "epoch": 0.6180996812395769, "grad_norm": 0.24896079301834106, "learning_rate": 8.726807583352104e-06, "loss": 0.0065, "step": 73200 }, { "epoch": 0.6181841210867408, "grad_norm": 0.2928214967250824, "learning_rate": 8.726316292857891e-06, "loss": 0.0112, "step": 73210 }, { "epoch": 0.6182685609339047, "grad_norm": 0.6297366619110107, "learning_rate": 8.725824921428852e-06, "loss": 0.0189, "step": 73220 }, { "epoch": 0.6183530007810686, "grad_norm": 0.6393787264823914, "learning_rate": 8.725333469075658e-06, "loss": 0.0211, "step": 73230 }, { "epoch": 0.6184374406282325, "grad_norm": 0.24341455101966858, "learning_rate": 8.724841935808985e-06, "loss": 0.0096, "step": 73240 }, { "epoch": 0.6185218804753964, "grad_norm": 1.0354870557785034, "learning_rate": 8.724350321639507e-06, "loss": 0.0108, "step": 73250 }, { "epoch": 0.6186063203225602, "grad_norm": 0.6567655801773071, "learning_rate": 8.723858626577906e-06, "loss": 0.0146, "step": 73260 }, { "epoch": 0.6186907601697241, "grad_norm": 0.3651907444000244, "learning_rate": 8.723366850634857e-06, "loss": 0.0104, "step": 73270 }, { "epoch": 0.618775200016888, "grad_norm": 0.282198041677475, "learning_rate": 8.722874993821043e-06, "loss": 0.013, "step": 73280 }, { "epoch": 0.6188596398640518, "grad_norm": 0.5474591255187988, "learning_rate": 8.722383056147147e-06, "loss": 0.0121, "step": 73290 }, { "epoch": 0.6189440797112157, "grad_norm": 0.1941440999507904, "learning_rate": 8.721891037623854e-06, "loss": 0.0219, "step": 73300 }, { "epoch": 0.6190285195583796, "grad_norm": 0.38059085607528687, "learning_rate": 8.721398938261852e-06, "loss": 0.0127, "step": 73310 }, { "epoch": 0.6191129594055435, "grad_norm": 0.30369189381599426, "learning_rate": 8.720906758071826e-06, "loss": 0.0142, "step": 73320 }, { "epoch": 0.6191973992527073, "grad_norm": 0.20049667358398438, "learning_rate": 8.720414497064467e-06, "loss": 0.0087, "step": 73330 }, { "epoch": 0.6192818390998712, "grad_norm": 0.43086758255958557, "learning_rate": 8.71992215525047e-06, "loss": 0.0179, "step": 73340 }, { "epoch": 0.6193662789470351, "grad_norm": 0.3745499551296234, "learning_rate": 8.719429732640524e-06, "loss": 0.0114, "step": 73350 }, { "epoch": 0.619450718794199, "grad_norm": 0.42040887475013733, "learning_rate": 8.718937229245325e-06, "loss": 0.0141, "step": 73360 }, { "epoch": 0.6195351586413629, "grad_norm": 0.5384461283683777, "learning_rate": 8.718444645075573e-06, "loss": 0.0172, "step": 73370 }, { "epoch": 0.6196195984885268, "grad_norm": 0.19288958609104156, "learning_rate": 8.717951980141965e-06, "loss": 0.0098, "step": 73380 }, { "epoch": 0.6197040383356907, "grad_norm": 0.23153412342071533, "learning_rate": 8.717459234455203e-06, "loss": 0.0112, "step": 73390 }, { "epoch": 0.6197884781828545, "grad_norm": 0.8916225433349609, "learning_rate": 8.71696640802599e-06, "loss": 0.0088, "step": 73400 }, { "epoch": 0.6198729180300183, "grad_norm": 0.19819898903369904, "learning_rate": 8.716473500865023e-06, "loss": 0.0139, "step": 73410 }, { "epoch": 0.6199573578771822, "grad_norm": 0.21072548627853394, "learning_rate": 8.715980512983017e-06, "loss": 0.0104, "step": 73420 }, { "epoch": 0.6200417977243461, "grad_norm": 0.5183328986167908, "learning_rate": 8.715487444390675e-06, "loss": 0.0203, "step": 73430 }, { "epoch": 0.62012623757151, "grad_norm": 0.1601608395576477, "learning_rate": 8.714994295098707e-06, "loss": 0.0172, "step": 73440 }, { "epoch": 0.6202106774186739, "grad_norm": 0.10382002592086792, "learning_rate": 8.714501065117825e-06, "loss": 0.009, "step": 73450 }, { "epoch": 0.6202951172658377, "grad_norm": 0.06243513524532318, "learning_rate": 8.71400775445874e-06, "loss": 0.0115, "step": 73460 }, { "epoch": 0.6203795571130016, "grad_norm": 0.2503660321235657, "learning_rate": 8.713514363132168e-06, "loss": 0.0129, "step": 73470 }, { "epoch": 0.6204639969601655, "grad_norm": 0.31420376896858215, "learning_rate": 8.713020891148823e-06, "loss": 0.0106, "step": 73480 }, { "epoch": 0.6205484368073294, "grad_norm": 0.2941734492778778, "learning_rate": 8.712527338519427e-06, "loss": 0.0219, "step": 73490 }, { "epoch": 0.6206328766544933, "grad_norm": 0.22594715654850006, "learning_rate": 8.712033705254698e-06, "loss": 0.0083, "step": 73500 }, { "epoch": 0.6207173165016572, "grad_norm": 0.27147868275642395, "learning_rate": 8.711539991365359e-06, "loss": 0.0068, "step": 73510 }, { "epoch": 0.620801756348821, "grad_norm": 0.6536809206008911, "learning_rate": 8.71104619686213e-06, "loss": 0.0164, "step": 73520 }, { "epoch": 0.6208861961959848, "grad_norm": 0.3320670425891876, "learning_rate": 8.710552321755737e-06, "loss": 0.0137, "step": 73530 }, { "epoch": 0.6209706360431487, "grad_norm": 0.31933555006980896, "learning_rate": 8.710058366056911e-06, "loss": 0.0118, "step": 73540 }, { "epoch": 0.6210550758903126, "grad_norm": 0.33403217792510986, "learning_rate": 8.709564329776374e-06, "loss": 0.0162, "step": 73550 }, { "epoch": 0.6211395157374765, "grad_norm": 0.5829833149909973, "learning_rate": 8.709070212924863e-06, "loss": 0.0085, "step": 73560 }, { "epoch": 0.6212239555846404, "grad_norm": 0.3339305520057678, "learning_rate": 8.708576015513106e-06, "loss": 0.0139, "step": 73570 }, { "epoch": 0.6213083954318043, "grad_norm": 0.21592184901237488, "learning_rate": 8.708081737551837e-06, "loss": 0.0144, "step": 73580 }, { "epoch": 0.6213928352789682, "grad_norm": 0.3133796155452728, "learning_rate": 8.707587379051795e-06, "loss": 0.0119, "step": 73590 }, { "epoch": 0.621477275126132, "grad_norm": 0.6811272501945496, "learning_rate": 8.707092940023712e-06, "loss": 0.0142, "step": 73600 }, { "epoch": 0.6215617149732959, "grad_norm": 0.19871337711811066, "learning_rate": 8.706598420478331e-06, "loss": 0.015, "step": 73610 }, { "epoch": 0.6216461548204598, "grad_norm": 0.5767467021942139, "learning_rate": 8.706103820426393e-06, "loss": 0.0182, "step": 73620 }, { "epoch": 0.6217305946676237, "grad_norm": 0.3542725145816803, "learning_rate": 8.705609139878638e-06, "loss": 0.0158, "step": 73630 }, { "epoch": 0.6218150345147875, "grad_norm": 0.29297807812690735, "learning_rate": 8.705114378845812e-06, "loss": 0.0154, "step": 73640 }, { "epoch": 0.6218994743619514, "grad_norm": 0.5603965520858765, "learning_rate": 8.70461953733866e-06, "loss": 0.0101, "step": 73650 }, { "epoch": 0.6219839142091153, "grad_norm": 0.42839887738227844, "learning_rate": 8.704124615367934e-06, "loss": 0.0149, "step": 73660 }, { "epoch": 0.6220683540562791, "grad_norm": 0.16572609543800354, "learning_rate": 8.703629612944378e-06, "loss": 0.0114, "step": 73670 }, { "epoch": 0.622152793903443, "grad_norm": 0.22861133515834808, "learning_rate": 8.703134530078747e-06, "loss": 0.0112, "step": 73680 }, { "epoch": 0.6222372337506069, "grad_norm": 0.15238289535045624, "learning_rate": 8.702639366781792e-06, "loss": 0.0146, "step": 73690 }, { "epoch": 0.6223216735977708, "grad_norm": 0.27010977268218994, "learning_rate": 8.70214412306427e-06, "loss": 0.0078, "step": 73700 }, { "epoch": 0.6224061134449347, "grad_norm": 0.9755997061729431, "learning_rate": 8.701648798936934e-06, "loss": 0.015, "step": 73710 }, { "epoch": 0.6224905532920986, "grad_norm": 0.12792804837226868, "learning_rate": 8.701153394410545e-06, "loss": 0.0129, "step": 73720 }, { "epoch": 0.6225749931392625, "grad_norm": 0.19786885380744934, "learning_rate": 8.700657909495864e-06, "loss": 0.0082, "step": 73730 }, { "epoch": 0.6226594329864263, "grad_norm": 0.9932509660720825, "learning_rate": 8.70016234420365e-06, "loss": 0.0175, "step": 73740 }, { "epoch": 0.6227438728335901, "grad_norm": 0.2851891815662384, "learning_rate": 8.69966669854467e-06, "loss": 0.0103, "step": 73750 }, { "epoch": 0.622828312680754, "grad_norm": 0.3111479878425598, "learning_rate": 8.699170972529687e-06, "loss": 0.0144, "step": 73760 }, { "epoch": 0.6229127525279179, "grad_norm": 0.2197485715150833, "learning_rate": 8.698675166169469e-06, "loss": 0.0116, "step": 73770 }, { "epoch": 0.6229971923750818, "grad_norm": 0.20078864693641663, "learning_rate": 8.69817927947478e-06, "loss": 0.0084, "step": 73780 }, { "epoch": 0.6230816322222457, "grad_norm": 0.22339418530464172, "learning_rate": 8.6976833124564e-06, "loss": 0.0096, "step": 73790 }, { "epoch": 0.6231660720694095, "grad_norm": 0.37491604685783386, "learning_rate": 8.697187265125095e-06, "loss": 0.0182, "step": 73800 }, { "epoch": 0.6232505119165734, "grad_norm": 0.49111175537109375, "learning_rate": 8.69669113749164e-06, "loss": 0.0073, "step": 73810 }, { "epoch": 0.6233349517637373, "grad_norm": 0.3363785147666931, "learning_rate": 8.69619492956681e-06, "loss": 0.0081, "step": 73820 }, { "epoch": 0.6234193916109012, "grad_norm": 0.29118481278419495, "learning_rate": 8.695698641361384e-06, "loss": 0.0137, "step": 73830 }, { "epoch": 0.6235038314580651, "grad_norm": 0.19518215954303741, "learning_rate": 8.695202272886141e-06, "loss": 0.0097, "step": 73840 }, { "epoch": 0.623588271305229, "grad_norm": 0.6854243278503418, "learning_rate": 8.694705824151862e-06, "loss": 0.0161, "step": 73850 }, { "epoch": 0.6236727111523929, "grad_norm": 0.26276442408561707, "learning_rate": 8.694209295169329e-06, "loss": 0.0106, "step": 73860 }, { "epoch": 0.6237571509995566, "grad_norm": 0.7551456093788147, "learning_rate": 8.693712685949328e-06, "loss": 0.0113, "step": 73870 }, { "epoch": 0.6238415908467205, "grad_norm": 0.28559038043022156, "learning_rate": 8.693215996502644e-06, "loss": 0.0107, "step": 73880 }, { "epoch": 0.6239260306938844, "grad_norm": 0.16509440541267395, "learning_rate": 8.692719226840064e-06, "loss": 0.0142, "step": 73890 }, { "epoch": 0.6240104705410483, "grad_norm": 0.7571868896484375, "learning_rate": 8.69222237697238e-06, "loss": 0.0141, "step": 73900 }, { "epoch": 0.6240949103882122, "grad_norm": 0.19049634039402008, "learning_rate": 8.691725446910385e-06, "loss": 0.0151, "step": 73910 }, { "epoch": 0.6241793502353761, "grad_norm": 0.01109943725168705, "learning_rate": 8.691228436664868e-06, "loss": 0.0208, "step": 73920 }, { "epoch": 0.62426379008254, "grad_norm": 0.3787505030632019, "learning_rate": 8.690731346246626e-06, "loss": 0.0134, "step": 73930 }, { "epoch": 0.6243482299297038, "grad_norm": 0.6643309593200684, "learning_rate": 8.690234175666454e-06, "loss": 0.0164, "step": 73940 }, { "epoch": 0.6244326697768677, "grad_norm": 0.3766544461250305, "learning_rate": 8.689736924935155e-06, "loss": 0.0096, "step": 73950 }, { "epoch": 0.6245171096240316, "grad_norm": 0.05623707175254822, "learning_rate": 8.689239594063523e-06, "loss": 0.0105, "step": 73960 }, { "epoch": 0.6246015494711955, "grad_norm": 0.15622694790363312, "learning_rate": 8.688742183062363e-06, "loss": 0.0113, "step": 73970 }, { "epoch": 0.6246859893183593, "grad_norm": 0.32864105701446533, "learning_rate": 8.688244691942481e-06, "loss": 0.0237, "step": 73980 }, { "epoch": 0.6247704291655232, "grad_norm": 0.8589459657669067, "learning_rate": 8.687747120714679e-06, "loss": 0.0188, "step": 73990 }, { "epoch": 0.624854869012687, "grad_norm": 0.5789362788200378, "learning_rate": 8.687249469389765e-06, "loss": 0.0082, "step": 74000 }, { "epoch": 0.6249393088598509, "grad_norm": 0.4118853211402893, "learning_rate": 8.686751737978547e-06, "loss": 0.0159, "step": 74010 }, { "epoch": 0.6250237487070148, "grad_norm": 0.5181739926338196, "learning_rate": 8.686253926491839e-06, "loss": 0.0191, "step": 74020 }, { "epoch": 0.6251081885541787, "grad_norm": 0.3002920150756836, "learning_rate": 8.68575603494045e-06, "loss": 0.0105, "step": 74030 }, { "epoch": 0.6251926284013426, "grad_norm": 0.36927103996276855, "learning_rate": 8.685258063335196e-06, "loss": 0.0079, "step": 74040 }, { "epoch": 0.6252770682485065, "grad_norm": 0.2783825993537903, "learning_rate": 8.68476001168689e-06, "loss": 0.0101, "step": 74050 }, { "epoch": 0.6253615080956704, "grad_norm": 0.6318174004554749, "learning_rate": 8.684261880006353e-06, "loss": 0.0107, "step": 74060 }, { "epoch": 0.6254459479428343, "grad_norm": 0.2339031845331192, "learning_rate": 8.683763668304403e-06, "loss": 0.0149, "step": 74070 }, { "epoch": 0.6255303877899981, "grad_norm": 0.4909008741378784, "learning_rate": 8.683265376591861e-06, "loss": 0.0138, "step": 74080 }, { "epoch": 0.625614827637162, "grad_norm": 0.3840339481830597, "learning_rate": 8.68276700487955e-06, "loss": 0.0179, "step": 74090 }, { "epoch": 0.6256992674843258, "grad_norm": 0.12171858549118042, "learning_rate": 8.682268553178294e-06, "loss": 0.0113, "step": 74100 }, { "epoch": 0.6257837073314897, "grad_norm": 0.2822727560997009, "learning_rate": 8.68177002149892e-06, "loss": 0.0185, "step": 74110 }, { "epoch": 0.6258681471786536, "grad_norm": 0.645066499710083, "learning_rate": 8.681271409852255e-06, "loss": 0.0141, "step": 74120 }, { "epoch": 0.6259525870258175, "grad_norm": 0.31021320819854736, "learning_rate": 8.680772718249128e-06, "loss": 0.0148, "step": 74130 }, { "epoch": 0.6260370268729814, "grad_norm": 0.24342307448387146, "learning_rate": 8.680273946700375e-06, "loss": 0.0085, "step": 74140 }, { "epoch": 0.6261214667201452, "grad_norm": 0.40823400020599365, "learning_rate": 8.679775095216825e-06, "loss": 0.0211, "step": 74150 }, { "epoch": 0.6262059065673091, "grad_norm": 0.17058931291103363, "learning_rate": 8.679276163809312e-06, "loss": 0.0166, "step": 74160 }, { "epoch": 0.626290346414473, "grad_norm": 0.3352777659893036, "learning_rate": 8.678777152488676e-06, "loss": 0.0066, "step": 74170 }, { "epoch": 0.6263747862616369, "grad_norm": 0.72048020362854, "learning_rate": 8.678278061265755e-06, "loss": 0.017, "step": 74180 }, { "epoch": 0.6264592261088008, "grad_norm": 0.09310754388570786, "learning_rate": 8.677778890151386e-06, "loss": 0.0099, "step": 74190 }, { "epoch": 0.6265436659559647, "grad_norm": 0.1377962976694107, "learning_rate": 8.677279639156415e-06, "loss": 0.0144, "step": 74200 }, { "epoch": 0.6266281058031284, "grad_norm": 0.37989360094070435, "learning_rate": 8.676780308291683e-06, "loss": 0.0064, "step": 74210 }, { "epoch": 0.6267125456502923, "grad_norm": 0.012305783107876778, "learning_rate": 8.676280897568038e-06, "loss": 0.0065, "step": 74220 }, { "epoch": 0.6267969854974562, "grad_norm": 0.3439452648162842, "learning_rate": 8.675781406996323e-06, "loss": 0.0096, "step": 74230 }, { "epoch": 0.6268814253446201, "grad_norm": 0.26290640234947205, "learning_rate": 8.67528183658739e-06, "loss": 0.0092, "step": 74240 }, { "epoch": 0.626965865191784, "grad_norm": 0.7407910227775574, "learning_rate": 8.67478218635209e-06, "loss": 0.0101, "step": 74250 }, { "epoch": 0.6270503050389479, "grad_norm": 0.11190593242645264, "learning_rate": 8.67428245630127e-06, "loss": 0.0132, "step": 74260 }, { "epoch": 0.6271347448861118, "grad_norm": 0.14737477898597717, "learning_rate": 8.673782646445792e-06, "loss": 0.0128, "step": 74270 }, { "epoch": 0.6272191847332756, "grad_norm": 0.5605781674385071, "learning_rate": 8.673282756796506e-06, "loss": 0.0123, "step": 74280 }, { "epoch": 0.6273036245804395, "grad_norm": 0.6518559455871582, "learning_rate": 8.67278278736427e-06, "loss": 0.015, "step": 74290 }, { "epoch": 0.6273880644276034, "grad_norm": 0.19816097617149353, "learning_rate": 8.672282738159948e-06, "loss": 0.0145, "step": 74300 }, { "epoch": 0.6274725042747673, "grad_norm": 0.2643267810344696, "learning_rate": 8.671782609194395e-06, "loss": 0.0224, "step": 74310 }, { "epoch": 0.6275569441219312, "grad_norm": 0.4171772301197052, "learning_rate": 8.671282400478475e-06, "loss": 0.0148, "step": 74320 }, { "epoch": 0.627641383969095, "grad_norm": 0.05588724464178085, "learning_rate": 8.670782112023056e-06, "loss": 0.0117, "step": 74330 }, { "epoch": 0.6277258238162589, "grad_norm": 0.27521592378616333, "learning_rate": 8.670281743839e-06, "loss": 0.0094, "step": 74340 }, { "epoch": 0.6278102636634227, "grad_norm": 0.24599626660346985, "learning_rate": 8.669781295937178e-06, "loss": 0.0197, "step": 74350 }, { "epoch": 0.6278947035105866, "grad_norm": 0.329548180103302, "learning_rate": 8.669280768328459e-06, "loss": 0.0122, "step": 74360 }, { "epoch": 0.6279791433577505, "grad_norm": 0.2923431694507599, "learning_rate": 8.66878016102371e-06, "loss": 0.0176, "step": 74370 }, { "epoch": 0.6280635832049144, "grad_norm": 0.3686707615852356, "learning_rate": 8.668279474033812e-06, "loss": 0.0111, "step": 74380 }, { "epoch": 0.6281480230520783, "grad_norm": 0.29474765062332153, "learning_rate": 8.667778707369634e-06, "loss": 0.0104, "step": 74390 }, { "epoch": 0.6282324628992422, "grad_norm": 0.372380793094635, "learning_rate": 8.667277861042052e-06, "loss": 0.0107, "step": 74400 }, { "epoch": 0.6283169027464061, "grad_norm": 0.40131905674934387, "learning_rate": 8.666776935061948e-06, "loss": 0.0121, "step": 74410 }, { "epoch": 0.62840134259357, "grad_norm": 0.4201785922050476, "learning_rate": 8.6662759294402e-06, "loss": 0.0072, "step": 74420 }, { "epoch": 0.6284857824407338, "grad_norm": 0.5462507009506226, "learning_rate": 8.66577484418769e-06, "loss": 0.0134, "step": 74430 }, { "epoch": 0.6285702222878976, "grad_norm": 0.40704959630966187, "learning_rate": 8.665273679315302e-06, "loss": 0.0147, "step": 74440 }, { "epoch": 0.6286546621350615, "grad_norm": 0.1362554132938385, "learning_rate": 8.664772434833922e-06, "loss": 0.0073, "step": 74450 }, { "epoch": 0.6287391019822254, "grad_norm": 1.2895731925964355, "learning_rate": 8.664271110754433e-06, "loss": 0.0157, "step": 74460 }, { "epoch": 0.6288235418293893, "grad_norm": 0.1476215124130249, "learning_rate": 8.66376970708773e-06, "loss": 0.0137, "step": 74470 }, { "epoch": 0.6289079816765532, "grad_norm": 0.5940818786621094, "learning_rate": 8.663268223844697e-06, "loss": 0.0145, "step": 74480 }, { "epoch": 0.628992421523717, "grad_norm": 0.47295087575912476, "learning_rate": 8.66276666103623e-06, "loss": 0.0179, "step": 74490 }, { "epoch": 0.6290768613708809, "grad_norm": 0.055862486362457275, "learning_rate": 8.66226501867322e-06, "loss": 0.0095, "step": 74500 }, { "epoch": 0.6291613012180448, "grad_norm": 0.37231874465942383, "learning_rate": 8.661763296766566e-06, "loss": 0.0094, "step": 74510 }, { "epoch": 0.6292457410652087, "grad_norm": 0.7983134984970093, "learning_rate": 8.661261495327162e-06, "loss": 0.0195, "step": 74520 }, { "epoch": 0.6293301809123726, "grad_norm": 0.738778829574585, "learning_rate": 8.66075961436591e-06, "loss": 0.0153, "step": 74530 }, { "epoch": 0.6294146207595365, "grad_norm": 0.1511474847793579, "learning_rate": 8.660257653893709e-06, "loss": 0.008, "step": 74540 }, { "epoch": 0.6294990606067004, "grad_norm": 0.8982148766517639, "learning_rate": 8.65975561392146e-06, "loss": 0.0122, "step": 74550 }, { "epoch": 0.6295835004538641, "grad_norm": 0.2173059731721878, "learning_rate": 8.659253494460072e-06, "loss": 0.0152, "step": 74560 }, { "epoch": 0.629667940301028, "grad_norm": 1.070743203163147, "learning_rate": 8.658751295520446e-06, "loss": 0.0121, "step": 74570 }, { "epoch": 0.6297523801481919, "grad_norm": 0.4532868564128876, "learning_rate": 8.658249017113493e-06, "loss": 0.0095, "step": 74580 }, { "epoch": 0.6298368199953558, "grad_norm": 0.1820663958787918, "learning_rate": 8.657746659250121e-06, "loss": 0.0157, "step": 74590 }, { "epoch": 0.6299212598425197, "grad_norm": 0.4427812993526459, "learning_rate": 8.65724422194124e-06, "loss": 0.0154, "step": 74600 }, { "epoch": 0.6300056996896836, "grad_norm": 0.9949896335601807, "learning_rate": 8.656741705197765e-06, "loss": 0.011, "step": 74610 }, { "epoch": 0.6300901395368474, "grad_norm": 0.30285370349884033, "learning_rate": 8.656239109030608e-06, "loss": 0.0153, "step": 74620 }, { "epoch": 0.6301745793840113, "grad_norm": 0.26420462131500244, "learning_rate": 8.655736433450687e-06, "loss": 0.0084, "step": 74630 }, { "epoch": 0.6302590192311752, "grad_norm": 0.3560696840286255, "learning_rate": 8.655233678468922e-06, "loss": 0.0088, "step": 74640 }, { "epoch": 0.6303434590783391, "grad_norm": 0.8990529775619507, "learning_rate": 8.654730844096229e-06, "loss": 0.0144, "step": 74650 }, { "epoch": 0.630427898925503, "grad_norm": 0.2582317888736725, "learning_rate": 8.65422793034353e-06, "loss": 0.0177, "step": 74660 }, { "epoch": 0.6305123387726668, "grad_norm": 0.016615621745586395, "learning_rate": 8.65372493722175e-06, "loss": 0.0097, "step": 74670 }, { "epoch": 0.6305967786198307, "grad_norm": 0.21549928188323975, "learning_rate": 8.653221864741814e-06, "loss": 0.0152, "step": 74680 }, { "epoch": 0.6306812184669945, "grad_norm": 0.5357888340950012, "learning_rate": 8.652718712914647e-06, "loss": 0.0078, "step": 74690 }, { "epoch": 0.6307656583141584, "grad_norm": 0.24485774338245392, "learning_rate": 8.652215481751177e-06, "loss": 0.0102, "step": 74700 }, { "epoch": 0.6308500981613223, "grad_norm": 0.17494606971740723, "learning_rate": 8.651712171262337e-06, "loss": 0.0084, "step": 74710 }, { "epoch": 0.6309345380084862, "grad_norm": 0.55727618932724, "learning_rate": 8.651208781459054e-06, "loss": 0.0152, "step": 74720 }, { "epoch": 0.6310189778556501, "grad_norm": 0.1965043544769287, "learning_rate": 8.650705312352268e-06, "loss": 0.0108, "step": 74730 }, { "epoch": 0.631103417702814, "grad_norm": 0.1302873194217682, "learning_rate": 8.65020176395291e-06, "loss": 0.008, "step": 74740 }, { "epoch": 0.6311878575499779, "grad_norm": 0.3290797472000122, "learning_rate": 8.649698136271915e-06, "loss": 0.0111, "step": 74750 }, { "epoch": 0.6312722973971417, "grad_norm": 0.23369549214839935, "learning_rate": 8.649194429320227e-06, "loss": 0.0128, "step": 74760 }, { "epoch": 0.6313567372443056, "grad_norm": 0.19709347188472748, "learning_rate": 8.648690643108784e-06, "loss": 0.0125, "step": 74770 }, { "epoch": 0.6314411770914695, "grad_norm": 0.2872784435749054, "learning_rate": 8.648186777648526e-06, "loss": 0.0083, "step": 74780 }, { "epoch": 0.6315256169386333, "grad_norm": 0.05679522827267647, "learning_rate": 8.6476828329504e-06, "loss": 0.0213, "step": 74790 }, { "epoch": 0.6316100567857972, "grad_norm": 0.46919190883636475, "learning_rate": 8.647178809025351e-06, "loss": 0.0151, "step": 74800 }, { "epoch": 0.6316944966329611, "grad_norm": 0.571272075176239, "learning_rate": 8.646674705884324e-06, "loss": 0.0138, "step": 74810 }, { "epoch": 0.631778936480125, "grad_norm": 0.6557847857475281, "learning_rate": 8.646170523538271e-06, "loss": 0.0116, "step": 74820 }, { "epoch": 0.6318633763272888, "grad_norm": 0.282601922750473, "learning_rate": 8.64566626199814e-06, "loss": 0.0239, "step": 74830 }, { "epoch": 0.6319478161744527, "grad_norm": 0.34181004762649536, "learning_rate": 8.645161921274887e-06, "loss": 0.0142, "step": 74840 }, { "epoch": 0.6320322560216166, "grad_norm": 0.08589537441730499, "learning_rate": 8.644657501379462e-06, "loss": 0.0101, "step": 74850 }, { "epoch": 0.6321166958687805, "grad_norm": 0.38368403911590576, "learning_rate": 8.644153002322824e-06, "loss": 0.0128, "step": 74860 }, { "epoch": 0.6322011357159444, "grad_norm": 0.17282506823539734, "learning_rate": 8.643648424115928e-06, "loss": 0.0051, "step": 74870 }, { "epoch": 0.6322855755631083, "grad_norm": 0.5315201878547668, "learning_rate": 8.643143766769738e-06, "loss": 0.0164, "step": 74880 }, { "epoch": 0.6323700154102722, "grad_norm": 0.28554025292396545, "learning_rate": 8.642639030295208e-06, "loss": 0.0167, "step": 74890 }, { "epoch": 0.6324544552574359, "grad_norm": 0.41468527913093567, "learning_rate": 8.642134214703309e-06, "loss": 0.0164, "step": 74900 }, { "epoch": 0.6325388951045998, "grad_norm": 0.728487491607666, "learning_rate": 8.641629320004998e-06, "loss": 0.0123, "step": 74910 }, { "epoch": 0.6326233349517637, "grad_norm": 0.4962255358695984, "learning_rate": 8.641124346211244e-06, "loss": 0.0117, "step": 74920 }, { "epoch": 0.6327077747989276, "grad_norm": 0.3303067982196808, "learning_rate": 8.640619293333017e-06, "loss": 0.0138, "step": 74930 }, { "epoch": 0.6327922146460915, "grad_norm": 0.28426793217658997, "learning_rate": 8.640114161381285e-06, "loss": 0.0075, "step": 74940 }, { "epoch": 0.6328766544932554, "grad_norm": 0.28143757581710815, "learning_rate": 8.63960895036702e-06, "loss": 0.0084, "step": 74950 }, { "epoch": 0.6329610943404193, "grad_norm": 0.3318357765674591, "learning_rate": 8.639103660301193e-06, "loss": 0.0075, "step": 74960 }, { "epoch": 0.6330455341875831, "grad_norm": 0.14630413055419922, "learning_rate": 8.63859829119478e-06, "loss": 0.0127, "step": 74970 }, { "epoch": 0.633129974034747, "grad_norm": 0.864351212978363, "learning_rate": 8.638092843058758e-06, "loss": 0.0098, "step": 74980 }, { "epoch": 0.6332144138819109, "grad_norm": 0.6175723075866699, "learning_rate": 8.637587315904105e-06, "loss": 0.0115, "step": 74990 }, { "epoch": 0.6332988537290748, "grad_norm": 0.24519844353199005, "learning_rate": 8.637081709741802e-06, "loss": 0.0136, "step": 75000 }, { "epoch": 0.6333832935762387, "grad_norm": 0.6432893872261047, "learning_rate": 8.63657602458283e-06, "loss": 0.0127, "step": 75010 }, { "epoch": 0.6334677334234025, "grad_norm": 0.22938832640647888, "learning_rate": 8.63607026043817e-06, "loss": 0.0101, "step": 75020 }, { "epoch": 0.6335521732705663, "grad_norm": 0.30686408281326294, "learning_rate": 8.635564417318809e-06, "loss": 0.0103, "step": 75030 }, { "epoch": 0.6336366131177302, "grad_norm": 0.07482054084539413, "learning_rate": 8.635058495235736e-06, "loss": 0.0083, "step": 75040 }, { "epoch": 0.6337210529648941, "grad_norm": 0.4644741415977478, "learning_rate": 8.634552494199935e-06, "loss": 0.0137, "step": 75050 }, { "epoch": 0.633805492812058, "grad_norm": 0.667123019695282, "learning_rate": 8.634046414222401e-06, "loss": 0.0246, "step": 75060 }, { "epoch": 0.6338899326592219, "grad_norm": 0.2967767119407654, "learning_rate": 8.633540255314122e-06, "loss": 0.0072, "step": 75070 }, { "epoch": 0.6339743725063858, "grad_norm": 0.5057099461555481, "learning_rate": 8.633034017486092e-06, "loss": 0.0142, "step": 75080 }, { "epoch": 0.6340588123535497, "grad_norm": 0.20268034934997559, "learning_rate": 8.63252770074931e-06, "loss": 0.0135, "step": 75090 }, { "epoch": 0.6341432522007135, "grad_norm": 0.3282199203968048, "learning_rate": 8.63202130511477e-06, "loss": 0.0071, "step": 75100 }, { "epoch": 0.6342276920478774, "grad_norm": 0.36176756024360657, "learning_rate": 8.631514830593471e-06, "loss": 0.0088, "step": 75110 }, { "epoch": 0.6343121318950413, "grad_norm": 0.6559303998947144, "learning_rate": 8.631008277196415e-06, "loss": 0.0104, "step": 75120 }, { "epoch": 0.6343965717422051, "grad_norm": 0.34155866503715515, "learning_rate": 8.630501644934604e-06, "loss": 0.0153, "step": 75130 }, { "epoch": 0.634481011589369, "grad_norm": 0.269287109375, "learning_rate": 8.629994933819039e-06, "loss": 0.0144, "step": 75140 }, { "epoch": 0.6345654514365329, "grad_norm": 0.6446939706802368, "learning_rate": 8.629488143860728e-06, "loss": 0.0127, "step": 75150 }, { "epoch": 0.6346498912836968, "grad_norm": 0.5232512950897217, "learning_rate": 8.62898127507068e-06, "loss": 0.0132, "step": 75160 }, { "epoch": 0.6347343311308606, "grad_norm": 0.260099858045578, "learning_rate": 8.628474327459901e-06, "loss": 0.0143, "step": 75170 }, { "epoch": 0.6348187709780245, "grad_norm": 0.27348870038986206, "learning_rate": 8.627967301039403e-06, "loss": 0.0099, "step": 75180 }, { "epoch": 0.6349032108251884, "grad_norm": 0.4771726429462433, "learning_rate": 8.6274601958202e-06, "loss": 0.0102, "step": 75190 }, { "epoch": 0.6349876506723523, "grad_norm": 0.5213075876235962, "learning_rate": 8.626953011813305e-06, "loss": 0.0109, "step": 75200 }, { "epoch": 0.6350720905195162, "grad_norm": 0.3835291862487793, "learning_rate": 8.626445749029733e-06, "loss": 0.0121, "step": 75210 }, { "epoch": 0.6351565303666801, "grad_norm": 0.24034759402275085, "learning_rate": 8.625938407480502e-06, "loss": 0.0114, "step": 75220 }, { "epoch": 0.635240970213844, "grad_norm": 0.4801008701324463, "learning_rate": 8.625430987176631e-06, "loss": 0.0138, "step": 75230 }, { "epoch": 0.6353254100610078, "grad_norm": 0.5097364783287048, "learning_rate": 8.624923488129144e-06, "loss": 0.0076, "step": 75240 }, { "epoch": 0.6354098499081716, "grad_norm": 0.6316417455673218, "learning_rate": 8.62441591034906e-06, "loss": 0.0158, "step": 75250 }, { "epoch": 0.6354942897553355, "grad_norm": 0.32649922370910645, "learning_rate": 8.623908253847406e-06, "loss": 0.0105, "step": 75260 }, { "epoch": 0.6355787296024994, "grad_norm": 0.15075573325157166, "learning_rate": 8.623400518635209e-06, "loss": 0.0141, "step": 75270 }, { "epoch": 0.6356631694496633, "grad_norm": 0.39857324957847595, "learning_rate": 8.622892704723494e-06, "loss": 0.0124, "step": 75280 }, { "epoch": 0.6357476092968272, "grad_norm": 0.3515797555446625, "learning_rate": 8.62238481212329e-06, "loss": 0.0156, "step": 75290 }, { "epoch": 0.635832049143991, "grad_norm": 0.25849097967147827, "learning_rate": 8.621876840845631e-06, "loss": 0.0101, "step": 75300 }, { "epoch": 0.6359164889911549, "grad_norm": 0.24054771661758423, "learning_rate": 8.62136879090155e-06, "loss": 0.0096, "step": 75310 }, { "epoch": 0.6360009288383188, "grad_norm": 0.2309863269329071, "learning_rate": 8.620860662302081e-06, "loss": 0.0136, "step": 75320 }, { "epoch": 0.6360853686854827, "grad_norm": 0.1961834579706192, "learning_rate": 8.62035245505826e-06, "loss": 0.0183, "step": 75330 }, { "epoch": 0.6361698085326466, "grad_norm": 0.0785365179181099, "learning_rate": 8.619844169181127e-06, "loss": 0.0088, "step": 75340 }, { "epoch": 0.6362542483798105, "grad_norm": 0.681911051273346, "learning_rate": 8.619335804681716e-06, "loss": 0.0153, "step": 75350 }, { "epoch": 0.6363386882269743, "grad_norm": 0.23532094061374664, "learning_rate": 8.618827361571079e-06, "loss": 0.0105, "step": 75360 }, { "epoch": 0.6364231280741381, "grad_norm": 0.2926280200481415, "learning_rate": 8.61831883986025e-06, "loss": 0.0153, "step": 75370 }, { "epoch": 0.636507567921302, "grad_norm": 0.31366029381752014, "learning_rate": 8.617810239560275e-06, "loss": 0.0121, "step": 75380 }, { "epoch": 0.6365920077684659, "grad_norm": 0.11288069188594818, "learning_rate": 8.617301560682208e-06, "loss": 0.0127, "step": 75390 }, { "epoch": 0.6366764476156298, "grad_norm": 0.8618664145469666, "learning_rate": 8.616792803237088e-06, "loss": 0.0137, "step": 75400 }, { "epoch": 0.6367608874627937, "grad_norm": 0.12891757488250732, "learning_rate": 8.616283967235971e-06, "loss": 0.0127, "step": 75410 }, { "epoch": 0.6368453273099576, "grad_norm": 0.1565598100423813, "learning_rate": 8.61577505268991e-06, "loss": 0.0133, "step": 75420 }, { "epoch": 0.6369297671571215, "grad_norm": 0.25852227210998535, "learning_rate": 8.615266059609953e-06, "loss": 0.0304, "step": 75430 }, { "epoch": 0.6370142070042853, "grad_norm": 0.27263516187667847, "learning_rate": 8.614756988007159e-06, "loss": 0.0082, "step": 75440 }, { "epoch": 0.6370986468514492, "grad_norm": 0.9609626531600952, "learning_rate": 8.614247837892582e-06, "loss": 0.0158, "step": 75450 }, { "epoch": 0.6371830866986131, "grad_norm": 0.28885987401008606, "learning_rate": 8.613738609277285e-06, "loss": 0.0217, "step": 75460 }, { "epoch": 0.6372675265457769, "grad_norm": 0.660135805606842, "learning_rate": 8.613229302172323e-06, "loss": 0.009, "step": 75470 }, { "epoch": 0.6373519663929408, "grad_norm": 0.2057383805513382, "learning_rate": 8.612719916588764e-06, "loss": 0.0094, "step": 75480 }, { "epoch": 0.6374364062401047, "grad_norm": 0.35921552777290344, "learning_rate": 8.612210452537668e-06, "loss": 0.012, "step": 75490 }, { "epoch": 0.6375208460872686, "grad_norm": 0.7394030690193176, "learning_rate": 8.6117009100301e-06, "loss": 0.0176, "step": 75500 }, { "epoch": 0.6376052859344324, "grad_norm": 0.08421708643436432, "learning_rate": 8.61119128907713e-06, "loss": 0.0074, "step": 75510 }, { "epoch": 0.6376897257815963, "grad_norm": 0.3289808928966522, "learning_rate": 8.610681589689825e-06, "loss": 0.0109, "step": 75520 }, { "epoch": 0.6377741656287602, "grad_norm": 0.012364687398076057, "learning_rate": 8.610171811879254e-06, "loss": 0.0197, "step": 75530 }, { "epoch": 0.6378586054759241, "grad_norm": 0.3024140000343323, "learning_rate": 8.609661955656491e-06, "loss": 0.0117, "step": 75540 }, { "epoch": 0.637943045323088, "grad_norm": 0.3267548978328705, "learning_rate": 8.609152021032612e-06, "loss": 0.0097, "step": 75550 }, { "epoch": 0.6380274851702519, "grad_norm": 0.7717544436454773, "learning_rate": 8.60864200801869e-06, "loss": 0.0158, "step": 75560 }, { "epoch": 0.6381119250174158, "grad_norm": 0.4315796196460724, "learning_rate": 8.608131916625804e-06, "loss": 0.0163, "step": 75570 }, { "epoch": 0.6381963648645796, "grad_norm": 0.20598359405994415, "learning_rate": 8.607621746865031e-06, "loss": 0.0144, "step": 75580 }, { "epoch": 0.6382808047117434, "grad_norm": 0.3638256788253784, "learning_rate": 8.607111498747453e-06, "loss": 0.0129, "step": 75590 }, { "epoch": 0.6383652445589073, "grad_norm": 0.3467789590358734, "learning_rate": 8.606601172284153e-06, "loss": 0.0079, "step": 75600 }, { "epoch": 0.6384496844060712, "grad_norm": 0.20596244931221008, "learning_rate": 8.606090767486213e-06, "loss": 0.0144, "step": 75610 }, { "epoch": 0.6385341242532351, "grad_norm": 0.339192271232605, "learning_rate": 8.60558028436472e-06, "loss": 0.0148, "step": 75620 }, { "epoch": 0.638618564100399, "grad_norm": 0.4692637324333191, "learning_rate": 8.605069722930766e-06, "loss": 0.015, "step": 75630 }, { "epoch": 0.6387030039475629, "grad_norm": 0.18485774099826813, "learning_rate": 8.604559083195434e-06, "loss": 0.028, "step": 75640 }, { "epoch": 0.6387874437947267, "grad_norm": 0.3925078213214874, "learning_rate": 8.604048365169818e-06, "loss": 0.0093, "step": 75650 }, { "epoch": 0.6388718836418906, "grad_norm": 0.16928528249263763, "learning_rate": 8.603537568865008e-06, "loss": 0.0154, "step": 75660 }, { "epoch": 0.6389563234890545, "grad_norm": 0.18139582872390747, "learning_rate": 8.603026694292102e-06, "loss": 0.0094, "step": 75670 }, { "epoch": 0.6390407633362184, "grad_norm": 0.20791949331760406, "learning_rate": 8.602515741462193e-06, "loss": 0.0074, "step": 75680 }, { "epoch": 0.6391252031833823, "grad_norm": 0.23768362402915955, "learning_rate": 8.602004710386383e-06, "loss": 0.0173, "step": 75690 }, { "epoch": 0.6392096430305461, "grad_norm": 0.16681228578090668, "learning_rate": 8.601493601075768e-06, "loss": 0.0102, "step": 75700 }, { "epoch": 0.63929408287771, "grad_norm": 0.30735987424850464, "learning_rate": 8.600982413541448e-06, "loss": 0.0124, "step": 75710 }, { "epoch": 0.6393785227248738, "grad_norm": 0.22831788659095764, "learning_rate": 8.600471147794531e-06, "loss": 0.0086, "step": 75720 }, { "epoch": 0.6394629625720377, "grad_norm": 0.17398469150066376, "learning_rate": 8.599959803846117e-06, "loss": 0.018, "step": 75730 }, { "epoch": 0.6395474024192016, "grad_norm": 0.09767181426286697, "learning_rate": 8.599448381707313e-06, "loss": 0.012, "step": 75740 }, { "epoch": 0.6396318422663655, "grad_norm": 0.22124430537223816, "learning_rate": 8.598936881389229e-06, "loss": 0.0133, "step": 75750 }, { "epoch": 0.6397162821135294, "grad_norm": 0.13861894607543945, "learning_rate": 8.598425302902972e-06, "loss": 0.0093, "step": 75760 }, { "epoch": 0.6398007219606933, "grad_norm": 0.6241310238838196, "learning_rate": 8.597913646259655e-06, "loss": 0.0136, "step": 75770 }, { "epoch": 0.6398851618078572, "grad_norm": 0.10140445083379745, "learning_rate": 8.597401911470392e-06, "loss": 0.0105, "step": 75780 }, { "epoch": 0.639969601655021, "grad_norm": 0.32570284605026245, "learning_rate": 8.596890098546295e-06, "loss": 0.0154, "step": 75790 }, { "epoch": 0.6400540415021849, "grad_norm": 0.049251165241003036, "learning_rate": 8.596378207498484e-06, "loss": 0.0206, "step": 75800 }, { "epoch": 0.6401384813493488, "grad_norm": 0.17854048311710358, "learning_rate": 8.595866238338075e-06, "loss": 0.0084, "step": 75810 }, { "epoch": 0.6402229211965126, "grad_norm": 0.30258622765541077, "learning_rate": 8.59535419107619e-06, "loss": 0.0085, "step": 75820 }, { "epoch": 0.6403073610436765, "grad_norm": 0.5392104387283325, "learning_rate": 8.594842065723945e-06, "loss": 0.0078, "step": 75830 }, { "epoch": 0.6403918008908404, "grad_norm": 0.7921459078788757, "learning_rate": 8.594329862292467e-06, "loss": 0.0192, "step": 75840 }, { "epoch": 0.6404762407380042, "grad_norm": 0.12795419991016388, "learning_rate": 8.593817580792883e-06, "loss": 0.0076, "step": 75850 }, { "epoch": 0.6405606805851681, "grad_norm": 0.8987675309181213, "learning_rate": 8.593305221236317e-06, "loss": 0.0127, "step": 75860 }, { "epoch": 0.640645120432332, "grad_norm": 0.09422005712985992, "learning_rate": 8.592792783633898e-06, "loss": 0.0113, "step": 75870 }, { "epoch": 0.6407295602794959, "grad_norm": 0.16904614865779877, "learning_rate": 8.592280267996755e-06, "loss": 0.0128, "step": 75880 }, { "epoch": 0.6408140001266598, "grad_norm": 0.07012403011322021, "learning_rate": 8.591767674336022e-06, "loss": 0.0057, "step": 75890 }, { "epoch": 0.6408984399738237, "grad_norm": 1.2108129262924194, "learning_rate": 8.59125500266283e-06, "loss": 0.009, "step": 75900 }, { "epoch": 0.6409828798209876, "grad_norm": 0.4952234923839569, "learning_rate": 8.590742252988318e-06, "loss": 0.0169, "step": 75910 }, { "epoch": 0.6410673196681514, "grad_norm": 0.6974268555641174, "learning_rate": 8.590229425323617e-06, "loss": 0.0102, "step": 75920 }, { "epoch": 0.6411517595153152, "grad_norm": 0.24518123269081116, "learning_rate": 8.589716519679869e-06, "loss": 0.0117, "step": 75930 }, { "epoch": 0.6412361993624791, "grad_norm": 0.14202405512332916, "learning_rate": 8.589203536068215e-06, "loss": 0.0121, "step": 75940 }, { "epoch": 0.641320639209643, "grad_norm": 0.07503083348274231, "learning_rate": 8.588690474499797e-06, "loss": 0.0102, "step": 75950 }, { "epoch": 0.6414050790568069, "grad_norm": 0.5689702033996582, "learning_rate": 8.588177334985754e-06, "loss": 0.0171, "step": 75960 }, { "epoch": 0.6414895189039708, "grad_norm": 0.4133727252483368, "learning_rate": 8.587664117537235e-06, "loss": 0.0136, "step": 75970 }, { "epoch": 0.6415739587511347, "grad_norm": 0.28515031933784485, "learning_rate": 8.58715082216539e-06, "loss": 0.0119, "step": 75980 }, { "epoch": 0.6416583985982985, "grad_norm": 0.31412357091903687, "learning_rate": 8.586637448881361e-06, "loss": 0.0076, "step": 75990 }, { "epoch": 0.6417428384454624, "grad_norm": 0.25597813725471497, "learning_rate": 8.586123997696302e-06, "loss": 0.0108, "step": 76000 }, { "epoch": 0.6418272782926263, "grad_norm": 0.29040274024009705, "learning_rate": 8.585610468621365e-06, "loss": 0.0127, "step": 76010 }, { "epoch": 0.6419117181397902, "grad_norm": 0.22373349964618683, "learning_rate": 8.585096861667704e-06, "loss": 0.0118, "step": 76020 }, { "epoch": 0.6419961579869541, "grad_norm": 0.051738739013671875, "learning_rate": 8.584583176846472e-06, "loss": 0.0102, "step": 76030 }, { "epoch": 0.642080597834118, "grad_norm": 0.36291056871414185, "learning_rate": 8.58406941416883e-06, "loss": 0.0076, "step": 76040 }, { "epoch": 0.6421650376812817, "grad_norm": 0.8354829549789429, "learning_rate": 8.583555573645934e-06, "loss": 0.0128, "step": 76050 }, { "epoch": 0.6422494775284456, "grad_norm": 0.26297539472579956, "learning_rate": 8.583041655288945e-06, "loss": 0.0101, "step": 76060 }, { "epoch": 0.6423339173756095, "grad_norm": 0.1722404956817627, "learning_rate": 8.582527659109025e-06, "loss": 0.0109, "step": 76070 }, { "epoch": 0.6424183572227734, "grad_norm": 0.16988927125930786, "learning_rate": 8.582013585117338e-06, "loss": 0.0132, "step": 76080 }, { "epoch": 0.6425027970699373, "grad_norm": 0.31602877378463745, "learning_rate": 8.581499433325052e-06, "loss": 0.0122, "step": 76090 }, { "epoch": 0.6425872369171012, "grad_norm": 0.336446613073349, "learning_rate": 8.580985203743334e-06, "loss": 0.0095, "step": 76100 }, { "epoch": 0.6426716767642651, "grad_norm": 0.13210532069206238, "learning_rate": 8.580470896383347e-06, "loss": 0.0091, "step": 76110 }, { "epoch": 0.642756116611429, "grad_norm": 1.2542331218719482, "learning_rate": 8.579956511256267e-06, "loss": 0.0062, "step": 76120 }, { "epoch": 0.6428405564585928, "grad_norm": 0.6024127006530762, "learning_rate": 8.579442048373265e-06, "loss": 0.0143, "step": 76130 }, { "epoch": 0.6429249963057567, "grad_norm": 0.5032970309257507, "learning_rate": 8.578927507745515e-06, "loss": 0.0117, "step": 76140 }, { "epoch": 0.6430094361529206, "grad_norm": 0.3756119906902313, "learning_rate": 8.578412889384193e-06, "loss": 0.0078, "step": 76150 }, { "epoch": 0.6430938760000844, "grad_norm": 0.0277412086725235, "learning_rate": 8.577898193300478e-06, "loss": 0.0187, "step": 76160 }, { "epoch": 0.6431783158472483, "grad_norm": 0.7051587700843811, "learning_rate": 8.577383419505548e-06, "loss": 0.0224, "step": 76170 }, { "epoch": 0.6432627556944122, "grad_norm": 0.4919177293777466, "learning_rate": 8.57686856801058e-06, "loss": 0.0152, "step": 76180 }, { "epoch": 0.643347195541576, "grad_norm": 0.4375656545162201, "learning_rate": 8.576353638826761e-06, "loss": 0.0145, "step": 76190 }, { "epoch": 0.6434316353887399, "grad_norm": 0.1672947108745575, "learning_rate": 8.575838631965274e-06, "loss": 0.011, "step": 76200 }, { "epoch": 0.6435160752359038, "grad_norm": 0.6750337481498718, "learning_rate": 8.575323547437304e-06, "loss": 0.0134, "step": 76210 }, { "epoch": 0.6436005150830677, "grad_norm": 0.2829844057559967, "learning_rate": 8.57480838525404e-06, "loss": 0.011, "step": 76220 }, { "epoch": 0.6436849549302316, "grad_norm": 0.5428634881973267, "learning_rate": 8.574293145426671e-06, "loss": 0.0101, "step": 76230 }, { "epoch": 0.6437693947773955, "grad_norm": 0.7490507364273071, "learning_rate": 8.573777827966388e-06, "loss": 0.0149, "step": 76240 }, { "epoch": 0.6438538346245594, "grad_norm": 0.7021929025650024, "learning_rate": 8.573262432884381e-06, "loss": 0.0153, "step": 76250 }, { "epoch": 0.6439382744717232, "grad_norm": 0.5644338726997375, "learning_rate": 8.572746960191847e-06, "loss": 0.0155, "step": 76260 }, { "epoch": 0.6440227143188871, "grad_norm": 0.0984681025147438, "learning_rate": 8.57223140989998e-06, "loss": 0.0122, "step": 76270 }, { "epoch": 0.6441071541660509, "grad_norm": 0.302980899810791, "learning_rate": 8.571715782019981e-06, "loss": 0.0072, "step": 76280 }, { "epoch": 0.6441915940132148, "grad_norm": 0.26993778347969055, "learning_rate": 8.571200076563044e-06, "loss": 0.0156, "step": 76290 }, { "epoch": 0.6442760338603787, "grad_norm": 1.697380781173706, "learning_rate": 8.570684293540376e-06, "loss": 0.012, "step": 76300 }, { "epoch": 0.6443604737075426, "grad_norm": 0.43234094977378845, "learning_rate": 8.570168432963175e-06, "loss": 0.0103, "step": 76310 }, { "epoch": 0.6444449135547065, "grad_norm": 0.6751847863197327, "learning_rate": 8.569652494842649e-06, "loss": 0.0207, "step": 76320 }, { "epoch": 0.6445293534018703, "grad_norm": 0.33020666241645813, "learning_rate": 8.569136479190002e-06, "loss": 0.0122, "step": 76330 }, { "epoch": 0.6446137932490342, "grad_norm": 0.313100129365921, "learning_rate": 8.56862038601644e-06, "loss": 0.0092, "step": 76340 }, { "epoch": 0.6446982330961981, "grad_norm": 0.35707730054855347, "learning_rate": 8.568104215333176e-06, "loss": 0.0148, "step": 76350 }, { "epoch": 0.644782672943362, "grad_norm": 0.3639475405216217, "learning_rate": 8.56758796715142e-06, "loss": 0.0111, "step": 76360 }, { "epoch": 0.6448671127905259, "grad_norm": 0.6194461584091187, "learning_rate": 8.567071641482383e-06, "loss": 0.0087, "step": 76370 }, { "epoch": 0.6449515526376898, "grad_norm": 0.2521495521068573, "learning_rate": 8.566555238337282e-06, "loss": 0.0103, "step": 76380 }, { "epoch": 0.6450359924848535, "grad_norm": 0.17738497257232666, "learning_rate": 8.566038757727332e-06, "loss": 0.0173, "step": 76390 }, { "epoch": 0.6451204323320174, "grad_norm": 0.47085681557655334, "learning_rate": 8.565522199663751e-06, "loss": 0.0118, "step": 76400 }, { "epoch": 0.6452048721791813, "grad_norm": 0.3023211359977722, "learning_rate": 8.565005564157759e-06, "loss": 0.0147, "step": 76410 }, { "epoch": 0.6452893120263452, "grad_norm": 0.4479489326477051, "learning_rate": 8.564488851220576e-06, "loss": 0.0115, "step": 76420 }, { "epoch": 0.6453737518735091, "grad_norm": 0.5901840925216675, "learning_rate": 8.563972060863425e-06, "loss": 0.0203, "step": 76430 }, { "epoch": 0.645458191720673, "grad_norm": 0.6121829152107239, "learning_rate": 8.563455193097532e-06, "loss": 0.0183, "step": 76440 }, { "epoch": 0.6455426315678369, "grad_norm": 0.34081852436065674, "learning_rate": 8.562938247934122e-06, "loss": 0.0134, "step": 76450 }, { "epoch": 0.6456270714150008, "grad_norm": 0.3039097189903259, "learning_rate": 8.562421225384424e-06, "loss": 0.0181, "step": 76460 }, { "epoch": 0.6457115112621646, "grad_norm": 0.5484497547149658, "learning_rate": 8.561904125459667e-06, "loss": 0.0125, "step": 76470 }, { "epoch": 0.6457959511093285, "grad_norm": 0.3879795968532562, "learning_rate": 8.561386948171082e-06, "loss": 0.0132, "step": 76480 }, { "epoch": 0.6458803909564924, "grad_norm": 0.31202301383018494, "learning_rate": 8.560869693529903e-06, "loss": 0.0069, "step": 76490 }, { "epoch": 0.6459648308036563, "grad_norm": 0.43593496084213257, "learning_rate": 8.560352361547361e-06, "loss": 0.0254, "step": 76500 }, { "epoch": 0.6460492706508201, "grad_norm": 0.007480957079678774, "learning_rate": 8.559834952234699e-06, "loss": 0.0065, "step": 76510 }, { "epoch": 0.646133710497984, "grad_norm": 0.19154255092144012, "learning_rate": 8.55931746560315e-06, "loss": 0.0164, "step": 76520 }, { "epoch": 0.6462181503451478, "grad_norm": 0.2468864917755127, "learning_rate": 8.558799901663954e-06, "loss": 0.0133, "step": 76530 }, { "epoch": 0.6463025901923117, "grad_norm": 0.12031616270542145, "learning_rate": 8.558282260428355e-06, "loss": 0.0079, "step": 76540 }, { "epoch": 0.6463870300394756, "grad_norm": 0.6621826887130737, "learning_rate": 8.557764541907591e-06, "loss": 0.0156, "step": 76550 }, { "epoch": 0.6464714698866395, "grad_norm": 0.38170355558395386, "learning_rate": 8.557246746112915e-06, "loss": 0.0121, "step": 76560 }, { "epoch": 0.6465559097338034, "grad_norm": 0.9346027970314026, "learning_rate": 8.556728873055565e-06, "loss": 0.0172, "step": 76570 }, { "epoch": 0.6466403495809673, "grad_norm": 0.749570369720459, "learning_rate": 8.556210922746795e-06, "loss": 0.0176, "step": 76580 }, { "epoch": 0.6467247894281312, "grad_norm": 0.16718359291553497, "learning_rate": 8.555692895197851e-06, "loss": 0.0123, "step": 76590 }, { "epoch": 0.646809229275295, "grad_norm": 0.032145168632268906, "learning_rate": 8.555174790419987e-06, "loss": 0.0138, "step": 76600 }, { "epoch": 0.6468936691224589, "grad_norm": 0.21018919348716736, "learning_rate": 8.554656608424455e-06, "loss": 0.0094, "step": 76610 }, { "epoch": 0.6469781089696227, "grad_norm": 0.5092268586158752, "learning_rate": 8.55413834922251e-06, "loss": 0.0103, "step": 76620 }, { "epoch": 0.6470625488167866, "grad_norm": 0.18715055286884308, "learning_rate": 8.553620012825407e-06, "loss": 0.0114, "step": 76630 }, { "epoch": 0.6471469886639505, "grad_norm": 0.592059314250946, "learning_rate": 8.553101599244406e-06, "loss": 0.0111, "step": 76640 }, { "epoch": 0.6472314285111144, "grad_norm": 0.001117699546739459, "learning_rate": 8.552583108490769e-06, "loss": 0.0124, "step": 76650 }, { "epoch": 0.6473158683582783, "grad_norm": 0.22425462305545807, "learning_rate": 8.552064540575752e-06, "loss": 0.0116, "step": 76660 }, { "epoch": 0.6474003082054421, "grad_norm": 0.2733466625213623, "learning_rate": 8.55154589551062e-06, "loss": 0.008, "step": 76670 }, { "epoch": 0.647484748052606, "grad_norm": 0.9552724361419678, "learning_rate": 8.551027173306641e-06, "loss": 0.01, "step": 76680 }, { "epoch": 0.6475691878997699, "grad_norm": 0.14522764086723328, "learning_rate": 8.55050837397508e-06, "loss": 0.0048, "step": 76690 }, { "epoch": 0.6476536277469338, "grad_norm": 0.12648607790470123, "learning_rate": 8.549989497527203e-06, "loss": 0.0168, "step": 76700 }, { "epoch": 0.6477380675940977, "grad_norm": 0.5669137835502625, "learning_rate": 8.549470543974282e-06, "loss": 0.0163, "step": 76710 }, { "epoch": 0.6478225074412616, "grad_norm": 0.2833728492259979, "learning_rate": 8.54895151332759e-06, "loss": 0.0101, "step": 76720 }, { "epoch": 0.6479069472884255, "grad_norm": 0.52168869972229, "learning_rate": 8.548432405598396e-06, "loss": 0.0129, "step": 76730 }, { "epoch": 0.6479913871355892, "grad_norm": 0.4949534833431244, "learning_rate": 8.547913220797978e-06, "loss": 0.0206, "step": 76740 }, { "epoch": 0.6480758269827531, "grad_norm": 0.27513232827186584, "learning_rate": 8.547393958937614e-06, "loss": 0.0108, "step": 76750 }, { "epoch": 0.648160266829917, "grad_norm": 0.3373427391052246, "learning_rate": 8.546874620028578e-06, "loss": 0.0117, "step": 76760 }, { "epoch": 0.6482447066770809, "grad_norm": 0.20040825009346008, "learning_rate": 8.546355204082152e-06, "loss": 0.0179, "step": 76770 }, { "epoch": 0.6483291465242448, "grad_norm": 0.4876895546913147, "learning_rate": 8.545835711109619e-06, "loss": 0.0224, "step": 76780 }, { "epoch": 0.6484135863714087, "grad_norm": 0.28110817074775696, "learning_rate": 8.545316141122258e-06, "loss": 0.0121, "step": 76790 }, { "epoch": 0.6484980262185726, "grad_norm": 0.14338886737823486, "learning_rate": 8.54479649413136e-06, "loss": 0.0122, "step": 76800 }, { "epoch": 0.6485824660657364, "grad_norm": 0.2186933159828186, "learning_rate": 8.544276770148207e-06, "loss": 0.0097, "step": 76810 }, { "epoch": 0.6486669059129003, "grad_norm": 0.2908993661403656, "learning_rate": 8.54375696918409e-06, "loss": 0.0097, "step": 76820 }, { "epoch": 0.6487513457600642, "grad_norm": 0.6332127451896667, "learning_rate": 8.543237091250296e-06, "loss": 0.0215, "step": 76830 }, { "epoch": 0.6488357856072281, "grad_norm": 0.6516956090927124, "learning_rate": 8.54271713635812e-06, "loss": 0.0108, "step": 76840 }, { "epoch": 0.6489202254543919, "grad_norm": 0.012878618203103542, "learning_rate": 8.542197104518854e-06, "loss": 0.0077, "step": 76850 }, { "epoch": 0.6490046653015558, "grad_norm": 0.4519331455230713, "learning_rate": 8.54167699574379e-06, "loss": 0.0105, "step": 76860 }, { "epoch": 0.6490891051487196, "grad_norm": 0.1642780900001526, "learning_rate": 8.541156810044232e-06, "loss": 0.0092, "step": 76870 }, { "epoch": 0.6491735449958835, "grad_norm": 0.5771197080612183, "learning_rate": 8.54063654743147e-06, "loss": 0.018, "step": 76880 }, { "epoch": 0.6492579848430474, "grad_norm": 0.31346940994262695, "learning_rate": 8.540116207916809e-06, "loss": 0.0142, "step": 76890 }, { "epoch": 0.6493424246902113, "grad_norm": 0.456737756729126, "learning_rate": 8.539595791511549e-06, "loss": 0.0078, "step": 76900 }, { "epoch": 0.6494268645373752, "grad_norm": 0.4846169352531433, "learning_rate": 8.539075298226995e-06, "loss": 0.0062, "step": 76910 }, { "epoch": 0.6495113043845391, "grad_norm": 0.45066940784454346, "learning_rate": 8.538554728074448e-06, "loss": 0.0136, "step": 76920 }, { "epoch": 0.649595744231703, "grad_norm": 0.4509386420249939, "learning_rate": 8.53803408106522e-06, "loss": 0.0185, "step": 76930 }, { "epoch": 0.6496801840788669, "grad_norm": 0.9534650444984436, "learning_rate": 8.537513357210616e-06, "loss": 0.0192, "step": 76940 }, { "epoch": 0.6497646239260307, "grad_norm": 0.5209226608276367, "learning_rate": 8.536992556521949e-06, "loss": 0.0074, "step": 76950 }, { "epoch": 0.6498490637731946, "grad_norm": 0.04836690425872803, "learning_rate": 8.536471679010526e-06, "loss": 0.0188, "step": 76960 }, { "epoch": 0.6499335036203584, "grad_norm": 0.11127378791570663, "learning_rate": 8.535950724687663e-06, "loss": 0.0089, "step": 76970 }, { "epoch": 0.6500179434675223, "grad_norm": 0.3207826018333435, "learning_rate": 8.535429693564674e-06, "loss": 0.0192, "step": 76980 }, { "epoch": 0.6501023833146862, "grad_norm": 0.035672299563884735, "learning_rate": 8.534908585652876e-06, "loss": 0.0139, "step": 76990 }, { "epoch": 0.6501868231618501, "grad_norm": 0.4179764986038208, "learning_rate": 8.534387400963591e-06, "loss": 0.0126, "step": 77000 }, { "epoch": 0.650271263009014, "grad_norm": 0.3842274248600006, "learning_rate": 8.533866139508134e-06, "loss": 0.015, "step": 77010 }, { "epoch": 0.6503557028561778, "grad_norm": 0.3382124900817871, "learning_rate": 8.533344801297829e-06, "loss": 0.0193, "step": 77020 }, { "epoch": 0.6504401427033417, "grad_norm": 0.18758545815944672, "learning_rate": 8.532823386344e-06, "loss": 0.0091, "step": 77030 }, { "epoch": 0.6505245825505056, "grad_norm": 0.44008567929267883, "learning_rate": 8.53230189465797e-06, "loss": 0.0092, "step": 77040 }, { "epoch": 0.6506090223976695, "grad_norm": 0.38325953483581543, "learning_rate": 8.531780326251066e-06, "loss": 0.0113, "step": 77050 }, { "epoch": 0.6506934622448334, "grad_norm": 0.231769397854805, "learning_rate": 8.531258681134618e-06, "loss": 0.0102, "step": 77060 }, { "epoch": 0.6507779020919973, "grad_norm": 0.40486064553260803, "learning_rate": 8.530736959319954e-06, "loss": 0.0089, "step": 77070 }, { "epoch": 0.650862341939161, "grad_norm": 0.13801506161689758, "learning_rate": 8.530215160818407e-06, "loss": 0.009, "step": 77080 }, { "epoch": 0.6509467817863249, "grad_norm": 0.24581794440746307, "learning_rate": 8.529693285641312e-06, "loss": 0.0143, "step": 77090 }, { "epoch": 0.6510312216334888, "grad_norm": 0.24976232647895813, "learning_rate": 8.5291713338e-06, "loss": 0.0101, "step": 77100 }, { "epoch": 0.6511156614806527, "grad_norm": 0.4621851444244385, "learning_rate": 8.52864930530581e-06, "loss": 0.0082, "step": 77110 }, { "epoch": 0.6512001013278166, "grad_norm": 0.5488687753677368, "learning_rate": 8.52812720017008e-06, "loss": 0.0126, "step": 77120 }, { "epoch": 0.6512845411749805, "grad_norm": 0.30572545528411865, "learning_rate": 8.527605018404152e-06, "loss": 0.017, "step": 77130 }, { "epoch": 0.6513689810221444, "grad_norm": 0.4040132462978363, "learning_rate": 8.527082760019366e-06, "loss": 0.0139, "step": 77140 }, { "epoch": 0.6514534208693082, "grad_norm": 0.1999220997095108, "learning_rate": 8.526560425027063e-06, "loss": 0.0147, "step": 77150 }, { "epoch": 0.6515378607164721, "grad_norm": 0.1016671359539032, "learning_rate": 8.526038013438591e-06, "loss": 0.02, "step": 77160 }, { "epoch": 0.651622300563636, "grad_norm": 0.5232185125350952, "learning_rate": 8.525515525265296e-06, "loss": 0.0127, "step": 77170 }, { "epoch": 0.6517067404107999, "grad_norm": 0.14242741465568542, "learning_rate": 8.524992960518525e-06, "loss": 0.0091, "step": 77180 }, { "epoch": 0.6517911802579638, "grad_norm": 0.3193066120147705, "learning_rate": 8.524470319209632e-06, "loss": 0.0102, "step": 77190 }, { "epoch": 0.6518756201051276, "grad_norm": 0.3394393026828766, "learning_rate": 8.523947601349965e-06, "loss": 0.0094, "step": 77200 }, { "epoch": 0.6519600599522914, "grad_norm": 0.7683786749839783, "learning_rate": 8.523424806950877e-06, "loss": 0.0168, "step": 77210 }, { "epoch": 0.6520444997994553, "grad_norm": 0.28301921486854553, "learning_rate": 8.522901936023725e-06, "loss": 0.0079, "step": 77220 }, { "epoch": 0.6521289396466192, "grad_norm": 0.3088972270488739, "learning_rate": 8.522378988579864e-06, "loss": 0.0056, "step": 77230 }, { "epoch": 0.6522133794937831, "grad_norm": 0.4548506438732147, "learning_rate": 8.521855964630654e-06, "loss": 0.0107, "step": 77240 }, { "epoch": 0.652297819340947, "grad_norm": 0.14588522911071777, "learning_rate": 8.521332864187455e-06, "loss": 0.0069, "step": 77250 }, { "epoch": 0.6523822591881109, "grad_norm": 0.666566014289856, "learning_rate": 8.520809687261628e-06, "loss": 0.0131, "step": 77260 }, { "epoch": 0.6524666990352748, "grad_norm": 0.24845245480537415, "learning_rate": 8.520286433864535e-06, "loss": 0.0049, "step": 77270 }, { "epoch": 0.6525511388824387, "grad_norm": 0.6774412989616394, "learning_rate": 8.519763104007541e-06, "loss": 0.0181, "step": 77280 }, { "epoch": 0.6526355787296025, "grad_norm": 0.4765528738498688, "learning_rate": 8.519239697702016e-06, "loss": 0.0127, "step": 77290 }, { "epoch": 0.6527200185767664, "grad_norm": 0.10794757306575775, "learning_rate": 8.518716214959324e-06, "loss": 0.0091, "step": 77300 }, { "epoch": 0.6528044584239302, "grad_norm": 0.14806514978408813, "learning_rate": 8.518192655790835e-06, "loss": 0.0163, "step": 77310 }, { "epoch": 0.6528888982710941, "grad_norm": 0.49882861971855164, "learning_rate": 8.517669020207926e-06, "loss": 0.0118, "step": 77320 }, { "epoch": 0.652973338118258, "grad_norm": 0.2853807806968689, "learning_rate": 8.517145308221966e-06, "loss": 0.0095, "step": 77330 }, { "epoch": 0.6530577779654219, "grad_norm": 0.1093629002571106, "learning_rate": 8.51662151984433e-06, "loss": 0.0119, "step": 77340 }, { "epoch": 0.6531422178125857, "grad_norm": 0.2788066565990448, "learning_rate": 8.516097655086394e-06, "loss": 0.009, "step": 77350 }, { "epoch": 0.6532266576597496, "grad_norm": 0.36570435762405396, "learning_rate": 8.515573713959541e-06, "loss": 0.0095, "step": 77360 }, { "epoch": 0.6533110975069135, "grad_norm": 0.5270602703094482, "learning_rate": 8.515049696475144e-06, "loss": 0.0155, "step": 77370 }, { "epoch": 0.6533955373540774, "grad_norm": 0.3243676424026489, "learning_rate": 8.51452560264459e-06, "loss": 0.011, "step": 77380 }, { "epoch": 0.6534799772012413, "grad_norm": 0.5953270792961121, "learning_rate": 8.514001432479259e-06, "loss": 0.011, "step": 77390 }, { "epoch": 0.6535644170484052, "grad_norm": 0.2963503897190094, "learning_rate": 8.513477185990537e-06, "loss": 0.0132, "step": 77400 }, { "epoch": 0.6536488568955691, "grad_norm": 0.029602376744151115, "learning_rate": 8.512952863189811e-06, "loss": 0.0075, "step": 77410 }, { "epoch": 0.653733296742733, "grad_norm": 0.3770897090435028, "learning_rate": 8.512428464088469e-06, "loss": 0.0148, "step": 77420 }, { "epoch": 0.6538177365898967, "grad_norm": 0.30753225088119507, "learning_rate": 8.5119039886979e-06, "loss": 0.0157, "step": 77430 }, { "epoch": 0.6539021764370606, "grad_norm": 0.5013945698738098, "learning_rate": 8.511379437029496e-06, "loss": 0.0122, "step": 77440 }, { "epoch": 0.6539866162842245, "grad_norm": 0.30344849824905396, "learning_rate": 8.510854809094649e-06, "loss": 0.0227, "step": 77450 }, { "epoch": 0.6540710561313884, "grad_norm": 0.39522701501846313, "learning_rate": 8.510330104904757e-06, "loss": 0.0226, "step": 77460 }, { "epoch": 0.6541554959785523, "grad_norm": 0.7420527935028076, "learning_rate": 8.509805324471213e-06, "loss": 0.0112, "step": 77470 }, { "epoch": 0.6542399358257162, "grad_norm": 0.7881239652633667, "learning_rate": 8.509280467805418e-06, "loss": 0.0098, "step": 77480 }, { "epoch": 0.65432437567288, "grad_norm": 0.12602722644805908, "learning_rate": 8.50875553491877e-06, "loss": 0.0144, "step": 77490 }, { "epoch": 0.6544088155200439, "grad_norm": 0.1712542176246643, "learning_rate": 8.50823052582267e-06, "loss": 0.0111, "step": 77500 }, { "epoch": 0.6544932553672078, "grad_norm": 0.25165340304374695, "learning_rate": 8.507705440528522e-06, "loss": 0.0083, "step": 77510 }, { "epoch": 0.6545776952143717, "grad_norm": 0.027428235858678818, "learning_rate": 8.507180279047731e-06, "loss": 0.0158, "step": 77520 }, { "epoch": 0.6546621350615356, "grad_norm": 0.5869337916374207, "learning_rate": 8.506655041391703e-06, "loss": 0.0264, "step": 77530 }, { "epoch": 0.6547465749086994, "grad_norm": 0.12393584102392197, "learning_rate": 8.506129727571846e-06, "loss": 0.0101, "step": 77540 }, { "epoch": 0.6548310147558632, "grad_norm": 0.12311805039644241, "learning_rate": 8.505604337599568e-06, "loss": 0.0122, "step": 77550 }, { "epoch": 0.6549154546030271, "grad_norm": 0.0027922005392611027, "learning_rate": 8.505078871486286e-06, "loss": 0.0153, "step": 77560 }, { "epoch": 0.654999894450191, "grad_norm": 0.25351861119270325, "learning_rate": 8.504553329243405e-06, "loss": 0.01, "step": 77570 }, { "epoch": 0.6550843342973549, "grad_norm": 0.45047807693481445, "learning_rate": 8.504027710882348e-06, "loss": 0.0131, "step": 77580 }, { "epoch": 0.6551687741445188, "grad_norm": 0.17059853672981262, "learning_rate": 8.503502016414524e-06, "loss": 0.0131, "step": 77590 }, { "epoch": 0.6552532139916827, "grad_norm": 0.5281243324279785, "learning_rate": 8.502976245851356e-06, "loss": 0.0197, "step": 77600 }, { "epoch": 0.6553376538388466, "grad_norm": 0.1294853240251541, "learning_rate": 8.50245039920426e-06, "loss": 0.0051, "step": 77610 }, { "epoch": 0.6554220936860105, "grad_norm": 0.2396424561738968, "learning_rate": 8.50192447648466e-06, "loss": 0.0087, "step": 77620 }, { "epoch": 0.6555065335331743, "grad_norm": 0.42057302594184875, "learning_rate": 8.50139847770398e-06, "loss": 0.011, "step": 77630 }, { "epoch": 0.6555909733803382, "grad_norm": 0.3815348446369171, "learning_rate": 8.500872402873639e-06, "loss": 0.0088, "step": 77640 }, { "epoch": 0.6556754132275021, "grad_norm": 0.3622993528842926, "learning_rate": 8.500346252005068e-06, "loss": 0.0092, "step": 77650 }, { "epoch": 0.6557598530746659, "grad_norm": 0.4198117256164551, "learning_rate": 8.499820025109695e-06, "loss": 0.0094, "step": 77660 }, { "epoch": 0.6558442929218298, "grad_norm": 0.41228267550468445, "learning_rate": 8.499293722198947e-06, "loss": 0.0156, "step": 77670 }, { "epoch": 0.6559287327689937, "grad_norm": 0.26787954568862915, "learning_rate": 8.498767343284256e-06, "loss": 0.0225, "step": 77680 }, { "epoch": 0.6560131726161575, "grad_norm": 0.11428959667682648, "learning_rate": 8.498240888377056e-06, "loss": 0.0074, "step": 77690 }, { "epoch": 0.6560976124633214, "grad_norm": 0.2764917016029358, "learning_rate": 8.497714357488781e-06, "loss": 0.0075, "step": 77700 }, { "epoch": 0.6561820523104853, "grad_norm": 0.4977496266365051, "learning_rate": 8.497187750630867e-06, "loss": 0.0178, "step": 77710 }, { "epoch": 0.6562664921576492, "grad_norm": 0.10974343121051788, "learning_rate": 8.496661067814751e-06, "loss": 0.0094, "step": 77720 }, { "epoch": 0.6563509320048131, "grad_norm": 0.5626024007797241, "learning_rate": 8.496134309051875e-06, "loss": 0.0112, "step": 77730 }, { "epoch": 0.656435371851977, "grad_norm": 0.2681542634963989, "learning_rate": 8.495607474353675e-06, "loss": 0.0139, "step": 77740 }, { "epoch": 0.6565198116991409, "grad_norm": 0.17165911197662354, "learning_rate": 8.495080563731599e-06, "loss": 0.0099, "step": 77750 }, { "epoch": 0.6566042515463048, "grad_norm": 0.07068973034620285, "learning_rate": 8.494553577197088e-06, "loss": 0.0145, "step": 77760 }, { "epoch": 0.6566886913934685, "grad_norm": 0.1416132003068924, "learning_rate": 8.494026514761593e-06, "loss": 0.0099, "step": 77770 }, { "epoch": 0.6567731312406324, "grad_norm": 0.29340487718582153, "learning_rate": 8.493499376436554e-06, "loss": 0.0122, "step": 77780 }, { "epoch": 0.6568575710877963, "grad_norm": 0.3808267116546631, "learning_rate": 8.492972162233426e-06, "loss": 0.0159, "step": 77790 }, { "epoch": 0.6569420109349602, "grad_norm": 0.2345559448003769, "learning_rate": 8.492444872163658e-06, "loss": 0.0233, "step": 77800 }, { "epoch": 0.6570264507821241, "grad_norm": 0.2962654232978821, "learning_rate": 8.491917506238705e-06, "loss": 0.0218, "step": 77810 }, { "epoch": 0.657110890629288, "grad_norm": 0.7086693644523621, "learning_rate": 8.491390064470018e-06, "loss": 0.0097, "step": 77820 }, { "epoch": 0.6571953304764518, "grad_norm": 0.45103710889816284, "learning_rate": 8.490862546869053e-06, "loss": 0.0075, "step": 77830 }, { "epoch": 0.6572797703236157, "grad_norm": 0.34432804584503174, "learning_rate": 8.49033495344727e-06, "loss": 0.0111, "step": 77840 }, { "epoch": 0.6573642101707796, "grad_norm": 0.4411298334598541, "learning_rate": 8.489807284216127e-06, "loss": 0.0095, "step": 77850 }, { "epoch": 0.6574486500179435, "grad_norm": 0.28415557742118835, "learning_rate": 8.489279539187086e-06, "loss": 0.0124, "step": 77860 }, { "epoch": 0.6575330898651074, "grad_norm": 0.17536316812038422, "learning_rate": 8.488751718371606e-06, "loss": 0.0146, "step": 77870 }, { "epoch": 0.6576175297122713, "grad_norm": 0.13687586784362793, "learning_rate": 8.488223821781156e-06, "loss": 0.0151, "step": 77880 }, { "epoch": 0.657701969559435, "grad_norm": 0.26236361265182495, "learning_rate": 8.487695849427197e-06, "loss": 0.0096, "step": 77890 }, { "epoch": 0.6577864094065989, "grad_norm": 0.31214916706085205, "learning_rate": 8.487167801321202e-06, "loss": 0.0126, "step": 77900 }, { "epoch": 0.6578708492537628, "grad_norm": 0.36566439270973206, "learning_rate": 8.486639677474633e-06, "loss": 0.0187, "step": 77910 }, { "epoch": 0.6579552891009267, "grad_norm": 0.11746862530708313, "learning_rate": 8.486111477898967e-06, "loss": 0.0089, "step": 77920 }, { "epoch": 0.6580397289480906, "grad_norm": 0.2159326672554016, "learning_rate": 8.485583202605674e-06, "loss": 0.008, "step": 77930 }, { "epoch": 0.6581241687952545, "grad_norm": 0.691942572593689, "learning_rate": 8.485054851606228e-06, "loss": 0.0144, "step": 77940 }, { "epoch": 0.6582086086424184, "grad_norm": 0.793743908405304, "learning_rate": 8.484526424912105e-06, "loss": 0.0109, "step": 77950 }, { "epoch": 0.6582930484895823, "grad_norm": 0.4121430516242981, "learning_rate": 8.483997922534783e-06, "loss": 0.014, "step": 77960 }, { "epoch": 0.6583774883367461, "grad_norm": 0.17826223373413086, "learning_rate": 8.483469344485738e-06, "loss": 0.0128, "step": 77970 }, { "epoch": 0.65846192818391, "grad_norm": 0.25146064162254333, "learning_rate": 8.482940690776454e-06, "loss": 0.0109, "step": 77980 }, { "epoch": 0.6585463680310739, "grad_norm": 0.3833489716053009, "learning_rate": 8.482411961418411e-06, "loss": 0.0125, "step": 77990 }, { "epoch": 0.6586308078782377, "grad_norm": 0.12993170320987701, "learning_rate": 8.481883156423093e-06, "loss": 0.0138, "step": 78000 }, { "epoch": 0.6587152477254016, "grad_norm": 0.13571015000343323, "learning_rate": 8.48135427580199e-06, "loss": 0.0111, "step": 78010 }, { "epoch": 0.6587996875725655, "grad_norm": 0.11592202633619308, "learning_rate": 8.480825319566581e-06, "loss": 0.0133, "step": 78020 }, { "epoch": 0.6588841274197293, "grad_norm": 0.12358731031417847, "learning_rate": 8.480296287728362e-06, "loss": 0.0078, "step": 78030 }, { "epoch": 0.6589685672668932, "grad_norm": 0.1993897259235382, "learning_rate": 8.479767180298821e-06, "loss": 0.0108, "step": 78040 }, { "epoch": 0.6590530071140571, "grad_norm": 0.6536014080047607, "learning_rate": 8.47923799728945e-06, "loss": 0.0119, "step": 78050 }, { "epoch": 0.659137446961221, "grad_norm": 0.1869932860136032, "learning_rate": 8.47870873871174e-06, "loss": 0.0094, "step": 78060 }, { "epoch": 0.6592218868083849, "grad_norm": 0.3951735496520996, "learning_rate": 8.478179404577192e-06, "loss": 0.0125, "step": 78070 }, { "epoch": 0.6593063266555488, "grad_norm": 0.8892766237258911, "learning_rate": 8.477649994897299e-06, "loss": 0.0152, "step": 78080 }, { "epoch": 0.6593907665027127, "grad_norm": 0.33590221405029297, "learning_rate": 8.47712050968356e-06, "loss": 0.0127, "step": 78090 }, { "epoch": 0.6594752063498766, "grad_norm": 0.4450094699859619, "learning_rate": 8.476590948947478e-06, "loss": 0.0138, "step": 78100 }, { "epoch": 0.6595596461970403, "grad_norm": 0.4717522859573364, "learning_rate": 8.47606131270055e-06, "loss": 0.008, "step": 78110 }, { "epoch": 0.6596440860442042, "grad_norm": 0.4860139787197113, "learning_rate": 8.475531600954286e-06, "loss": 0.0168, "step": 78120 }, { "epoch": 0.6597285258913681, "grad_norm": 0.5537725687026978, "learning_rate": 8.475001813720185e-06, "loss": 0.0122, "step": 78130 }, { "epoch": 0.659812965738532, "grad_norm": 0.32727935910224915, "learning_rate": 8.474471951009759e-06, "loss": 0.0085, "step": 78140 }, { "epoch": 0.6598974055856959, "grad_norm": 0.15343761444091797, "learning_rate": 8.473942012834513e-06, "loss": 0.0108, "step": 78150 }, { "epoch": 0.6599818454328598, "grad_norm": 0.34305256605148315, "learning_rate": 8.473411999205958e-06, "loss": 0.0159, "step": 78160 }, { "epoch": 0.6600662852800236, "grad_norm": 0.14904676377773285, "learning_rate": 8.472881910135607e-06, "loss": 0.0094, "step": 78170 }, { "epoch": 0.6601507251271875, "grad_norm": 0.32935476303100586, "learning_rate": 8.472351745634972e-06, "loss": 0.0062, "step": 78180 }, { "epoch": 0.6602351649743514, "grad_norm": 0.5946546196937561, "learning_rate": 8.47182150571557e-06, "loss": 0.0193, "step": 78190 }, { "epoch": 0.6603196048215153, "grad_norm": 0.31440410017967224, "learning_rate": 8.471291190388915e-06, "loss": 0.0163, "step": 78200 }, { "epoch": 0.6604040446686792, "grad_norm": 0.7986752986907959, "learning_rate": 8.470760799666527e-06, "loss": 0.0126, "step": 78210 }, { "epoch": 0.6604884845158431, "grad_norm": 0.2103353887796402, "learning_rate": 8.470230333559926e-06, "loss": 0.0108, "step": 78220 }, { "epoch": 0.6605729243630069, "grad_norm": 0.2783908247947693, "learning_rate": 8.469699792080633e-06, "loss": 0.0085, "step": 78230 }, { "epoch": 0.6606573642101707, "grad_norm": 0.3081263303756714, "learning_rate": 8.46916917524017e-06, "loss": 0.0098, "step": 78240 }, { "epoch": 0.6607418040573346, "grad_norm": 0.4712540805339813, "learning_rate": 8.468638483050065e-06, "loss": 0.0114, "step": 78250 }, { "epoch": 0.6608262439044985, "grad_norm": 0.06283412128686905, "learning_rate": 8.468107715521843e-06, "loss": 0.0084, "step": 78260 }, { "epoch": 0.6609106837516624, "grad_norm": 0.29310569167137146, "learning_rate": 8.467576872667032e-06, "loss": 0.0179, "step": 78270 }, { "epoch": 0.6609951235988263, "grad_norm": 0.24881458282470703, "learning_rate": 8.467045954497161e-06, "loss": 0.0109, "step": 78280 }, { "epoch": 0.6610795634459902, "grad_norm": 0.6819106340408325, "learning_rate": 8.466514961023764e-06, "loss": 0.0085, "step": 78290 }, { "epoch": 0.6611640032931541, "grad_norm": 0.3869427442550659, "learning_rate": 8.46598389225837e-06, "loss": 0.0145, "step": 78300 }, { "epoch": 0.6612484431403179, "grad_norm": 1.3196033239364624, "learning_rate": 8.465452748212518e-06, "loss": 0.0171, "step": 78310 }, { "epoch": 0.6613328829874818, "grad_norm": 0.11010774970054626, "learning_rate": 8.464921528897743e-06, "loss": 0.0077, "step": 78320 }, { "epoch": 0.6614173228346457, "grad_norm": 0.4523993134498596, "learning_rate": 8.46439023432558e-06, "loss": 0.0108, "step": 78330 }, { "epoch": 0.6615017626818095, "grad_norm": 0.20405350625514984, "learning_rate": 8.463858864507573e-06, "loss": 0.0084, "step": 78340 }, { "epoch": 0.6615862025289734, "grad_norm": 0.7437866926193237, "learning_rate": 8.46332741945526e-06, "loss": 0.0173, "step": 78350 }, { "epoch": 0.6616706423761373, "grad_norm": 0.579717218875885, "learning_rate": 8.462795899180188e-06, "loss": 0.0156, "step": 78360 }, { "epoch": 0.6617550822233011, "grad_norm": 0.35591351985931396, "learning_rate": 8.462264303693894e-06, "loss": 0.0143, "step": 78370 }, { "epoch": 0.661839522070465, "grad_norm": 0.21759408712387085, "learning_rate": 8.461732633007933e-06, "loss": 0.0096, "step": 78380 }, { "epoch": 0.6619239619176289, "grad_norm": 0.2021651566028595, "learning_rate": 8.461200887133846e-06, "loss": 0.0038, "step": 78390 }, { "epoch": 0.6620084017647928, "grad_norm": 0.5109520554542542, "learning_rate": 8.460669066083187e-06, "loss": 0.0137, "step": 78400 }, { "epoch": 0.6620928416119567, "grad_norm": 0.6633316278457642, "learning_rate": 8.460137169867504e-06, "loss": 0.0238, "step": 78410 }, { "epoch": 0.6621772814591206, "grad_norm": 0.6608896851539612, "learning_rate": 8.459605198498351e-06, "loss": 0.0103, "step": 78420 }, { "epoch": 0.6622617213062845, "grad_norm": 0.32132649421691895, "learning_rate": 8.45907315198728e-06, "loss": 0.0088, "step": 78430 }, { "epoch": 0.6623461611534484, "grad_norm": 0.12354443222284317, "learning_rate": 8.458541030345852e-06, "loss": 0.0099, "step": 78440 }, { "epoch": 0.6624306010006122, "grad_norm": 0.2309732884168625, "learning_rate": 8.458008833585621e-06, "loss": 0.0073, "step": 78450 }, { "epoch": 0.662515040847776, "grad_norm": 0.5216895341873169, "learning_rate": 8.457476561718145e-06, "loss": 0.0101, "step": 78460 }, { "epoch": 0.6625994806949399, "grad_norm": 0.2895561158657074, "learning_rate": 8.456944214754988e-06, "loss": 0.0137, "step": 78470 }, { "epoch": 0.6626839205421038, "grad_norm": 0.568236231803894, "learning_rate": 8.456411792707711e-06, "loss": 0.0127, "step": 78480 }, { "epoch": 0.6627683603892677, "grad_norm": 0.40319937467575073, "learning_rate": 8.455879295587877e-06, "loss": 0.0082, "step": 78490 }, { "epoch": 0.6628528002364316, "grad_norm": 0.2883496582508087, "learning_rate": 8.455346723407055e-06, "loss": 0.013, "step": 78500 }, { "epoch": 0.6629372400835954, "grad_norm": 0.5247752666473389, "learning_rate": 8.454814076176808e-06, "loss": 0.0141, "step": 78510 }, { "epoch": 0.6630216799307593, "grad_norm": 0.4061208963394165, "learning_rate": 8.454281353908708e-06, "loss": 0.0061, "step": 78520 }, { "epoch": 0.6631061197779232, "grad_norm": 0.18061567842960358, "learning_rate": 8.453748556614325e-06, "loss": 0.012, "step": 78530 }, { "epoch": 0.6631905596250871, "grad_norm": 0.23814809322357178, "learning_rate": 8.453215684305231e-06, "loss": 0.0105, "step": 78540 }, { "epoch": 0.663274999472251, "grad_norm": 0.5798749327659607, "learning_rate": 8.452682736992998e-06, "loss": 0.0147, "step": 78550 }, { "epoch": 0.6633594393194149, "grad_norm": 0.2751377522945404, "learning_rate": 8.452149714689207e-06, "loss": 0.0101, "step": 78560 }, { "epoch": 0.6634438791665787, "grad_norm": 0.2135041505098343, "learning_rate": 8.45161661740543e-06, "loss": 0.0185, "step": 78570 }, { "epoch": 0.6635283190137425, "grad_norm": 0.2550775408744812, "learning_rate": 8.451083445153247e-06, "loss": 0.0147, "step": 78580 }, { "epoch": 0.6636127588609064, "grad_norm": 0.3408629894256592, "learning_rate": 8.45055019794424e-06, "loss": 0.0142, "step": 78590 }, { "epoch": 0.6636971987080703, "grad_norm": 0.25509992241859436, "learning_rate": 8.45001687578999e-06, "loss": 0.011, "step": 78600 }, { "epoch": 0.6637816385552342, "grad_norm": 0.3978441059589386, "learning_rate": 8.449483478702079e-06, "loss": 0.0086, "step": 78610 }, { "epoch": 0.6638660784023981, "grad_norm": 0.3192659318447113, "learning_rate": 8.448950006692094e-06, "loss": 0.0131, "step": 78620 }, { "epoch": 0.663950518249562, "grad_norm": 0.605172336101532, "learning_rate": 8.44841645977162e-06, "loss": 0.0115, "step": 78630 }, { "epoch": 0.6640349580967259, "grad_norm": 0.10407647490501404, "learning_rate": 8.447882837952251e-06, "loss": 0.0103, "step": 78640 }, { "epoch": 0.6641193979438897, "grad_norm": 0.6124219298362732, "learning_rate": 8.447349141245572e-06, "loss": 0.0217, "step": 78650 }, { "epoch": 0.6642038377910536, "grad_norm": 0.09402371942996979, "learning_rate": 8.446815369663174e-06, "loss": 0.0157, "step": 78660 }, { "epoch": 0.6642882776382175, "grad_norm": 0.2977350950241089, "learning_rate": 8.446281523216653e-06, "loss": 0.0131, "step": 78670 }, { "epoch": 0.6643727174853814, "grad_norm": 0.4271329939365387, "learning_rate": 8.445747601917606e-06, "loss": 0.0104, "step": 78680 }, { "epoch": 0.6644571573325452, "grad_norm": 0.43621107935905457, "learning_rate": 8.445213605777624e-06, "loss": 0.0088, "step": 78690 }, { "epoch": 0.6645415971797091, "grad_norm": 0.923245906829834, "learning_rate": 8.44467953480831e-06, "loss": 0.0099, "step": 78700 }, { "epoch": 0.664626037026873, "grad_norm": 0.024643104523420334, "learning_rate": 8.444145389021263e-06, "loss": 0.0151, "step": 78710 }, { "epoch": 0.6647104768740368, "grad_norm": 0.7725934386253357, "learning_rate": 8.443611168428083e-06, "loss": 0.0162, "step": 78720 }, { "epoch": 0.6647949167212007, "grad_norm": 0.3049542307853699, "learning_rate": 8.443076873040374e-06, "loss": 0.0101, "step": 78730 }, { "epoch": 0.6648793565683646, "grad_norm": 0.365095317363739, "learning_rate": 8.442542502869741e-06, "loss": 0.0103, "step": 78740 }, { "epoch": 0.6649637964155285, "grad_norm": 0.32033225893974304, "learning_rate": 8.44200805792779e-06, "loss": 0.0179, "step": 78750 }, { "epoch": 0.6650482362626924, "grad_norm": 0.4577454626560211, "learning_rate": 8.441473538226132e-06, "loss": 0.0212, "step": 78760 }, { "epoch": 0.6651326761098563, "grad_norm": 0.28412240743637085, "learning_rate": 8.440938943776371e-06, "loss": 0.0077, "step": 78770 }, { "epoch": 0.6652171159570202, "grad_norm": 0.15133757889270782, "learning_rate": 8.440404274590122e-06, "loss": 0.0108, "step": 78780 }, { "epoch": 0.665301555804184, "grad_norm": 0.2224566489458084, "learning_rate": 8.439869530678998e-06, "loss": 0.011, "step": 78790 }, { "epoch": 0.6653859956513478, "grad_norm": 0.38949596881866455, "learning_rate": 8.439334712054612e-06, "loss": 0.0123, "step": 78800 }, { "epoch": 0.6654704354985117, "grad_norm": 0.23472565412521362, "learning_rate": 8.438799818728581e-06, "loss": 0.0135, "step": 78810 }, { "epoch": 0.6655548753456756, "grad_norm": 1.2347272634506226, "learning_rate": 8.438264850712521e-06, "loss": 0.0054, "step": 78820 }, { "epoch": 0.6656393151928395, "grad_norm": 0.23066513240337372, "learning_rate": 8.437729808018056e-06, "loss": 0.0153, "step": 78830 }, { "epoch": 0.6657237550400034, "grad_norm": 0.13951058685779572, "learning_rate": 8.4371946906568e-06, "loss": 0.0154, "step": 78840 }, { "epoch": 0.6658081948871672, "grad_norm": 0.3567161560058594, "learning_rate": 8.436659498640383e-06, "loss": 0.0146, "step": 78850 }, { "epoch": 0.6658926347343311, "grad_norm": 0.2659664452075958, "learning_rate": 8.436124231980424e-06, "loss": 0.0103, "step": 78860 }, { "epoch": 0.665977074581495, "grad_norm": 0.5156199932098389, "learning_rate": 8.435588890688553e-06, "loss": 0.014, "step": 78870 }, { "epoch": 0.6660615144286589, "grad_norm": 0.4267347455024719, "learning_rate": 8.435053474776393e-06, "loss": 0.0067, "step": 78880 }, { "epoch": 0.6661459542758228, "grad_norm": 0.24480214715003967, "learning_rate": 8.434517984255578e-06, "loss": 0.0187, "step": 78890 }, { "epoch": 0.6662303941229867, "grad_norm": 0.4332479238510132, "learning_rate": 8.433982419137733e-06, "loss": 0.0157, "step": 78900 }, { "epoch": 0.6663148339701506, "grad_norm": 0.32491788268089294, "learning_rate": 8.433446779434497e-06, "loss": 0.0067, "step": 78910 }, { "epoch": 0.6663992738173143, "grad_norm": 0.8997355699539185, "learning_rate": 8.432911065157498e-06, "loss": 0.0142, "step": 78920 }, { "epoch": 0.6664837136644782, "grad_norm": 0.116337351500988, "learning_rate": 8.432375276318376e-06, "loss": 0.0072, "step": 78930 }, { "epoch": 0.6665681535116421, "grad_norm": 0.3506733179092407, "learning_rate": 8.431839412928765e-06, "loss": 0.0087, "step": 78940 }, { "epoch": 0.666652593358806, "grad_norm": 0.2201635241508484, "learning_rate": 8.431303475000305e-06, "loss": 0.0112, "step": 78950 }, { "epoch": 0.6667370332059699, "grad_norm": 0.8678199052810669, "learning_rate": 8.430767462544639e-06, "loss": 0.0159, "step": 78960 }, { "epoch": 0.6668214730531338, "grad_norm": 0.47072452306747437, "learning_rate": 8.430231375573403e-06, "loss": 0.0287, "step": 78970 }, { "epoch": 0.6669059129002977, "grad_norm": 0.25023117661476135, "learning_rate": 8.429695214098246e-06, "loss": 0.0125, "step": 78980 }, { "epoch": 0.6669903527474615, "grad_norm": 0.03964437171816826, "learning_rate": 8.429158978130814e-06, "loss": 0.0133, "step": 78990 }, { "epoch": 0.6670747925946254, "grad_norm": 0.2097456306219101, "learning_rate": 8.42862266768275e-06, "loss": 0.0198, "step": 79000 }, { "epoch": 0.6671592324417893, "grad_norm": 0.3448963761329651, "learning_rate": 8.428086282765703e-06, "loss": 0.012, "step": 79010 }, { "epoch": 0.6672436722889532, "grad_norm": 0.36961379647254944, "learning_rate": 8.427549823391325e-06, "loss": 0.0101, "step": 79020 }, { "epoch": 0.667328112136117, "grad_norm": 0.052873775362968445, "learning_rate": 8.427013289571269e-06, "loss": 0.019, "step": 79030 }, { "epoch": 0.6674125519832809, "grad_norm": 0.17691200971603394, "learning_rate": 8.426476681317184e-06, "loss": 0.0155, "step": 79040 }, { "epoch": 0.6674969918304448, "grad_norm": 0.34629324078559875, "learning_rate": 8.42593999864073e-06, "loss": 0.0107, "step": 79050 }, { "epoch": 0.6675814316776086, "grad_norm": 0.44233813881874084, "learning_rate": 8.42540324155356e-06, "loss": 0.0154, "step": 79060 }, { "epoch": 0.6676658715247725, "grad_norm": 0.5791160464286804, "learning_rate": 8.424866410067334e-06, "loss": 0.0112, "step": 79070 }, { "epoch": 0.6677503113719364, "grad_norm": 0.3970732092857361, "learning_rate": 8.424329504193712e-06, "loss": 0.0135, "step": 79080 }, { "epoch": 0.6678347512191003, "grad_norm": 0.03412272408604622, "learning_rate": 8.423792523944353e-06, "loss": 0.0103, "step": 79090 }, { "epoch": 0.6679191910662642, "grad_norm": 0.7513880729675293, "learning_rate": 8.423255469330924e-06, "loss": 0.0117, "step": 79100 }, { "epoch": 0.6680036309134281, "grad_norm": 0.27390506863594055, "learning_rate": 8.422718340365086e-06, "loss": 0.0091, "step": 79110 }, { "epoch": 0.668088070760592, "grad_norm": 0.539509654045105, "learning_rate": 8.422181137058506e-06, "loss": 0.0116, "step": 79120 }, { "epoch": 0.6681725106077558, "grad_norm": 0.4350016713142395, "learning_rate": 8.421643859422857e-06, "loss": 0.0231, "step": 79130 }, { "epoch": 0.6682569504549197, "grad_norm": 0.4391949474811554, "learning_rate": 8.421106507469801e-06, "loss": 0.0116, "step": 79140 }, { "epoch": 0.6683413903020835, "grad_norm": 0.4649350643157959, "learning_rate": 8.420569081211014e-06, "loss": 0.0117, "step": 79150 }, { "epoch": 0.6684258301492474, "grad_norm": 0.7998905181884766, "learning_rate": 8.420031580658167e-06, "loss": 0.0141, "step": 79160 }, { "epoch": 0.6685102699964113, "grad_norm": 0.3593500256538391, "learning_rate": 8.419494005822936e-06, "loss": 0.0182, "step": 79170 }, { "epoch": 0.6685947098435752, "grad_norm": 0.7866082787513733, "learning_rate": 8.418956356716995e-06, "loss": 0.018, "step": 79180 }, { "epoch": 0.668679149690739, "grad_norm": 0.303850382566452, "learning_rate": 8.418418633352023e-06, "loss": 0.0159, "step": 79190 }, { "epoch": 0.6687635895379029, "grad_norm": 0.6556203365325928, "learning_rate": 8.417880835739699e-06, "loss": 0.0098, "step": 79200 }, { "epoch": 0.6688480293850668, "grad_norm": 0.2757994830608368, "learning_rate": 8.417342963891703e-06, "loss": 0.014, "step": 79210 }, { "epoch": 0.6689324692322307, "grad_norm": 0.3278566002845764, "learning_rate": 8.416805017819717e-06, "loss": 0.0114, "step": 79220 }, { "epoch": 0.6690169090793946, "grad_norm": 0.41468682885169983, "learning_rate": 8.41626699753543e-06, "loss": 0.0102, "step": 79230 }, { "epoch": 0.6691013489265585, "grad_norm": 0.5652278065681458, "learning_rate": 8.415728903050519e-06, "loss": 0.0067, "step": 79240 }, { "epoch": 0.6691857887737224, "grad_norm": 0.1928781270980835, "learning_rate": 8.415190734376679e-06, "loss": 0.0116, "step": 79250 }, { "epoch": 0.6692702286208861, "grad_norm": 0.24004340171813965, "learning_rate": 8.414652491525597e-06, "loss": 0.0169, "step": 79260 }, { "epoch": 0.66935466846805, "grad_norm": 0.17370548844337463, "learning_rate": 8.414114174508961e-06, "loss": 0.0121, "step": 79270 }, { "epoch": 0.6694391083152139, "grad_norm": 0.27575764060020447, "learning_rate": 8.413575783338465e-06, "loss": 0.0183, "step": 79280 }, { "epoch": 0.6695235481623778, "grad_norm": 0.24111133813858032, "learning_rate": 8.413037318025803e-06, "loss": 0.0157, "step": 79290 }, { "epoch": 0.6696079880095417, "grad_norm": 0.7307522296905518, "learning_rate": 8.412498778582669e-06, "loss": 0.0175, "step": 79300 }, { "epoch": 0.6696924278567056, "grad_norm": 0.2852342426776886, "learning_rate": 8.41196016502076e-06, "loss": 0.009, "step": 79310 }, { "epoch": 0.6697768677038695, "grad_norm": 0.2979567050933838, "learning_rate": 8.411421477351778e-06, "loss": 0.0106, "step": 79320 }, { "epoch": 0.6698613075510333, "grad_norm": 0.5152826309204102, "learning_rate": 8.410882715587419e-06, "loss": 0.0093, "step": 79330 }, { "epoch": 0.6699457473981972, "grad_norm": 0.3182167112827301, "learning_rate": 8.410343879739387e-06, "loss": 0.0097, "step": 79340 }, { "epoch": 0.6700301872453611, "grad_norm": 0.554561197757721, "learning_rate": 8.409804969819383e-06, "loss": 0.0149, "step": 79350 }, { "epoch": 0.670114627092525, "grad_norm": 0.33150872588157654, "learning_rate": 8.409265985839116e-06, "loss": 0.011, "step": 79360 }, { "epoch": 0.6701990669396889, "grad_norm": 0.34248074889183044, "learning_rate": 8.408726927810289e-06, "loss": 0.0158, "step": 79370 }, { "epoch": 0.6702835067868527, "grad_norm": 0.048451174050569534, "learning_rate": 8.408187795744614e-06, "loss": 0.0105, "step": 79380 }, { "epoch": 0.6703679466340166, "grad_norm": 1.0308893918991089, "learning_rate": 8.407648589653796e-06, "loss": 0.0197, "step": 79390 }, { "epoch": 0.6704523864811804, "grad_norm": 0.15532861649990082, "learning_rate": 8.407109309549548e-06, "loss": 0.0098, "step": 79400 }, { "epoch": 0.6705368263283443, "grad_norm": 0.15296109020709991, "learning_rate": 8.406569955443587e-06, "loss": 0.01, "step": 79410 }, { "epoch": 0.6706212661755082, "grad_norm": 0.13102878630161285, "learning_rate": 8.406030527347623e-06, "loss": 0.0101, "step": 79420 }, { "epoch": 0.6707057060226721, "grad_norm": 0.5692269206047058, "learning_rate": 8.405491025273374e-06, "loss": 0.0112, "step": 79430 }, { "epoch": 0.670790145869836, "grad_norm": 0.35439884662628174, "learning_rate": 8.404951449232558e-06, "loss": 0.0143, "step": 79440 }, { "epoch": 0.6708745857169999, "grad_norm": 0.47384652495384216, "learning_rate": 8.404411799236893e-06, "loss": 0.0084, "step": 79450 }, { "epoch": 0.6709590255641638, "grad_norm": 0.24514010548591614, "learning_rate": 8.403872075298103e-06, "loss": 0.0168, "step": 79460 }, { "epoch": 0.6710434654113276, "grad_norm": 0.6185876727104187, "learning_rate": 8.403332277427907e-06, "loss": 0.0091, "step": 79470 }, { "epoch": 0.6711279052584915, "grad_norm": 0.446924090385437, "learning_rate": 8.402792405638034e-06, "loss": 0.0068, "step": 79480 }, { "epoch": 0.6712123451056553, "grad_norm": 0.19610536098480225, "learning_rate": 8.402252459940206e-06, "loss": 0.0119, "step": 79490 }, { "epoch": 0.6712967849528192, "grad_norm": 0.520383894443512, "learning_rate": 8.401712440346153e-06, "loss": 0.0183, "step": 79500 }, { "epoch": 0.6713812247999831, "grad_norm": 0.33940958976745605, "learning_rate": 8.4011723468676e-06, "loss": 0.0161, "step": 79510 }, { "epoch": 0.671465664647147, "grad_norm": 0.5935443043708801, "learning_rate": 8.400632179516283e-06, "loss": 0.0121, "step": 79520 }, { "epoch": 0.6715501044943109, "grad_norm": 0.4056508541107178, "learning_rate": 8.400091938303932e-06, "loss": 0.0107, "step": 79530 }, { "epoch": 0.6716345443414747, "grad_norm": 0.41190385818481445, "learning_rate": 8.39955162324228e-06, "loss": 0.0123, "step": 79540 }, { "epoch": 0.6717189841886386, "grad_norm": 0.7172152996063232, "learning_rate": 8.399011234343065e-06, "loss": 0.0118, "step": 79550 }, { "epoch": 0.6718034240358025, "grad_norm": 0.1252436637878418, "learning_rate": 8.39847077161802e-06, "loss": 0.0075, "step": 79560 }, { "epoch": 0.6718878638829664, "grad_norm": 0.35004809498786926, "learning_rate": 8.397930235078885e-06, "loss": 0.0109, "step": 79570 }, { "epoch": 0.6719723037301303, "grad_norm": 0.19022786617279053, "learning_rate": 8.397389624737406e-06, "loss": 0.0085, "step": 79580 }, { "epoch": 0.6720567435772942, "grad_norm": 0.07437769323587418, "learning_rate": 8.396848940605318e-06, "loss": 0.0108, "step": 79590 }, { "epoch": 0.672141183424458, "grad_norm": 0.5873528718948364, "learning_rate": 8.396308182694368e-06, "loss": 0.0135, "step": 79600 }, { "epoch": 0.6722256232716218, "grad_norm": 0.17332755029201508, "learning_rate": 8.395767351016298e-06, "loss": 0.0126, "step": 79610 }, { "epoch": 0.6723100631187857, "grad_norm": 0.0827094316482544, "learning_rate": 8.395226445582861e-06, "loss": 0.0097, "step": 79620 }, { "epoch": 0.6723945029659496, "grad_norm": 0.7714887261390686, "learning_rate": 8.394685466405799e-06, "loss": 0.0189, "step": 79630 }, { "epoch": 0.6724789428131135, "grad_norm": 0.6867839097976685, "learning_rate": 8.394144413496863e-06, "loss": 0.0136, "step": 79640 }, { "epoch": 0.6725633826602774, "grad_norm": 0.3746963441371918, "learning_rate": 8.393603286867808e-06, "loss": 0.0084, "step": 79650 }, { "epoch": 0.6726478225074413, "grad_norm": 0.5111218094825745, "learning_rate": 8.393062086530384e-06, "loss": 0.0066, "step": 79660 }, { "epoch": 0.6727322623546051, "grad_norm": 0.3564196228981018, "learning_rate": 8.392520812496347e-06, "loss": 0.0093, "step": 79670 }, { "epoch": 0.672816702201769, "grad_norm": 0.3406161665916443, "learning_rate": 8.391979464777454e-06, "loss": 0.0102, "step": 79680 }, { "epoch": 0.6729011420489329, "grad_norm": 0.22394640743732452, "learning_rate": 8.39143804338546e-06, "loss": 0.017, "step": 79690 }, { "epoch": 0.6729855818960968, "grad_norm": 0.21404847502708435, "learning_rate": 8.390896548332129e-06, "loss": 0.0131, "step": 79700 }, { "epoch": 0.6730700217432607, "grad_norm": 0.20872336626052856, "learning_rate": 8.390354979629219e-06, "loss": 0.0079, "step": 79710 }, { "epoch": 0.6731544615904245, "grad_norm": 0.26660868525505066, "learning_rate": 8.389813337288492e-06, "loss": 0.0185, "step": 79720 }, { "epoch": 0.6732389014375884, "grad_norm": 0.3512226343154907, "learning_rate": 8.389271621321715e-06, "loss": 0.0115, "step": 79730 }, { "epoch": 0.6733233412847522, "grad_norm": 0.39000004529953003, "learning_rate": 8.388729831740654e-06, "loss": 0.0097, "step": 79740 }, { "epoch": 0.6734077811319161, "grad_norm": 0.4123421311378479, "learning_rate": 8.388187968557076e-06, "loss": 0.014, "step": 79750 }, { "epoch": 0.67349222097908, "grad_norm": 0.29054710268974304, "learning_rate": 8.387646031782746e-06, "loss": 0.0099, "step": 79760 }, { "epoch": 0.6735766608262439, "grad_norm": 0.2775079607963562, "learning_rate": 8.387104021429441e-06, "loss": 0.0127, "step": 79770 }, { "epoch": 0.6736611006734078, "grad_norm": 0.11308153718709946, "learning_rate": 8.386561937508931e-06, "loss": 0.0105, "step": 79780 }, { "epoch": 0.6737455405205717, "grad_norm": 0.22709877789020538, "learning_rate": 8.38601978003299e-06, "loss": 0.0172, "step": 79790 }, { "epoch": 0.6738299803677356, "grad_norm": 0.3996732532978058, "learning_rate": 8.385477549013392e-06, "loss": 0.0059, "step": 79800 }, { "epoch": 0.6739144202148994, "grad_norm": 0.2555750012397766, "learning_rate": 8.384935244461915e-06, "loss": 0.0109, "step": 79810 }, { "epoch": 0.6739988600620633, "grad_norm": 0.42578840255737305, "learning_rate": 8.384392866390339e-06, "loss": 0.0079, "step": 79820 }, { "epoch": 0.6740832999092272, "grad_norm": 0.5723271369934082, "learning_rate": 8.383850414810443e-06, "loss": 0.017, "step": 79830 }, { "epoch": 0.674167739756391, "grad_norm": 0.1715075820684433, "learning_rate": 8.383307889734012e-06, "loss": 0.006, "step": 79840 }, { "epoch": 0.6742521796035549, "grad_norm": 0.402570903301239, "learning_rate": 8.382765291172824e-06, "loss": 0.0144, "step": 79850 }, { "epoch": 0.6743366194507188, "grad_norm": 0.2780951261520386, "learning_rate": 8.382222619138666e-06, "loss": 0.0128, "step": 79860 }, { "epoch": 0.6744210592978827, "grad_norm": 0.548957347869873, "learning_rate": 8.38167987364333e-06, "loss": 0.0132, "step": 79870 }, { "epoch": 0.6745054991450465, "grad_norm": 0.23718516528606415, "learning_rate": 8.381137054698596e-06, "loss": 0.0136, "step": 79880 }, { "epoch": 0.6745899389922104, "grad_norm": 0.39985254406929016, "learning_rate": 8.38059416231626e-06, "loss": 0.0137, "step": 79890 }, { "epoch": 0.6746743788393743, "grad_norm": 0.5545390248298645, "learning_rate": 8.380051196508112e-06, "loss": 0.0179, "step": 79900 }, { "epoch": 0.6747588186865382, "grad_norm": 0.5160760879516602, "learning_rate": 8.379508157285945e-06, "loss": 0.0166, "step": 79910 }, { "epoch": 0.6748432585337021, "grad_norm": 0.46763837337493896, "learning_rate": 8.378965044661552e-06, "loss": 0.0095, "step": 79920 }, { "epoch": 0.674927698380866, "grad_norm": 0.15461379289627075, "learning_rate": 8.37842185864673e-06, "loss": 0.0207, "step": 79930 }, { "epoch": 0.6750121382280299, "grad_norm": 0.42804500460624695, "learning_rate": 8.377878599253279e-06, "loss": 0.0103, "step": 79940 }, { "epoch": 0.6750965780751936, "grad_norm": 0.8040900826454163, "learning_rate": 8.377335266492997e-06, "loss": 0.018, "step": 79950 }, { "epoch": 0.6751810179223575, "grad_norm": 0.2345278114080429, "learning_rate": 8.376791860377683e-06, "loss": 0.0136, "step": 79960 }, { "epoch": 0.6752654577695214, "grad_norm": 0.44297221302986145, "learning_rate": 8.376248380919144e-06, "loss": 0.0114, "step": 79970 }, { "epoch": 0.6753498976166853, "grad_norm": 0.4014127850532532, "learning_rate": 8.37570482812918e-06, "loss": 0.0094, "step": 79980 }, { "epoch": 0.6754343374638492, "grad_norm": 0.37357598543167114, "learning_rate": 8.375161202019602e-06, "loss": 0.01, "step": 79990 }, { "epoch": 0.6755187773110131, "grad_norm": 0.16701748967170715, "learning_rate": 8.374617502602211e-06, "loss": 0.0076, "step": 80000 }, { "epoch": 0.675603217158177, "grad_norm": 0.2965761423110962, "learning_rate": 8.374073729888824e-06, "loss": 0.0086, "step": 80010 }, { "epoch": 0.6756876570053408, "grad_norm": 0.42971959710121155, "learning_rate": 8.373529883891243e-06, "loss": 0.0143, "step": 80020 }, { "epoch": 0.6757720968525047, "grad_norm": 0.11668762564659119, "learning_rate": 8.372985964621285e-06, "loss": 0.0084, "step": 80030 }, { "epoch": 0.6758565366996686, "grad_norm": 0.37439537048339844, "learning_rate": 8.372441972090762e-06, "loss": 0.0135, "step": 80040 }, { "epoch": 0.6759409765468325, "grad_norm": 0.38599666953086853, "learning_rate": 8.371897906311493e-06, "loss": 0.016, "step": 80050 }, { "epoch": 0.6760254163939964, "grad_norm": 0.28490036725997925, "learning_rate": 8.371353767295289e-06, "loss": 0.0118, "step": 80060 }, { "epoch": 0.6761098562411602, "grad_norm": 0.28742966055870056, "learning_rate": 8.370809555053976e-06, "loss": 0.0136, "step": 80070 }, { "epoch": 0.676194296088324, "grad_norm": 0.38711416721343994, "learning_rate": 8.370265269599367e-06, "loss": 0.0119, "step": 80080 }, { "epoch": 0.6762787359354879, "grad_norm": 0.32360711693763733, "learning_rate": 8.36972091094329e-06, "loss": 0.0112, "step": 80090 }, { "epoch": 0.6763631757826518, "grad_norm": 0.1183817908167839, "learning_rate": 8.369176479097563e-06, "loss": 0.0033, "step": 80100 }, { "epoch": 0.6764476156298157, "grad_norm": 0.6055214405059814, "learning_rate": 8.368631974074015e-06, "loss": 0.0127, "step": 80110 }, { "epoch": 0.6765320554769796, "grad_norm": 0.8134786486625671, "learning_rate": 8.368087395884468e-06, "loss": 0.0134, "step": 80120 }, { "epoch": 0.6766164953241435, "grad_norm": 0.67496657371521, "learning_rate": 8.367542744540755e-06, "loss": 0.0154, "step": 80130 }, { "epoch": 0.6767009351713074, "grad_norm": 0.306148886680603, "learning_rate": 8.366998020054704e-06, "loss": 0.0064, "step": 80140 }, { "epoch": 0.6767853750184712, "grad_norm": 0.23871345818042755, "learning_rate": 8.366453222438145e-06, "loss": 0.0102, "step": 80150 }, { "epoch": 0.6768698148656351, "grad_norm": 0.3506043255329132, "learning_rate": 8.365908351702912e-06, "loss": 0.0066, "step": 80160 }, { "epoch": 0.676954254712799, "grad_norm": 0.37227246165275574, "learning_rate": 8.36536340786084e-06, "loss": 0.0102, "step": 80170 }, { "epoch": 0.6770386945599628, "grad_norm": 0.6526156067848206, "learning_rate": 8.364818390923763e-06, "loss": 0.0151, "step": 80180 }, { "epoch": 0.6771231344071267, "grad_norm": 0.6502557396888733, "learning_rate": 8.364273300903521e-06, "loss": 0.0153, "step": 80190 }, { "epoch": 0.6772075742542906, "grad_norm": 0.263453871011734, "learning_rate": 8.363728137811953e-06, "loss": 0.0094, "step": 80200 }, { "epoch": 0.6772920141014545, "grad_norm": 0.21339520812034607, "learning_rate": 8.363182901660898e-06, "loss": 0.0076, "step": 80210 }, { "epoch": 0.6773764539486183, "grad_norm": 0.2313903570175171, "learning_rate": 8.3626375924622e-06, "loss": 0.0151, "step": 80220 }, { "epoch": 0.6774608937957822, "grad_norm": 0.0575353167951107, "learning_rate": 8.362092210227703e-06, "loss": 0.0137, "step": 80230 }, { "epoch": 0.6775453336429461, "grad_norm": 0.4842001795768738, "learning_rate": 8.361546754969252e-06, "loss": 0.0121, "step": 80240 }, { "epoch": 0.67762977349011, "grad_norm": 0.7659074068069458, "learning_rate": 8.361001226698695e-06, "loss": 0.014, "step": 80250 }, { "epoch": 0.6777142133372739, "grad_norm": 0.4476005434989929, "learning_rate": 8.360455625427877e-06, "loss": 0.0201, "step": 80260 }, { "epoch": 0.6777986531844378, "grad_norm": 0.3734194040298462, "learning_rate": 8.359909951168654e-06, "loss": 0.0103, "step": 80270 }, { "epoch": 0.6778830930316017, "grad_norm": 0.44015273451805115, "learning_rate": 8.359364203932875e-06, "loss": 0.0126, "step": 80280 }, { "epoch": 0.6779675328787655, "grad_norm": 0.585954487323761, "learning_rate": 8.358818383732394e-06, "loss": 0.0076, "step": 80290 }, { "epoch": 0.6780519727259293, "grad_norm": 0.33119335770606995, "learning_rate": 8.358272490579066e-06, "loss": 0.0143, "step": 80300 }, { "epoch": 0.6781364125730932, "grad_norm": 0.1478017121553421, "learning_rate": 8.357726524484748e-06, "loss": 0.0044, "step": 80310 }, { "epoch": 0.6782208524202571, "grad_norm": 0.5409925580024719, "learning_rate": 8.357180485461296e-06, "loss": 0.0126, "step": 80320 }, { "epoch": 0.678305292267421, "grad_norm": 1.0608612298965454, "learning_rate": 8.356634373520574e-06, "loss": 0.016, "step": 80330 }, { "epoch": 0.6783897321145849, "grad_norm": 0.25209271907806396, "learning_rate": 8.356088188674441e-06, "loss": 0.0109, "step": 80340 }, { "epoch": 0.6784741719617488, "grad_norm": 0.2819741666316986, "learning_rate": 8.355541930934761e-06, "loss": 0.0073, "step": 80350 }, { "epoch": 0.6785586118089126, "grad_norm": 0.38007619976997375, "learning_rate": 8.354995600313395e-06, "loss": 0.0146, "step": 80360 }, { "epoch": 0.6786430516560765, "grad_norm": 0.9063570499420166, "learning_rate": 8.354449196822213e-06, "loss": 0.0076, "step": 80370 }, { "epoch": 0.6787274915032404, "grad_norm": 0.24453093111515045, "learning_rate": 8.353902720473084e-06, "loss": 0.0106, "step": 80380 }, { "epoch": 0.6788119313504043, "grad_norm": 0.5040955543518066, "learning_rate": 8.353356171277873e-06, "loss": 0.0092, "step": 80390 }, { "epoch": 0.6788963711975682, "grad_norm": 0.16791050136089325, "learning_rate": 8.352809549248456e-06, "loss": 0.0078, "step": 80400 }, { "epoch": 0.678980811044732, "grad_norm": 0.2574414908885956, "learning_rate": 8.3522628543967e-06, "loss": 0.0105, "step": 80410 }, { "epoch": 0.6790652508918958, "grad_norm": 0.08181243389844894, "learning_rate": 8.351716086734482e-06, "loss": 0.0156, "step": 80420 }, { "epoch": 0.6791496907390597, "grad_norm": 0.4269576966762543, "learning_rate": 8.351169246273677e-06, "loss": 0.0115, "step": 80430 }, { "epoch": 0.6792341305862236, "grad_norm": 0.6276191473007202, "learning_rate": 8.350622333026165e-06, "loss": 0.0108, "step": 80440 }, { "epoch": 0.6793185704333875, "grad_norm": 0.06946967542171478, "learning_rate": 8.35007534700382e-06, "loss": 0.0055, "step": 80450 }, { "epoch": 0.6794030102805514, "grad_norm": 1.1683123111724854, "learning_rate": 8.349528288218526e-06, "loss": 0.0128, "step": 80460 }, { "epoch": 0.6794874501277153, "grad_norm": 0.5126104950904846, "learning_rate": 8.348981156682163e-06, "loss": 0.0101, "step": 80470 }, { "epoch": 0.6795718899748792, "grad_norm": 0.3363606035709381, "learning_rate": 8.348433952406617e-06, "loss": 0.0176, "step": 80480 }, { "epoch": 0.679656329822043, "grad_norm": 0.22343382239341736, "learning_rate": 8.34788667540377e-06, "loss": 0.0098, "step": 80490 }, { "epoch": 0.6797407696692069, "grad_norm": 0.5693515539169312, "learning_rate": 8.347339325685512e-06, "loss": 0.0133, "step": 80500 }, { "epoch": 0.6798252095163708, "grad_norm": 0.012042221613228321, "learning_rate": 8.346791903263729e-06, "loss": 0.0186, "step": 80510 }, { "epoch": 0.6799096493635347, "grad_norm": 0.3283432126045227, "learning_rate": 8.346244408150312e-06, "loss": 0.0139, "step": 80520 }, { "epoch": 0.6799940892106985, "grad_norm": 0.30628758668899536, "learning_rate": 8.345696840357153e-06, "loss": 0.0141, "step": 80530 }, { "epoch": 0.6800785290578624, "grad_norm": 0.3564453423023224, "learning_rate": 8.345149199896143e-06, "loss": 0.012, "step": 80540 }, { "epoch": 0.6801629689050263, "grad_norm": 0.37167057394981384, "learning_rate": 8.34460148677918e-06, "loss": 0.0153, "step": 80550 }, { "epoch": 0.6802474087521901, "grad_norm": 0.2636718451976776, "learning_rate": 8.344053701018155e-06, "loss": 0.0198, "step": 80560 }, { "epoch": 0.680331848599354, "grad_norm": 0.35635560750961304, "learning_rate": 8.34350584262497e-06, "loss": 0.0118, "step": 80570 }, { "epoch": 0.6804162884465179, "grad_norm": 0.09117990732192993, "learning_rate": 8.342957911611524e-06, "loss": 0.0151, "step": 80580 }, { "epoch": 0.6805007282936818, "grad_norm": 0.5562264323234558, "learning_rate": 8.342409907989717e-06, "loss": 0.0118, "step": 80590 }, { "epoch": 0.6805851681408457, "grad_norm": 0.5930752158164978, "learning_rate": 8.341861831771452e-06, "loss": 0.0117, "step": 80600 }, { "epoch": 0.6806696079880096, "grad_norm": 0.24133914709091187, "learning_rate": 8.341313682968632e-06, "loss": 0.0148, "step": 80610 }, { "epoch": 0.6807540478351735, "grad_norm": 0.4451800286769867, "learning_rate": 8.340765461593163e-06, "loss": 0.0148, "step": 80620 }, { "epoch": 0.6808384876823373, "grad_norm": 1.8496744632720947, "learning_rate": 8.340217167656955e-06, "loss": 0.0165, "step": 80630 }, { "epoch": 0.6809229275295011, "grad_norm": 0.2023789882659912, "learning_rate": 8.339668801171912e-06, "loss": 0.0128, "step": 80640 }, { "epoch": 0.681007367376665, "grad_norm": 0.23836985230445862, "learning_rate": 8.339120362149949e-06, "loss": 0.011, "step": 80650 }, { "epoch": 0.6810918072238289, "grad_norm": 0.4542604982852936, "learning_rate": 8.338571850602975e-06, "loss": 0.0174, "step": 80660 }, { "epoch": 0.6811762470709928, "grad_norm": 0.6147012710571289, "learning_rate": 8.338023266542903e-06, "loss": 0.0189, "step": 80670 }, { "epoch": 0.6812606869181567, "grad_norm": 0.08274441957473755, "learning_rate": 8.337474609981653e-06, "loss": 0.0146, "step": 80680 }, { "epoch": 0.6813451267653206, "grad_norm": 0.20803558826446533, "learning_rate": 8.336925880931138e-06, "loss": 0.0203, "step": 80690 }, { "epoch": 0.6814295666124844, "grad_norm": 0.3168659508228302, "learning_rate": 8.336377079403273e-06, "loss": 0.0089, "step": 80700 }, { "epoch": 0.6815140064596483, "grad_norm": 0.1732271909713745, "learning_rate": 8.335828205409983e-06, "loss": 0.0112, "step": 80710 }, { "epoch": 0.6815984463068122, "grad_norm": 0.21777403354644775, "learning_rate": 8.33527925896319e-06, "loss": 0.0116, "step": 80720 }, { "epoch": 0.6816828861539761, "grad_norm": 0.14870870113372803, "learning_rate": 8.334730240074815e-06, "loss": 0.0098, "step": 80730 }, { "epoch": 0.68176732600114, "grad_norm": 0.34386810660362244, "learning_rate": 8.334181148756782e-06, "loss": 0.007, "step": 80740 }, { "epoch": 0.6818517658483038, "grad_norm": 0.21865062415599823, "learning_rate": 8.333631985021015e-06, "loss": 0.0091, "step": 80750 }, { "epoch": 0.6819362056954676, "grad_norm": 0.18539084494113922, "learning_rate": 8.333082748879446e-06, "loss": 0.01, "step": 80760 }, { "epoch": 0.6820206455426315, "grad_norm": 0.24122394621372223, "learning_rate": 8.332533440344003e-06, "loss": 0.0113, "step": 80770 }, { "epoch": 0.6821050853897954, "grad_norm": 0.37396588921546936, "learning_rate": 8.331984059426617e-06, "loss": 0.0104, "step": 80780 }, { "epoch": 0.6821895252369593, "grad_norm": 0.3766961991786957, "learning_rate": 8.33143460613922e-06, "loss": 0.0104, "step": 80790 }, { "epoch": 0.6822739650841232, "grad_norm": 0.27284470200538635, "learning_rate": 8.330885080493747e-06, "loss": 0.009, "step": 80800 }, { "epoch": 0.6823584049312871, "grad_norm": 0.4130505919456482, "learning_rate": 8.330335482502132e-06, "loss": 0.0103, "step": 80810 }, { "epoch": 0.682442844778451, "grad_norm": 0.5167810320854187, "learning_rate": 8.329785812176312e-06, "loss": 0.0123, "step": 80820 }, { "epoch": 0.6825272846256148, "grad_norm": 0.5014861226081848, "learning_rate": 8.329236069528227e-06, "loss": 0.0157, "step": 80830 }, { "epoch": 0.6826117244727787, "grad_norm": 0.11906247586011887, "learning_rate": 8.328686254569815e-06, "loss": 0.0075, "step": 80840 }, { "epoch": 0.6826961643199426, "grad_norm": 0.001377843669615686, "learning_rate": 8.328136367313021e-06, "loss": 0.0128, "step": 80850 }, { "epoch": 0.6827806041671065, "grad_norm": 0.8557945489883423, "learning_rate": 8.327586407769787e-06, "loss": 0.0151, "step": 80860 }, { "epoch": 0.6828650440142703, "grad_norm": 0.5172713398933411, "learning_rate": 8.327036375952059e-06, "loss": 0.0184, "step": 80870 }, { "epoch": 0.6829494838614342, "grad_norm": 0.327362596988678, "learning_rate": 8.326486271871781e-06, "loss": 0.0164, "step": 80880 }, { "epoch": 0.6830339237085981, "grad_norm": 0.8058419823646545, "learning_rate": 8.325936095540905e-06, "loss": 0.0169, "step": 80890 }, { "epoch": 0.6831183635557619, "grad_norm": 0.8968874216079712, "learning_rate": 8.325385846971377e-06, "loss": 0.0165, "step": 80900 }, { "epoch": 0.6832028034029258, "grad_norm": 0.4387376606464386, "learning_rate": 8.32483552617515e-06, "loss": 0.0104, "step": 80910 }, { "epoch": 0.6832872432500897, "grad_norm": 0.36004021763801575, "learning_rate": 8.324285133164179e-06, "loss": 0.0113, "step": 80920 }, { "epoch": 0.6833716830972536, "grad_norm": 0.37322816252708435, "learning_rate": 8.323734667950413e-06, "loss": 0.0152, "step": 80930 }, { "epoch": 0.6834561229444175, "grad_norm": 0.20796340703964233, "learning_rate": 8.323184130545812e-06, "loss": 0.0099, "step": 80940 }, { "epoch": 0.6835405627915814, "grad_norm": 0.5447248816490173, "learning_rate": 8.322633520962333e-06, "loss": 0.0102, "step": 80950 }, { "epoch": 0.6836250026387453, "grad_norm": 0.1900366097688675, "learning_rate": 8.322082839211933e-06, "loss": 0.0156, "step": 80960 }, { "epoch": 0.6837094424859091, "grad_norm": 0.35244253277778625, "learning_rate": 8.321532085306578e-06, "loss": 0.0113, "step": 80970 }, { "epoch": 0.6837938823330729, "grad_norm": 0.29856041073799133, "learning_rate": 8.320981259258223e-06, "loss": 0.0168, "step": 80980 }, { "epoch": 0.6838783221802368, "grad_norm": 0.37204697728157043, "learning_rate": 8.320430361078839e-06, "loss": 0.0111, "step": 80990 }, { "epoch": 0.6839627620274007, "grad_norm": 0.4143785536289215, "learning_rate": 8.319879390780387e-06, "loss": 0.0145, "step": 81000 }, { "epoch": 0.6840472018745646, "grad_norm": 0.11750143766403198, "learning_rate": 8.319328348374832e-06, "loss": 0.0081, "step": 81010 }, { "epoch": 0.6841316417217285, "grad_norm": 0.2654975652694702, "learning_rate": 8.31877723387415e-06, "loss": 0.0128, "step": 81020 }, { "epoch": 0.6842160815688924, "grad_norm": 0.2809276878833771, "learning_rate": 8.318226047290302e-06, "loss": 0.0157, "step": 81030 }, { "epoch": 0.6843005214160562, "grad_norm": 0.22768880426883698, "learning_rate": 8.317674788635268e-06, "loss": 0.0129, "step": 81040 }, { "epoch": 0.6843849612632201, "grad_norm": 0.25823721289634705, "learning_rate": 8.317123457921017e-06, "loss": 0.0069, "step": 81050 }, { "epoch": 0.684469401110384, "grad_norm": 0.49360841512680054, "learning_rate": 8.316572055159523e-06, "loss": 0.0129, "step": 81060 }, { "epoch": 0.6845538409575479, "grad_norm": 0.37242603302001953, "learning_rate": 8.316020580362765e-06, "loss": 0.0109, "step": 81070 }, { "epoch": 0.6846382808047118, "grad_norm": 0.5572257041931152, "learning_rate": 8.315469033542719e-06, "loss": 0.0232, "step": 81080 }, { "epoch": 0.6847227206518757, "grad_norm": 0.5175872445106506, "learning_rate": 8.314917414711363e-06, "loss": 0.009, "step": 81090 }, { "epoch": 0.6848071604990394, "grad_norm": 0.32388174533843994, "learning_rate": 8.314365723880681e-06, "loss": 0.0125, "step": 81100 }, { "epoch": 0.6848916003462033, "grad_norm": 0.5732938647270203, "learning_rate": 8.313813961062656e-06, "loss": 0.0167, "step": 81110 }, { "epoch": 0.6849760401933672, "grad_norm": 0.11477996408939362, "learning_rate": 8.31326212626927e-06, "loss": 0.0147, "step": 81120 }, { "epoch": 0.6850604800405311, "grad_norm": 0.1608123928308487, "learning_rate": 8.312710219512508e-06, "loss": 0.0065, "step": 81130 }, { "epoch": 0.685144919887695, "grad_norm": 0.4372078776359558, "learning_rate": 8.312158240804362e-06, "loss": 0.0129, "step": 81140 }, { "epoch": 0.6852293597348589, "grad_norm": 0.2598971128463745, "learning_rate": 8.311606190156816e-06, "loss": 0.0123, "step": 81150 }, { "epoch": 0.6853137995820228, "grad_norm": 0.8648326992988586, "learning_rate": 8.311054067581864e-06, "loss": 0.0113, "step": 81160 }, { "epoch": 0.6853982394291867, "grad_norm": 0.3805135190486908, "learning_rate": 8.310501873091493e-06, "loss": 0.012, "step": 81170 }, { "epoch": 0.6854826792763505, "grad_norm": 0.11101268231868744, "learning_rate": 8.3099496066977e-06, "loss": 0.016, "step": 81180 }, { "epoch": 0.6855671191235144, "grad_norm": 0.37438684701919556, "learning_rate": 8.309397268412482e-06, "loss": 0.0179, "step": 81190 }, { "epoch": 0.6856515589706783, "grad_norm": 0.7193662524223328, "learning_rate": 8.308844858247832e-06, "loss": 0.0149, "step": 81200 }, { "epoch": 0.6857359988178421, "grad_norm": 0.37905991077423096, "learning_rate": 8.30829237621575e-06, "loss": 0.0095, "step": 81210 }, { "epoch": 0.685820438665006, "grad_norm": 0.5248475074768066, "learning_rate": 8.307739822328235e-06, "loss": 0.0107, "step": 81220 }, { "epoch": 0.6859048785121699, "grad_norm": 0.48133283853530884, "learning_rate": 8.30718719659729e-06, "loss": 0.0216, "step": 81230 }, { "epoch": 0.6859893183593337, "grad_norm": 0.2529524266719818, "learning_rate": 8.306634499034917e-06, "loss": 0.0088, "step": 81240 }, { "epoch": 0.6860737582064976, "grad_norm": 0.5510260462760925, "learning_rate": 8.30608172965312e-06, "loss": 0.0093, "step": 81250 }, { "epoch": 0.6861581980536615, "grad_norm": 0.5588108897209167, "learning_rate": 8.305528888463904e-06, "loss": 0.0114, "step": 81260 }, { "epoch": 0.6862426379008254, "grad_norm": 0.2861756682395935, "learning_rate": 8.304975975479279e-06, "loss": 0.0105, "step": 81270 }, { "epoch": 0.6863270777479893, "grad_norm": 0.347267210483551, "learning_rate": 8.304422990711255e-06, "loss": 0.0089, "step": 81280 }, { "epoch": 0.6864115175951532, "grad_norm": 0.44178083539009094, "learning_rate": 8.303869934171838e-06, "loss": 0.012, "step": 81290 }, { "epoch": 0.6864959574423171, "grad_norm": 0.39140668511390686, "learning_rate": 8.303316805873042e-06, "loss": 0.0139, "step": 81300 }, { "epoch": 0.686580397289481, "grad_norm": 0.3785880506038666, "learning_rate": 8.302763605826885e-06, "loss": 0.0154, "step": 81310 }, { "epoch": 0.6866648371366448, "grad_norm": 0.12616710364818573, "learning_rate": 8.302210334045377e-06, "loss": 0.0101, "step": 81320 }, { "epoch": 0.6867492769838086, "grad_norm": 0.11158664524555206, "learning_rate": 8.301656990540539e-06, "loss": 0.0125, "step": 81330 }, { "epoch": 0.6868337168309725, "grad_norm": 0.7639073133468628, "learning_rate": 8.301103575324387e-06, "loss": 0.0178, "step": 81340 }, { "epoch": 0.6869181566781364, "grad_norm": 0.24194343388080597, "learning_rate": 8.300550088408942e-06, "loss": 0.0106, "step": 81350 }, { "epoch": 0.6870025965253003, "grad_norm": 0.2826792597770691, "learning_rate": 8.299996529806224e-06, "loss": 0.0181, "step": 81360 }, { "epoch": 0.6870870363724642, "grad_norm": 1.1313821077346802, "learning_rate": 8.29944289952826e-06, "loss": 0.0157, "step": 81370 }, { "epoch": 0.687171476219628, "grad_norm": 0.2452223002910614, "learning_rate": 8.29888919758707e-06, "loss": 0.0095, "step": 81380 }, { "epoch": 0.6872559160667919, "grad_norm": 0.2612493336200714, "learning_rate": 8.298335423994685e-06, "loss": 0.0122, "step": 81390 }, { "epoch": 0.6873403559139558, "grad_norm": 0.2836968004703522, "learning_rate": 8.297781578763129e-06, "loss": 0.011, "step": 81400 }, { "epoch": 0.6874247957611197, "grad_norm": 0.4741588532924652, "learning_rate": 8.297227661904432e-06, "loss": 0.0087, "step": 81410 }, { "epoch": 0.6875092356082836, "grad_norm": 0.5427370667457581, "learning_rate": 8.296673673430628e-06, "loss": 0.0153, "step": 81420 }, { "epoch": 0.6875936754554475, "grad_norm": 0.5738050937652588, "learning_rate": 8.296119613353745e-06, "loss": 0.0138, "step": 81430 }, { "epoch": 0.6876781153026112, "grad_norm": 0.03977671638131142, "learning_rate": 8.29556548168582e-06, "loss": 0.0159, "step": 81440 }, { "epoch": 0.6877625551497751, "grad_norm": 0.5444974303245544, "learning_rate": 8.29501127843889e-06, "loss": 0.0247, "step": 81450 }, { "epoch": 0.687846994996939, "grad_norm": 0.6328869462013245, "learning_rate": 8.29445700362499e-06, "loss": 0.0169, "step": 81460 }, { "epoch": 0.6879314348441029, "grad_norm": 0.3233468532562256, "learning_rate": 8.293902657256158e-06, "loss": 0.0128, "step": 81470 }, { "epoch": 0.6880158746912668, "grad_norm": 0.6690241098403931, "learning_rate": 8.293348239344435e-06, "loss": 0.0086, "step": 81480 }, { "epoch": 0.6881003145384307, "grad_norm": 0.7585324048995972, "learning_rate": 8.292793749901864e-06, "loss": 0.0126, "step": 81490 }, { "epoch": 0.6881847543855946, "grad_norm": 0.404866099357605, "learning_rate": 8.292239188940486e-06, "loss": 0.0078, "step": 81500 }, { "epoch": 0.6882691942327585, "grad_norm": 0.28257104754447937, "learning_rate": 8.291684556472349e-06, "loss": 0.0142, "step": 81510 }, { "epoch": 0.6883536340799223, "grad_norm": 0.5108746290206909, "learning_rate": 8.291129852509497e-06, "loss": 0.018, "step": 81520 }, { "epoch": 0.6884380739270862, "grad_norm": 0.302493691444397, "learning_rate": 8.29057507706398e-06, "loss": 0.0073, "step": 81530 }, { "epoch": 0.6885225137742501, "grad_norm": 0.12852880358695984, "learning_rate": 8.290020230147846e-06, "loss": 0.0068, "step": 81540 }, { "epoch": 0.688606953621414, "grad_norm": 0.9269773960113525, "learning_rate": 8.289465311773146e-06, "loss": 0.0132, "step": 81550 }, { "epoch": 0.6886913934685778, "grad_norm": 0.5364383459091187, "learning_rate": 8.288910321951935e-06, "loss": 0.0104, "step": 81560 }, { "epoch": 0.6887758333157417, "grad_norm": 0.45675161480903625, "learning_rate": 8.288355260696265e-06, "loss": 0.0121, "step": 81570 }, { "epoch": 0.6888602731629055, "grad_norm": 0.7742226123809814, "learning_rate": 8.287800128018194e-06, "loss": 0.0141, "step": 81580 }, { "epoch": 0.6889447130100694, "grad_norm": 0.27632448077201843, "learning_rate": 8.287244923929774e-06, "loss": 0.0114, "step": 81590 }, { "epoch": 0.6890291528572333, "grad_norm": 0.7916613221168518, "learning_rate": 8.286689648443072e-06, "loss": 0.0162, "step": 81600 }, { "epoch": 0.6891135927043972, "grad_norm": 0.398217111825943, "learning_rate": 8.286134301570143e-06, "loss": 0.0124, "step": 81610 }, { "epoch": 0.6891980325515611, "grad_norm": 0.22585897147655487, "learning_rate": 8.28557888332305e-06, "loss": 0.018, "step": 81620 }, { "epoch": 0.689282472398725, "grad_norm": 0.5867149233818054, "learning_rate": 8.285023393713857e-06, "loss": 0.0179, "step": 81630 }, { "epoch": 0.6893669122458889, "grad_norm": 0.38628679513931274, "learning_rate": 8.28446783275463e-06, "loss": 0.0096, "step": 81640 }, { "epoch": 0.6894513520930527, "grad_norm": 0.15807007253170013, "learning_rate": 8.283912200457433e-06, "loss": 0.0211, "step": 81650 }, { "epoch": 0.6895357919402166, "grad_norm": 0.440744012594223, "learning_rate": 8.283356496834337e-06, "loss": 0.013, "step": 81660 }, { "epoch": 0.6896202317873804, "grad_norm": 0.46229755878448486, "learning_rate": 8.28280072189741e-06, "loss": 0.0098, "step": 81670 }, { "epoch": 0.6897046716345443, "grad_norm": 0.8123787641525269, "learning_rate": 8.282244875658724e-06, "loss": 0.0211, "step": 81680 }, { "epoch": 0.6897891114817082, "grad_norm": 0.5006250739097595, "learning_rate": 8.281688958130354e-06, "loss": 0.0097, "step": 81690 }, { "epoch": 0.6898735513288721, "grad_norm": 0.4843263626098633, "learning_rate": 8.281132969324371e-06, "loss": 0.0154, "step": 81700 }, { "epoch": 0.689957991176036, "grad_norm": 0.23676012456417084, "learning_rate": 8.280576909252851e-06, "loss": 0.0132, "step": 81710 }, { "epoch": 0.6900424310231998, "grad_norm": 0.28496766090393066, "learning_rate": 8.280020777927874e-06, "loss": 0.0123, "step": 81720 }, { "epoch": 0.6901268708703637, "grad_norm": 0.3728136122226715, "learning_rate": 8.279464575361518e-06, "loss": 0.0106, "step": 81730 }, { "epoch": 0.6902113107175276, "grad_norm": 0.7834615111351013, "learning_rate": 8.278908301565862e-06, "loss": 0.013, "step": 81740 }, { "epoch": 0.6902957505646915, "grad_norm": 0.21685394644737244, "learning_rate": 8.278351956552992e-06, "loss": 0.0136, "step": 81750 }, { "epoch": 0.6903801904118554, "grad_norm": 0.3625652492046356, "learning_rate": 8.277795540334989e-06, "loss": 0.0109, "step": 81760 }, { "epoch": 0.6904646302590193, "grad_norm": 0.27493950724601746, "learning_rate": 8.277239052923938e-06, "loss": 0.0088, "step": 81770 }, { "epoch": 0.6905490701061832, "grad_norm": 0.45999959111213684, "learning_rate": 8.276682494331925e-06, "loss": 0.0121, "step": 81780 }, { "epoch": 0.6906335099533469, "grad_norm": 0.5727910995483398, "learning_rate": 8.276125864571041e-06, "loss": 0.012, "step": 81790 }, { "epoch": 0.6907179498005108, "grad_norm": 0.45561540126800537, "learning_rate": 8.275569163653373e-06, "loss": 0.0127, "step": 81800 }, { "epoch": 0.6908023896476747, "grad_norm": 0.5015525817871094, "learning_rate": 8.275012391591015e-06, "loss": 0.013, "step": 81810 }, { "epoch": 0.6908868294948386, "grad_norm": 0.1616978496313095, "learning_rate": 8.27445554839606e-06, "loss": 0.0111, "step": 81820 }, { "epoch": 0.6909712693420025, "grad_norm": 0.5977077484130859, "learning_rate": 8.2738986340806e-06, "loss": 0.0174, "step": 81830 }, { "epoch": 0.6910557091891664, "grad_norm": 0.3485397696495056, "learning_rate": 8.273341648656735e-06, "loss": 0.0179, "step": 81840 }, { "epoch": 0.6911401490363303, "grad_norm": 0.21537329256534576, "learning_rate": 8.272784592136557e-06, "loss": 0.0211, "step": 81850 }, { "epoch": 0.6912245888834941, "grad_norm": 0.07201245427131653, "learning_rate": 8.27222746453217e-06, "loss": 0.0084, "step": 81860 }, { "epoch": 0.691309028730658, "grad_norm": 0.1147824376821518, "learning_rate": 8.271670265855673e-06, "loss": 0.009, "step": 81870 }, { "epoch": 0.6913934685778219, "grad_norm": 0.22492825984954834, "learning_rate": 8.271112996119168e-06, "loss": 0.0146, "step": 81880 }, { "epoch": 0.6914779084249858, "grad_norm": 0.3183732032775879, "learning_rate": 8.270555655334758e-06, "loss": 0.0104, "step": 81890 }, { "epoch": 0.6915623482721496, "grad_norm": 0.30781206488609314, "learning_rate": 8.26999824351455e-06, "loss": 0.0096, "step": 81900 }, { "epoch": 0.6916467881193135, "grad_norm": 0.2264307290315628, "learning_rate": 8.269440760670652e-06, "loss": 0.0133, "step": 81910 }, { "epoch": 0.6917312279664773, "grad_norm": 0.6992603540420532, "learning_rate": 8.268883206815168e-06, "loss": 0.0111, "step": 81920 }, { "epoch": 0.6918156678136412, "grad_norm": 0.6212215423583984, "learning_rate": 8.268325581960212e-06, "loss": 0.0104, "step": 81930 }, { "epoch": 0.6919001076608051, "grad_norm": 0.37534764409065247, "learning_rate": 8.267767886117894e-06, "loss": 0.0121, "step": 81940 }, { "epoch": 0.691984547507969, "grad_norm": 0.43668967485427856, "learning_rate": 8.267210119300327e-06, "loss": 0.0149, "step": 81950 }, { "epoch": 0.6920689873551329, "grad_norm": 0.45219865441322327, "learning_rate": 8.266652281519627e-06, "loss": 0.0076, "step": 81960 }, { "epoch": 0.6921534272022968, "grad_norm": 0.3854905366897583, "learning_rate": 8.266094372787907e-06, "loss": 0.0161, "step": 81970 }, { "epoch": 0.6922378670494607, "grad_norm": 0.10095866769552231, "learning_rate": 8.265536393117289e-06, "loss": 0.0058, "step": 81980 }, { "epoch": 0.6923223068966246, "grad_norm": 0.1707325130701065, "learning_rate": 8.264978342519888e-06, "loss": 0.0101, "step": 81990 }, { "epoch": 0.6924067467437884, "grad_norm": 0.2592475712299347, "learning_rate": 8.264420221007824e-06, "loss": 0.01, "step": 82000 }, { "epoch": 0.6924911865909523, "grad_norm": 0.43444278836250305, "learning_rate": 8.263862028593225e-06, "loss": 0.0098, "step": 82010 }, { "epoch": 0.6925756264381161, "grad_norm": 0.437033474445343, "learning_rate": 8.26330376528821e-06, "loss": 0.0071, "step": 82020 }, { "epoch": 0.69266006628528, "grad_norm": 0.3148089647293091, "learning_rate": 8.262745431104907e-06, "loss": 0.0149, "step": 82030 }, { "epoch": 0.6927445061324439, "grad_norm": 0.1953108012676239, "learning_rate": 8.262187026055441e-06, "loss": 0.0118, "step": 82040 }, { "epoch": 0.6928289459796078, "grad_norm": 0.775871753692627, "learning_rate": 8.261628550151942e-06, "loss": 0.0134, "step": 82050 }, { "epoch": 0.6929133858267716, "grad_norm": 0.25844016671180725, "learning_rate": 8.261070003406539e-06, "loss": 0.0153, "step": 82060 }, { "epoch": 0.6929978256739355, "grad_norm": 0.4106104075908661, "learning_rate": 8.260511385831363e-06, "loss": 0.0129, "step": 82070 }, { "epoch": 0.6930822655210994, "grad_norm": 0.415802925825119, "learning_rate": 8.259952697438549e-06, "loss": 0.0108, "step": 82080 }, { "epoch": 0.6931667053682633, "grad_norm": 0.042692944407463074, "learning_rate": 8.25939393824023e-06, "loss": 0.0057, "step": 82090 }, { "epoch": 0.6932511452154272, "grad_norm": 0.3326548635959625, "learning_rate": 8.258835108248543e-06, "loss": 0.0077, "step": 82100 }, { "epoch": 0.6933355850625911, "grad_norm": 0.2789020836353302, "learning_rate": 8.258276207475626e-06, "loss": 0.0093, "step": 82110 }, { "epoch": 0.693420024909755, "grad_norm": 0.07581235468387604, "learning_rate": 8.257717235933616e-06, "loss": 0.0121, "step": 82120 }, { "epoch": 0.6935044647569187, "grad_norm": 0.1622704118490219, "learning_rate": 8.257158193634655e-06, "loss": 0.0087, "step": 82130 }, { "epoch": 0.6935889046040826, "grad_norm": 0.2982847988605499, "learning_rate": 8.256599080590886e-06, "loss": 0.0087, "step": 82140 }, { "epoch": 0.6936733444512465, "grad_norm": 0.35151511430740356, "learning_rate": 8.256039896814451e-06, "loss": 0.0106, "step": 82150 }, { "epoch": 0.6937577842984104, "grad_norm": 0.2306014448404312, "learning_rate": 8.255480642317499e-06, "loss": 0.0096, "step": 82160 }, { "epoch": 0.6938422241455743, "grad_norm": 0.01481581386178732, "learning_rate": 8.254921317112173e-06, "loss": 0.0084, "step": 82170 }, { "epoch": 0.6939266639927382, "grad_norm": 0.22065502405166626, "learning_rate": 8.254361921210623e-06, "loss": 0.0133, "step": 82180 }, { "epoch": 0.694011103839902, "grad_norm": 0.33641403913497925, "learning_rate": 8.253802454625001e-06, "loss": 0.0105, "step": 82190 }, { "epoch": 0.6940955436870659, "grad_norm": 0.6908592581748962, "learning_rate": 8.253242917367454e-06, "loss": 0.0198, "step": 82200 }, { "epoch": 0.6941799835342298, "grad_norm": 0.5821762681007385, "learning_rate": 8.252683309450138e-06, "loss": 0.0114, "step": 82210 }, { "epoch": 0.6942644233813937, "grad_norm": 0.6856831908226013, "learning_rate": 8.25212363088521e-06, "loss": 0.0101, "step": 82220 }, { "epoch": 0.6943488632285576, "grad_norm": 0.07396586984395981, "learning_rate": 8.251563881684819e-06, "loss": 0.009, "step": 82230 }, { "epoch": 0.6944333030757215, "grad_norm": 0.3565175235271454, "learning_rate": 8.251004061861129e-06, "loss": 0.0111, "step": 82240 }, { "epoch": 0.6945177429228853, "grad_norm": 0.3179433047771454, "learning_rate": 8.250444171426296e-06, "loss": 0.0087, "step": 82250 }, { "epoch": 0.6946021827700491, "grad_norm": 0.27272024750709534, "learning_rate": 8.249884210392483e-06, "loss": 0.0096, "step": 82260 }, { "epoch": 0.694686622617213, "grad_norm": 0.14016321301460266, "learning_rate": 8.24932417877185e-06, "loss": 0.0076, "step": 82270 }, { "epoch": 0.6947710624643769, "grad_norm": 0.20798540115356445, "learning_rate": 8.24876407657656e-06, "loss": 0.0074, "step": 82280 }, { "epoch": 0.6948555023115408, "grad_norm": 0.45511555671691895, "learning_rate": 8.248203903818783e-06, "loss": 0.0107, "step": 82290 }, { "epoch": 0.6949399421587047, "grad_norm": 0.35830721259117126, "learning_rate": 8.24764366051068e-06, "loss": 0.0204, "step": 82300 }, { "epoch": 0.6950243820058686, "grad_norm": 0.8804491758346558, "learning_rate": 8.247083346664423e-06, "loss": 0.012, "step": 82310 }, { "epoch": 0.6951088218530325, "grad_norm": 0.37420979142189026, "learning_rate": 8.246522962292183e-06, "loss": 0.0132, "step": 82320 }, { "epoch": 0.6951932617001964, "grad_norm": 0.4986061751842499, "learning_rate": 8.245962507406129e-06, "loss": 0.0157, "step": 82330 }, { "epoch": 0.6952777015473602, "grad_norm": 0.3633858859539032, "learning_rate": 8.245401982018434e-06, "loss": 0.0098, "step": 82340 }, { "epoch": 0.6953621413945241, "grad_norm": 1.6147644519805908, "learning_rate": 8.244841386141273e-06, "loss": 0.0173, "step": 82350 }, { "epoch": 0.6954465812416879, "grad_norm": 0.19656550884246826, "learning_rate": 8.244280719786822e-06, "loss": 0.0148, "step": 82360 }, { "epoch": 0.6955310210888518, "grad_norm": 0.5395141839981079, "learning_rate": 8.243719982967257e-06, "loss": 0.0166, "step": 82370 }, { "epoch": 0.6956154609360157, "grad_norm": 0.22044815123081207, "learning_rate": 8.243159175694761e-06, "loss": 0.0111, "step": 82380 }, { "epoch": 0.6956999007831796, "grad_norm": 0.2569373846054077, "learning_rate": 8.242598297981513e-06, "loss": 0.01, "step": 82390 }, { "epoch": 0.6957843406303434, "grad_norm": 0.6875870227813721, "learning_rate": 8.24203734983969e-06, "loss": 0.0105, "step": 82400 }, { "epoch": 0.6958687804775073, "grad_norm": 0.141846165060997, "learning_rate": 8.241476331281484e-06, "loss": 0.0123, "step": 82410 }, { "epoch": 0.6959532203246712, "grad_norm": 0.22322174906730652, "learning_rate": 8.240915242319076e-06, "loss": 0.0172, "step": 82420 }, { "epoch": 0.6960376601718351, "grad_norm": 0.35694533586502075, "learning_rate": 8.240354082964652e-06, "loss": 0.0107, "step": 82430 }, { "epoch": 0.696122100018999, "grad_norm": 0.38569432497024536, "learning_rate": 8.239792853230401e-06, "loss": 0.0083, "step": 82440 }, { "epoch": 0.6962065398661629, "grad_norm": 0.38877731561660767, "learning_rate": 8.239231553128514e-06, "loss": 0.019, "step": 82450 }, { "epoch": 0.6962909797133268, "grad_norm": 0.39248815178871155, "learning_rate": 8.238670182671182e-06, "loss": 0.0106, "step": 82460 }, { "epoch": 0.6963754195604906, "grad_norm": 0.17072869837284088, "learning_rate": 8.238108741870597e-06, "loss": 0.0053, "step": 82470 }, { "epoch": 0.6964598594076544, "grad_norm": 0.3793832063674927, "learning_rate": 8.237547230738953e-06, "loss": 0.0085, "step": 82480 }, { "epoch": 0.6965442992548183, "grad_norm": 0.30555230379104614, "learning_rate": 8.236985649288445e-06, "loss": 0.0111, "step": 82490 }, { "epoch": 0.6966287391019822, "grad_norm": 0.16501961648464203, "learning_rate": 8.236423997531275e-06, "loss": 0.0065, "step": 82500 }, { "epoch": 0.6967131789491461, "grad_norm": 0.23682041466236115, "learning_rate": 8.235862275479637e-06, "loss": 0.0198, "step": 82510 }, { "epoch": 0.69679761879631, "grad_norm": 0.5855555534362793, "learning_rate": 8.235300483145735e-06, "loss": 0.014, "step": 82520 }, { "epoch": 0.6968820586434739, "grad_norm": 0.45543205738067627, "learning_rate": 8.234738620541768e-06, "loss": 0.0069, "step": 82530 }, { "epoch": 0.6969664984906377, "grad_norm": 0.18936298787593842, "learning_rate": 8.234176687679942e-06, "loss": 0.0196, "step": 82540 }, { "epoch": 0.6970509383378016, "grad_norm": 0.36618098616600037, "learning_rate": 8.233614684572462e-06, "loss": 0.0114, "step": 82550 }, { "epoch": 0.6971353781849655, "grad_norm": 0.21113523840904236, "learning_rate": 8.23305261123153e-06, "loss": 0.0083, "step": 82560 }, { "epoch": 0.6972198180321294, "grad_norm": 0.23953203856945038, "learning_rate": 8.232490467669362e-06, "loss": 0.0093, "step": 82570 }, { "epoch": 0.6973042578792933, "grad_norm": 0.4679543972015381, "learning_rate": 8.231928253898163e-06, "loss": 0.0186, "step": 82580 }, { "epoch": 0.6973886977264571, "grad_norm": 0.21340209245681763, "learning_rate": 8.231365969930143e-06, "loss": 0.0107, "step": 82590 }, { "epoch": 0.697473137573621, "grad_norm": 0.2649042010307312, "learning_rate": 8.230803615777515e-06, "loss": 0.0107, "step": 82600 }, { "epoch": 0.6975575774207848, "grad_norm": 0.32747310400009155, "learning_rate": 8.230241191452497e-06, "loss": 0.0085, "step": 82610 }, { "epoch": 0.6976420172679487, "grad_norm": 1.1358600854873657, "learning_rate": 8.229678696967303e-06, "loss": 0.0165, "step": 82620 }, { "epoch": 0.6977264571151126, "grad_norm": 0.1527033895254135, "learning_rate": 8.229116132334147e-06, "loss": 0.0105, "step": 82630 }, { "epoch": 0.6978108969622765, "grad_norm": 0.7334040999412537, "learning_rate": 8.228553497565252e-06, "loss": 0.0215, "step": 82640 }, { "epoch": 0.6978953368094404, "grad_norm": 0.34287235140800476, "learning_rate": 8.227990792672835e-06, "loss": 0.0124, "step": 82650 }, { "epoch": 0.6979797766566043, "grad_norm": 0.46014145016670227, "learning_rate": 8.227428017669121e-06, "loss": 0.015, "step": 82660 }, { "epoch": 0.6980642165037682, "grad_norm": 0.1029108539223671, "learning_rate": 8.226865172566332e-06, "loss": 0.0182, "step": 82670 }, { "epoch": 0.698148656350932, "grad_norm": 0.17463034391403198, "learning_rate": 8.22630225737669e-06, "loss": 0.009, "step": 82680 }, { "epoch": 0.6982330961980959, "grad_norm": 0.24135719239711761, "learning_rate": 8.225739272112428e-06, "loss": 0.0123, "step": 82690 }, { "epoch": 0.6983175360452598, "grad_norm": 0.2637327313423157, "learning_rate": 8.225176216785768e-06, "loss": 0.0144, "step": 82700 }, { "epoch": 0.6984019758924236, "grad_norm": 0.33746635913848877, "learning_rate": 8.224613091408941e-06, "loss": 0.0168, "step": 82710 }, { "epoch": 0.6984864157395875, "grad_norm": 0.06091269850730896, "learning_rate": 8.22404989599418e-06, "loss": 0.0114, "step": 82720 }, { "epoch": 0.6985708555867514, "grad_norm": 0.1346234530210495, "learning_rate": 8.223486630553715e-06, "loss": 0.0064, "step": 82730 }, { "epoch": 0.6986552954339152, "grad_norm": 0.43322300910949707, "learning_rate": 8.22292329509978e-06, "loss": 0.0109, "step": 82740 }, { "epoch": 0.6987397352810791, "grad_norm": 0.22799694538116455, "learning_rate": 8.22235988964461e-06, "loss": 0.0088, "step": 82750 }, { "epoch": 0.698824175128243, "grad_norm": 0.21376873552799225, "learning_rate": 8.221796414200448e-06, "loss": 0.0131, "step": 82760 }, { "epoch": 0.6989086149754069, "grad_norm": 0.30416539311408997, "learning_rate": 8.221232868779525e-06, "loss": 0.017, "step": 82770 }, { "epoch": 0.6989930548225708, "grad_norm": 0.46155041456222534, "learning_rate": 8.220669253394086e-06, "loss": 0.0115, "step": 82780 }, { "epoch": 0.6990774946697347, "grad_norm": 0.26636701822280884, "learning_rate": 8.22010556805637e-06, "loss": 0.01, "step": 82790 }, { "epoch": 0.6991619345168986, "grad_norm": 0.5067287087440491, "learning_rate": 8.21954181277862e-06, "loss": 0.0076, "step": 82800 }, { "epoch": 0.6992463743640625, "grad_norm": 0.2428322732448578, "learning_rate": 8.218977987573083e-06, "loss": 0.0161, "step": 82810 }, { "epoch": 0.6993308142112262, "grad_norm": 0.6902730464935303, "learning_rate": 8.218414092452004e-06, "loss": 0.0127, "step": 82820 }, { "epoch": 0.6994152540583901, "grad_norm": 0.5288575291633606, "learning_rate": 8.21785012742763e-06, "loss": 0.0167, "step": 82830 }, { "epoch": 0.699499693905554, "grad_norm": 0.3098944425582886, "learning_rate": 8.217286092512208e-06, "loss": 0.0092, "step": 82840 }, { "epoch": 0.6995841337527179, "grad_norm": 0.143926739692688, "learning_rate": 8.216721987717994e-06, "loss": 0.009, "step": 82850 }, { "epoch": 0.6996685735998818, "grad_norm": 0.21679648756980896, "learning_rate": 8.216157813057238e-06, "loss": 0.0086, "step": 82860 }, { "epoch": 0.6997530134470457, "grad_norm": 0.7128202319145203, "learning_rate": 8.215593568542193e-06, "loss": 0.0137, "step": 82870 }, { "epoch": 0.6998374532942095, "grad_norm": 0.506156325340271, "learning_rate": 8.215029254185114e-06, "loss": 0.017, "step": 82880 }, { "epoch": 0.6999218931413734, "grad_norm": 0.14952436089515686, "learning_rate": 8.214464869998257e-06, "loss": 0.0174, "step": 82890 }, { "epoch": 0.7000063329885373, "grad_norm": 0.6076602339744568, "learning_rate": 8.213900415993885e-06, "loss": 0.0113, "step": 82900 }, { "epoch": 0.7000907728357012, "grad_norm": 0.6217253804206848, "learning_rate": 8.213335892184252e-06, "loss": 0.0161, "step": 82910 }, { "epoch": 0.7001752126828651, "grad_norm": 0.366942822933197, "learning_rate": 8.212771298581624e-06, "loss": 0.0117, "step": 82920 }, { "epoch": 0.700259652530029, "grad_norm": 0.5962817072868347, "learning_rate": 8.21220663519826e-06, "loss": 0.0159, "step": 82930 }, { "epoch": 0.7003440923771928, "grad_norm": 0.7036097049713135, "learning_rate": 8.211641902046426e-06, "loss": 0.0134, "step": 82940 }, { "epoch": 0.7004285322243566, "grad_norm": 0.4178280532360077, "learning_rate": 8.21107709913839e-06, "loss": 0.0128, "step": 82950 }, { "epoch": 0.7005129720715205, "grad_norm": 0.1689881533384323, "learning_rate": 8.210512226486415e-06, "loss": 0.0086, "step": 82960 }, { "epoch": 0.7005974119186844, "grad_norm": 0.6960498690605164, "learning_rate": 8.209947284102773e-06, "loss": 0.0086, "step": 82970 }, { "epoch": 0.7006818517658483, "grad_norm": 0.22942577302455902, "learning_rate": 8.209382271999735e-06, "loss": 0.0084, "step": 82980 }, { "epoch": 0.7007662916130122, "grad_norm": 0.22647641599178314, "learning_rate": 8.208817190189571e-06, "loss": 0.0094, "step": 82990 }, { "epoch": 0.7008507314601761, "grad_norm": 0.15429970622062683, "learning_rate": 8.208252038684555e-06, "loss": 0.0098, "step": 83000 }, { "epoch": 0.70093517130734, "grad_norm": 0.3817034959793091, "learning_rate": 8.207686817496962e-06, "loss": 0.0081, "step": 83010 }, { "epoch": 0.7010196111545038, "grad_norm": 0.09679494053125381, "learning_rate": 8.207121526639068e-06, "loss": 0.0267, "step": 83020 }, { "epoch": 0.7011040510016677, "grad_norm": 0.22580809891223907, "learning_rate": 8.206556166123153e-06, "loss": 0.0121, "step": 83030 }, { "epoch": 0.7011884908488316, "grad_norm": 0.08916079252958298, "learning_rate": 8.205990735961495e-06, "loss": 0.0104, "step": 83040 }, { "epoch": 0.7012729306959954, "grad_norm": 0.6605040431022644, "learning_rate": 8.205425236166375e-06, "loss": 0.0271, "step": 83050 }, { "epoch": 0.7013573705431593, "grad_norm": 0.11977535486221313, "learning_rate": 8.204859666750076e-06, "loss": 0.0087, "step": 83060 }, { "epoch": 0.7014418103903232, "grad_norm": 0.2919471561908722, "learning_rate": 8.20429402772488e-06, "loss": 0.013, "step": 83070 }, { "epoch": 0.701526250237487, "grad_norm": 0.466319739818573, "learning_rate": 8.203728319103077e-06, "loss": 0.005, "step": 83080 }, { "epoch": 0.7016106900846509, "grad_norm": 0.2739589810371399, "learning_rate": 8.20316254089695e-06, "loss": 0.0108, "step": 83090 }, { "epoch": 0.7016951299318148, "grad_norm": 0.4583062529563904, "learning_rate": 8.20259669311879e-06, "loss": 0.0124, "step": 83100 }, { "epoch": 0.7017795697789787, "grad_norm": 0.5849690437316895, "learning_rate": 8.202030775780884e-06, "loss": 0.0104, "step": 83110 }, { "epoch": 0.7018640096261426, "grad_norm": 1.0883005857467651, "learning_rate": 8.201464788895527e-06, "loss": 0.0128, "step": 83120 }, { "epoch": 0.7019484494733065, "grad_norm": 0.6845837831497192, "learning_rate": 8.20089873247501e-06, "loss": 0.0181, "step": 83130 }, { "epoch": 0.7020328893204704, "grad_norm": 0.4184657037258148, "learning_rate": 8.20033260653163e-06, "loss": 0.0118, "step": 83140 }, { "epoch": 0.7021173291676343, "grad_norm": 0.21888132393360138, "learning_rate": 8.19976641107768e-06, "loss": 0.0117, "step": 83150 }, { "epoch": 0.7022017690147981, "grad_norm": 0.228291317820549, "learning_rate": 8.19920014612546e-06, "loss": 0.0139, "step": 83160 }, { "epoch": 0.7022862088619619, "grad_norm": 0.10714560747146606, "learning_rate": 8.198633811687269e-06, "loss": 0.0132, "step": 83170 }, { "epoch": 0.7023706487091258, "grad_norm": 0.06846246123313904, "learning_rate": 8.198067407775407e-06, "loss": 0.0108, "step": 83180 }, { "epoch": 0.7024550885562897, "grad_norm": 0.5158481001853943, "learning_rate": 8.197500934402176e-06, "loss": 0.0136, "step": 83190 }, { "epoch": 0.7025395284034536, "grad_norm": 0.394161194562912, "learning_rate": 8.196934391579878e-06, "loss": 0.0167, "step": 83200 }, { "epoch": 0.7026239682506175, "grad_norm": 0.28419041633605957, "learning_rate": 8.196367779320823e-06, "loss": 0.0103, "step": 83210 }, { "epoch": 0.7027084080977813, "grad_norm": 0.2014034241437912, "learning_rate": 8.195801097637313e-06, "loss": 0.01, "step": 83220 }, { "epoch": 0.7027928479449452, "grad_norm": 0.4787655174732208, "learning_rate": 8.195234346541659e-06, "loss": 0.0134, "step": 83230 }, { "epoch": 0.7028772877921091, "grad_norm": 0.5929791331291199, "learning_rate": 8.194667526046169e-06, "loss": 0.0114, "step": 83240 }, { "epoch": 0.702961727639273, "grad_norm": 0.3691098392009735, "learning_rate": 8.194100636163157e-06, "loss": 0.0097, "step": 83250 }, { "epoch": 0.7030461674864369, "grad_norm": 0.31357499957084656, "learning_rate": 8.19353367690493e-06, "loss": 0.014, "step": 83260 }, { "epoch": 0.7031306073336008, "grad_norm": 0.7610851526260376, "learning_rate": 8.192966648283808e-06, "loss": 0.0161, "step": 83270 }, { "epoch": 0.7032150471807646, "grad_norm": 0.7463268637657166, "learning_rate": 8.192399550312107e-06, "loss": 0.0221, "step": 83280 }, { "epoch": 0.7032994870279284, "grad_norm": 0.36344975233078003, "learning_rate": 8.19183238300214e-06, "loss": 0.0068, "step": 83290 }, { "epoch": 0.7033839268750923, "grad_norm": 0.05898697301745415, "learning_rate": 8.191265146366228e-06, "loss": 0.0167, "step": 83300 }, { "epoch": 0.7034683667222562, "grad_norm": 1.2638601064682007, "learning_rate": 8.19069784041669e-06, "loss": 0.0244, "step": 83310 }, { "epoch": 0.7035528065694201, "grad_norm": 0.8486001491546631, "learning_rate": 8.190130465165848e-06, "loss": 0.0176, "step": 83320 }, { "epoch": 0.703637246416584, "grad_norm": 0.25006797909736633, "learning_rate": 8.189563020626027e-06, "loss": 0.0082, "step": 83330 }, { "epoch": 0.7037216862637479, "grad_norm": 0.12264701724052429, "learning_rate": 8.188995506809552e-06, "loss": 0.0066, "step": 83340 }, { "epoch": 0.7038061261109118, "grad_norm": 0.5007245540618896, "learning_rate": 8.188427923728745e-06, "loss": 0.0119, "step": 83350 }, { "epoch": 0.7038905659580756, "grad_norm": 0.22992457449436188, "learning_rate": 8.18786027139594e-06, "loss": 0.0094, "step": 83360 }, { "epoch": 0.7039750058052395, "grad_norm": 1.2995439767837524, "learning_rate": 8.187292549823462e-06, "loss": 0.0152, "step": 83370 }, { "epoch": 0.7040594456524034, "grad_norm": 0.36813637614250183, "learning_rate": 8.186724759023644e-06, "loss": 0.0117, "step": 83380 }, { "epoch": 0.7041438854995672, "grad_norm": 0.3500734865665436, "learning_rate": 8.186156899008816e-06, "loss": 0.0145, "step": 83390 }, { "epoch": 0.7042283253467311, "grad_norm": 0.15268079936504364, "learning_rate": 8.185588969791314e-06, "loss": 0.0158, "step": 83400 }, { "epoch": 0.704312765193895, "grad_norm": 0.2809697389602661, "learning_rate": 8.18502097138347e-06, "loss": 0.0099, "step": 83410 }, { "epoch": 0.7043972050410588, "grad_norm": 0.37860026955604553, "learning_rate": 8.184452903797626e-06, "loss": 0.0102, "step": 83420 }, { "epoch": 0.7044816448882227, "grad_norm": 0.19709542393684387, "learning_rate": 8.183884767046118e-06, "loss": 0.0118, "step": 83430 }, { "epoch": 0.7045660847353866, "grad_norm": 0.25057339668273926, "learning_rate": 8.183316561141283e-06, "loss": 0.0147, "step": 83440 }, { "epoch": 0.7046505245825505, "grad_norm": 0.33047544956207275, "learning_rate": 8.182748286095466e-06, "loss": 0.0085, "step": 83450 }, { "epoch": 0.7047349644297144, "grad_norm": 0.3343786597251892, "learning_rate": 8.182179941921007e-06, "loss": 0.0128, "step": 83460 }, { "epoch": 0.7048194042768783, "grad_norm": 0.3796599209308624, "learning_rate": 8.181611528630252e-06, "loss": 0.0089, "step": 83470 }, { "epoch": 0.7049038441240422, "grad_norm": 0.5666306018829346, "learning_rate": 8.181043046235547e-06, "loss": 0.0154, "step": 83480 }, { "epoch": 0.704988283971206, "grad_norm": 0.2718229591846466, "learning_rate": 8.180474494749239e-06, "loss": 0.0132, "step": 83490 }, { "epoch": 0.7050727238183699, "grad_norm": 0.40368977189064026, "learning_rate": 8.179905874183675e-06, "loss": 0.0202, "step": 83500 }, { "epoch": 0.7051571636655337, "grad_norm": 0.0383659228682518, "learning_rate": 8.179337184551211e-06, "loss": 0.0176, "step": 83510 }, { "epoch": 0.7052416035126976, "grad_norm": 0.025355186313390732, "learning_rate": 8.178768425864193e-06, "loss": 0.0186, "step": 83520 }, { "epoch": 0.7053260433598615, "grad_norm": 0.1904257982969284, "learning_rate": 8.178199598134975e-06, "loss": 0.008, "step": 83530 }, { "epoch": 0.7054104832070254, "grad_norm": 0.5137133002281189, "learning_rate": 8.177630701375913e-06, "loss": 0.0119, "step": 83540 }, { "epoch": 0.7054949230541893, "grad_norm": 0.121900275349617, "learning_rate": 8.177061735599365e-06, "loss": 0.0108, "step": 83550 }, { "epoch": 0.7055793629013531, "grad_norm": 0.3265255391597748, "learning_rate": 8.176492700817686e-06, "loss": 0.0154, "step": 83560 }, { "epoch": 0.705663802748517, "grad_norm": 0.5643553137779236, "learning_rate": 8.175923597043237e-06, "loss": 0.0149, "step": 83570 }, { "epoch": 0.7057482425956809, "grad_norm": 0.11098714172840118, "learning_rate": 8.175354424288377e-06, "loss": 0.0105, "step": 83580 }, { "epoch": 0.7058326824428448, "grad_norm": 0.32909369468688965, "learning_rate": 8.17478518256547e-06, "loss": 0.0091, "step": 83590 }, { "epoch": 0.7059171222900087, "grad_norm": 0.35154345631599426, "learning_rate": 8.174215871886881e-06, "loss": 0.0105, "step": 83600 }, { "epoch": 0.7060015621371726, "grad_norm": 0.06954474002122879, "learning_rate": 8.173646492264973e-06, "loss": 0.0112, "step": 83610 }, { "epoch": 0.7060860019843364, "grad_norm": 0.37947893142700195, "learning_rate": 8.173077043712113e-06, "loss": 0.0103, "step": 83620 }, { "epoch": 0.7061704418315002, "grad_norm": 0.0023438120260834694, "learning_rate": 8.17250752624067e-06, "loss": 0.0047, "step": 83630 }, { "epoch": 0.7062548816786641, "grad_norm": 0.35076987743377686, "learning_rate": 8.171937939863015e-06, "loss": 0.0126, "step": 83640 }, { "epoch": 0.706339321525828, "grad_norm": 0.37645354866981506, "learning_rate": 8.171368284591515e-06, "loss": 0.0152, "step": 83650 }, { "epoch": 0.7064237613729919, "grad_norm": 1.6228004693984985, "learning_rate": 8.17079856043855e-06, "loss": 0.0256, "step": 83660 }, { "epoch": 0.7065082012201558, "grad_norm": 0.22891376912593842, "learning_rate": 8.170228767416485e-06, "loss": 0.0079, "step": 83670 }, { "epoch": 0.7065926410673197, "grad_norm": 0.1848321557044983, "learning_rate": 8.169658905537704e-06, "loss": 0.0117, "step": 83680 }, { "epoch": 0.7066770809144836, "grad_norm": 0.1604161411523819, "learning_rate": 8.16908897481458e-06, "loss": 0.0119, "step": 83690 }, { "epoch": 0.7067615207616474, "grad_norm": 0.5007867217063904, "learning_rate": 8.168518975259492e-06, "loss": 0.0148, "step": 83700 }, { "epoch": 0.7068459606088113, "grad_norm": 0.48065945506095886, "learning_rate": 8.167948906884824e-06, "loss": 0.0138, "step": 83710 }, { "epoch": 0.7069304004559752, "grad_norm": 0.5027404427528381, "learning_rate": 8.167378769702953e-06, "loss": 0.013, "step": 83720 }, { "epoch": 0.7070148403031391, "grad_norm": 0.42285796999931335, "learning_rate": 8.166808563726264e-06, "loss": 0.0108, "step": 83730 }, { "epoch": 0.7070992801503029, "grad_norm": 0.36577698588371277, "learning_rate": 8.166238288967142e-06, "loss": 0.0157, "step": 83740 }, { "epoch": 0.7071837199974668, "grad_norm": 0.4538264870643616, "learning_rate": 8.165667945437974e-06, "loss": 0.0138, "step": 83750 }, { "epoch": 0.7072681598446307, "grad_norm": 0.39997971057891846, "learning_rate": 8.165097533151147e-06, "loss": 0.0112, "step": 83760 }, { "epoch": 0.7073525996917945, "grad_norm": 0.4057069718837738, "learning_rate": 8.164527052119049e-06, "loss": 0.0139, "step": 83770 }, { "epoch": 0.7074370395389584, "grad_norm": 0.32620424032211304, "learning_rate": 8.163956502354073e-06, "loss": 0.0274, "step": 83780 }, { "epoch": 0.7075214793861223, "grad_norm": 0.3471427857875824, "learning_rate": 8.16338588386861e-06, "loss": 0.0096, "step": 83790 }, { "epoch": 0.7076059192332862, "grad_norm": 0.324889600276947, "learning_rate": 8.162815196675054e-06, "loss": 0.0111, "step": 83800 }, { "epoch": 0.7076903590804501, "grad_norm": 0.8058412075042725, "learning_rate": 8.162244440785798e-06, "loss": 0.01, "step": 83810 }, { "epoch": 0.707774798927614, "grad_norm": 0.5051807761192322, "learning_rate": 8.161673616213243e-06, "loss": 0.0193, "step": 83820 }, { "epoch": 0.7078592387747779, "grad_norm": 0.6030798554420471, "learning_rate": 8.161102722969784e-06, "loss": 0.0147, "step": 83830 }, { "epoch": 0.7079436786219417, "grad_norm": 0.2644585967063904, "learning_rate": 8.160531761067823e-06, "loss": 0.0058, "step": 83840 }, { "epoch": 0.7080281184691055, "grad_norm": 0.029755454510450363, "learning_rate": 8.159960730519757e-06, "loss": 0.0119, "step": 83850 }, { "epoch": 0.7081125583162694, "grad_norm": 0.17834876477718353, "learning_rate": 8.159389631337994e-06, "loss": 0.0079, "step": 83860 }, { "epoch": 0.7081969981634333, "grad_norm": 0.27709805965423584, "learning_rate": 8.158818463534935e-06, "loss": 0.0073, "step": 83870 }, { "epoch": 0.7082814380105972, "grad_norm": 0.5562899112701416, "learning_rate": 8.158247227122987e-06, "loss": 0.0128, "step": 83880 }, { "epoch": 0.7083658778577611, "grad_norm": 0.7836554050445557, "learning_rate": 8.157675922114556e-06, "loss": 0.0116, "step": 83890 }, { "epoch": 0.708450317704925, "grad_norm": 0.48227059841156006, "learning_rate": 8.15710454852205e-06, "loss": 0.0125, "step": 83900 }, { "epoch": 0.7085347575520888, "grad_norm": 0.4751468598842621, "learning_rate": 8.156533106357881e-06, "loss": 0.0089, "step": 83910 }, { "epoch": 0.7086191973992527, "grad_norm": 0.6471118927001953, "learning_rate": 8.155961595634459e-06, "loss": 0.0096, "step": 83920 }, { "epoch": 0.7087036372464166, "grad_norm": 0.47085514664649963, "learning_rate": 8.155390016364198e-06, "loss": 0.0119, "step": 83930 }, { "epoch": 0.7087880770935805, "grad_norm": 0.1684342473745346, "learning_rate": 8.154818368559514e-06, "loss": 0.0143, "step": 83940 }, { "epoch": 0.7088725169407444, "grad_norm": 0.14479577541351318, "learning_rate": 8.15424665223282e-06, "loss": 0.0099, "step": 83950 }, { "epoch": 0.7089569567879083, "grad_norm": 0.32269152998924255, "learning_rate": 8.153674867396534e-06, "loss": 0.0139, "step": 83960 }, { "epoch": 0.709041396635072, "grad_norm": 0.19549433887004852, "learning_rate": 8.153103014063078e-06, "loss": 0.0116, "step": 83970 }, { "epoch": 0.7091258364822359, "grad_norm": 0.18895851075649261, "learning_rate": 8.15253109224487e-06, "loss": 0.0104, "step": 83980 }, { "epoch": 0.7092102763293998, "grad_norm": 0.25676706433296204, "learning_rate": 8.151959101954332e-06, "loss": 0.0167, "step": 83990 }, { "epoch": 0.7092947161765637, "grad_norm": 0.2863207459449768, "learning_rate": 8.15138704320389e-06, "loss": 0.0092, "step": 84000 }, { "epoch": 0.7093791560237276, "grad_norm": 0.22673660516738892, "learning_rate": 8.150814916005968e-06, "loss": 0.0097, "step": 84010 }, { "epoch": 0.7094635958708915, "grad_norm": 0.3821026682853699, "learning_rate": 8.150242720372989e-06, "loss": 0.0117, "step": 84020 }, { "epoch": 0.7095480357180554, "grad_norm": 0.4968858063220978, "learning_rate": 8.149670456317385e-06, "loss": 0.0124, "step": 84030 }, { "epoch": 0.7096324755652192, "grad_norm": 0.6787943840026855, "learning_rate": 8.149098123851583e-06, "loss": 0.0198, "step": 84040 }, { "epoch": 0.7097169154123831, "grad_norm": 0.5054089426994324, "learning_rate": 8.148525722988015e-06, "loss": 0.0167, "step": 84050 }, { "epoch": 0.709801355259547, "grad_norm": 0.07003816962242126, "learning_rate": 8.147953253739114e-06, "loss": 0.0081, "step": 84060 }, { "epoch": 0.7098857951067109, "grad_norm": 0.2402985543012619, "learning_rate": 8.147380716117315e-06, "loss": 0.0091, "step": 84070 }, { "epoch": 0.7099702349538747, "grad_norm": 0.3362463116645813, "learning_rate": 8.146808110135049e-06, "loss": 0.0065, "step": 84080 }, { "epoch": 0.7100546748010386, "grad_norm": 0.07609815895557404, "learning_rate": 8.146235435804757e-06, "loss": 0.011, "step": 84090 }, { "epoch": 0.7101391146482025, "grad_norm": 0.5469462871551514, "learning_rate": 8.145662693138877e-06, "loss": 0.01, "step": 84100 }, { "epoch": 0.7102235544953663, "grad_norm": 0.03347454220056534, "learning_rate": 8.145089882149848e-06, "loss": 0.0071, "step": 84110 }, { "epoch": 0.7103079943425302, "grad_norm": 0.3071613311767578, "learning_rate": 8.14451700285011e-06, "loss": 0.0103, "step": 84120 }, { "epoch": 0.7103924341896941, "grad_norm": 0.5423092246055603, "learning_rate": 8.143944055252107e-06, "loss": 0.0112, "step": 84130 }, { "epoch": 0.710476874036858, "grad_norm": 0.9524573683738708, "learning_rate": 8.143371039368283e-06, "loss": 0.0191, "step": 84140 }, { "epoch": 0.7105613138840219, "grad_norm": 0.5130526423454285, "learning_rate": 8.142797955211085e-06, "loss": 0.0117, "step": 84150 }, { "epoch": 0.7106457537311858, "grad_norm": 0.29890206456184387, "learning_rate": 8.142224802792962e-06, "loss": 0.0095, "step": 84160 }, { "epoch": 0.7107301935783497, "grad_norm": 0.4122990071773529, "learning_rate": 8.141651582126357e-06, "loss": 0.0106, "step": 84170 }, { "epoch": 0.7108146334255135, "grad_norm": 0.5605388879776001, "learning_rate": 8.141078293223723e-06, "loss": 0.0101, "step": 84180 }, { "epoch": 0.7108990732726774, "grad_norm": 0.3648883104324341, "learning_rate": 8.140504936097513e-06, "loss": 0.0133, "step": 84190 }, { "epoch": 0.7109835131198412, "grad_norm": 0.03511831536889076, "learning_rate": 8.139931510760179e-06, "loss": 0.0068, "step": 84200 }, { "epoch": 0.7110679529670051, "grad_norm": 0.15728744864463806, "learning_rate": 8.139358017224177e-06, "loss": 0.0086, "step": 84210 }, { "epoch": 0.711152392814169, "grad_norm": 0.27865952253341675, "learning_rate": 8.138784455501963e-06, "loss": 0.0174, "step": 84220 }, { "epoch": 0.7112368326613329, "grad_norm": 0.4254343509674072, "learning_rate": 8.138210825605992e-06, "loss": 0.0117, "step": 84230 }, { "epoch": 0.7113212725084967, "grad_norm": 0.5188626050949097, "learning_rate": 8.137637127548726e-06, "loss": 0.0072, "step": 84240 }, { "epoch": 0.7114057123556606, "grad_norm": 0.3714170455932617, "learning_rate": 8.137063361342625e-06, "loss": 0.0129, "step": 84250 }, { "epoch": 0.7114901522028245, "grad_norm": 0.0878918468952179, "learning_rate": 8.136489527000148e-06, "loss": 0.0115, "step": 84260 }, { "epoch": 0.7115745920499884, "grad_norm": 0.14501747488975525, "learning_rate": 8.135915624533764e-06, "loss": 0.0103, "step": 84270 }, { "epoch": 0.7116590318971523, "grad_norm": 0.26557230949401855, "learning_rate": 8.135341653955933e-06, "loss": 0.0142, "step": 84280 }, { "epoch": 0.7117434717443162, "grad_norm": 0.31850749254226685, "learning_rate": 8.134767615279126e-06, "loss": 0.0137, "step": 84290 }, { "epoch": 0.7118279115914801, "grad_norm": 0.6069291830062866, "learning_rate": 8.134193508515806e-06, "loss": 0.0204, "step": 84300 }, { "epoch": 0.7119123514386438, "grad_norm": 0.2839243710041046, "learning_rate": 8.133619333678448e-06, "loss": 0.0097, "step": 84310 }, { "epoch": 0.7119967912858077, "grad_norm": 0.5377713441848755, "learning_rate": 8.133045090779519e-06, "loss": 0.0077, "step": 84320 }, { "epoch": 0.7120812311329716, "grad_norm": 0.22501535713672638, "learning_rate": 8.132470779831493e-06, "loss": 0.0138, "step": 84330 }, { "epoch": 0.7121656709801355, "grad_norm": 0.33124449849128723, "learning_rate": 8.131896400846845e-06, "loss": 0.012, "step": 84340 }, { "epoch": 0.7122501108272994, "grad_norm": 0.41867727041244507, "learning_rate": 8.131321953838046e-06, "loss": 0.008, "step": 84350 }, { "epoch": 0.7123345506744633, "grad_norm": 0.2341863512992859, "learning_rate": 8.130747438817577e-06, "loss": 0.0094, "step": 84360 }, { "epoch": 0.7124189905216272, "grad_norm": 0.017280900850892067, "learning_rate": 8.130172855797916e-06, "loss": 0.008, "step": 84370 }, { "epoch": 0.712503430368791, "grad_norm": 0.09774865955114365, "learning_rate": 8.12959820479154e-06, "loss": 0.0105, "step": 84380 }, { "epoch": 0.7125878702159549, "grad_norm": 0.37699800729751587, "learning_rate": 8.129023485810933e-06, "loss": 0.0079, "step": 84390 }, { "epoch": 0.7126723100631188, "grad_norm": 0.2851663827896118, "learning_rate": 8.128448698868578e-06, "loss": 0.0093, "step": 84400 }, { "epoch": 0.7127567499102827, "grad_norm": 0.3309171497821808, "learning_rate": 8.127873843976959e-06, "loss": 0.0115, "step": 84410 }, { "epoch": 0.7128411897574466, "grad_norm": 0.8999245762825012, "learning_rate": 8.12729892114856e-06, "loss": 0.0146, "step": 84420 }, { "epoch": 0.7129256296046104, "grad_norm": 0.18162241578102112, "learning_rate": 8.12672393039587e-06, "loss": 0.0103, "step": 84430 }, { "epoch": 0.7130100694517743, "grad_norm": 0.2358904927968979, "learning_rate": 8.126148871731376e-06, "loss": 0.012, "step": 84440 }, { "epoch": 0.7130945092989381, "grad_norm": 0.2235434353351593, "learning_rate": 8.125573745167569e-06, "loss": 0.0106, "step": 84450 }, { "epoch": 0.713178949146102, "grad_norm": 0.5728037357330322, "learning_rate": 8.124998550716942e-06, "loss": 0.0086, "step": 84460 }, { "epoch": 0.7132633889932659, "grad_norm": 0.24967741966247559, "learning_rate": 8.124423288391987e-06, "loss": 0.0088, "step": 84470 }, { "epoch": 0.7133478288404298, "grad_norm": 0.4242013394832611, "learning_rate": 8.123847958205197e-06, "loss": 0.0168, "step": 84480 }, { "epoch": 0.7134322686875937, "grad_norm": 0.9637530446052551, "learning_rate": 8.12327256016907e-06, "loss": 0.0253, "step": 84490 }, { "epoch": 0.7135167085347576, "grad_norm": 0.5605854988098145, "learning_rate": 8.122697094296103e-06, "loss": 0.0169, "step": 84500 }, { "epoch": 0.7136011483819215, "grad_norm": 0.2493484616279602, "learning_rate": 8.122121560598794e-06, "loss": 0.0072, "step": 84510 }, { "epoch": 0.7136855882290853, "grad_norm": 0.6149905920028687, "learning_rate": 8.121545959089646e-06, "loss": 0.0119, "step": 84520 }, { "epoch": 0.7137700280762492, "grad_norm": 0.45761755108833313, "learning_rate": 8.120970289781159e-06, "loss": 0.0086, "step": 84530 }, { "epoch": 0.713854467923413, "grad_norm": 0.25862544775009155, "learning_rate": 8.120394552685838e-06, "loss": 0.0201, "step": 84540 }, { "epoch": 0.7139389077705769, "grad_norm": 1.991888403892517, "learning_rate": 8.119818747816184e-06, "loss": 0.029, "step": 84550 }, { "epoch": 0.7140233476177408, "grad_norm": 0.29709193110466003, "learning_rate": 8.119242875184707e-06, "loss": 0.0126, "step": 84560 }, { "epoch": 0.7141077874649047, "grad_norm": 0.29726454615592957, "learning_rate": 8.118666934803915e-06, "loss": 0.0089, "step": 84570 }, { "epoch": 0.7141922273120686, "grad_norm": 0.30703768134117126, "learning_rate": 8.118090926686317e-06, "loss": 0.0105, "step": 84580 }, { "epoch": 0.7142766671592324, "grad_norm": 0.24956491589546204, "learning_rate": 8.11751485084442e-06, "loss": 0.0122, "step": 84590 }, { "epoch": 0.7143611070063963, "grad_norm": 0.8212714791297913, "learning_rate": 8.116938707290742e-06, "loss": 0.0143, "step": 84600 }, { "epoch": 0.7144455468535602, "grad_norm": 0.29904210567474365, "learning_rate": 8.116362496037795e-06, "loss": 0.0209, "step": 84610 }, { "epoch": 0.7145299867007241, "grad_norm": 0.3979856073856354, "learning_rate": 8.115786217098091e-06, "loss": 0.0074, "step": 84620 }, { "epoch": 0.714614426547888, "grad_norm": 0.46342357993125916, "learning_rate": 8.115209870484148e-06, "loss": 0.0076, "step": 84630 }, { "epoch": 0.7146988663950519, "grad_norm": 0.43152427673339844, "learning_rate": 8.114633456208487e-06, "loss": 0.011, "step": 84640 }, { "epoch": 0.7147833062422158, "grad_norm": 0.6072138547897339, "learning_rate": 8.114056974283624e-06, "loss": 0.0144, "step": 84650 }, { "epoch": 0.7148677460893795, "grad_norm": 0.4142525792121887, "learning_rate": 8.113480424722083e-06, "loss": 0.0131, "step": 84660 }, { "epoch": 0.7149521859365434, "grad_norm": 0.2733376920223236, "learning_rate": 8.112903807536385e-06, "loss": 0.0088, "step": 84670 }, { "epoch": 0.7150366257837073, "grad_norm": 0.2141859531402588, "learning_rate": 8.112327122739052e-06, "loss": 0.0221, "step": 84680 }, { "epoch": 0.7151210656308712, "grad_norm": 0.2813279330730438, "learning_rate": 8.111750370342614e-06, "loss": 0.0182, "step": 84690 }, { "epoch": 0.7152055054780351, "grad_norm": 0.314947247505188, "learning_rate": 8.111173550359597e-06, "loss": 0.0201, "step": 84700 }, { "epoch": 0.715289945325199, "grad_norm": 0.5303381681442261, "learning_rate": 8.110596662802526e-06, "loss": 0.0156, "step": 84710 }, { "epoch": 0.7153743851723628, "grad_norm": 0.5542613863945007, "learning_rate": 8.110019707683932e-06, "loss": 0.0114, "step": 84720 }, { "epoch": 0.7154588250195267, "grad_norm": 0.13545475900173187, "learning_rate": 8.109442685016348e-06, "loss": 0.0202, "step": 84730 }, { "epoch": 0.7155432648666906, "grad_norm": 0.5615025758743286, "learning_rate": 8.108865594812308e-06, "loss": 0.0156, "step": 84740 }, { "epoch": 0.7156277047138545, "grad_norm": 0.16111476719379425, "learning_rate": 8.108288437084341e-06, "loss": 0.0095, "step": 84750 }, { "epoch": 0.7157121445610184, "grad_norm": 0.23261696100234985, "learning_rate": 8.107711211844989e-06, "loss": 0.0168, "step": 84760 }, { "epoch": 0.7157965844081822, "grad_norm": 0.3021450936794281, "learning_rate": 8.107133919106785e-06, "loss": 0.0141, "step": 84770 }, { "epoch": 0.715881024255346, "grad_norm": 0.4608181416988373, "learning_rate": 8.106556558882268e-06, "loss": 0.0127, "step": 84780 }, { "epoch": 0.7159654641025099, "grad_norm": 0.038455620408058167, "learning_rate": 8.105979131183981e-06, "loss": 0.0067, "step": 84790 }, { "epoch": 0.7160499039496738, "grad_norm": 0.4456230103969574, "learning_rate": 8.105401636024463e-06, "loss": 0.0102, "step": 84800 }, { "epoch": 0.7161343437968377, "grad_norm": 0.13013483583927155, "learning_rate": 8.104824073416257e-06, "loss": 0.0057, "step": 84810 }, { "epoch": 0.7162187836440016, "grad_norm": 0.31766876578330994, "learning_rate": 8.10424644337191e-06, "loss": 0.0136, "step": 84820 }, { "epoch": 0.7163032234911655, "grad_norm": 0.3334675133228302, "learning_rate": 8.103668745903965e-06, "loss": 0.0125, "step": 84830 }, { "epoch": 0.7163876633383294, "grad_norm": 0.2222057282924652, "learning_rate": 8.103090981024972e-06, "loss": 0.0172, "step": 84840 }, { "epoch": 0.7164721031854933, "grad_norm": 0.35525721311569214, "learning_rate": 8.102513148747478e-06, "loss": 0.0094, "step": 84850 }, { "epoch": 0.7165565430326571, "grad_norm": 0.10747754573822021, "learning_rate": 8.101935249084035e-06, "loss": 0.0089, "step": 84860 }, { "epoch": 0.716640982879821, "grad_norm": 0.20642265677452087, "learning_rate": 8.101357282047191e-06, "loss": 0.01, "step": 84870 }, { "epoch": 0.7167254227269849, "grad_norm": 0.35314375162124634, "learning_rate": 8.100779247649506e-06, "loss": 0.0146, "step": 84880 }, { "epoch": 0.7168098625741487, "grad_norm": 0.29278820753097534, "learning_rate": 8.10020114590353e-06, "loss": 0.0076, "step": 84890 }, { "epoch": 0.7168943024213126, "grad_norm": 0.8244536519050598, "learning_rate": 8.09962297682182e-06, "loss": 0.0236, "step": 84900 }, { "epoch": 0.7169787422684765, "grad_norm": 0.4054781496524811, "learning_rate": 8.099044740416934e-06, "loss": 0.0158, "step": 84910 }, { "epoch": 0.7170631821156404, "grad_norm": 1.047957420349121, "learning_rate": 8.098466436701432e-06, "loss": 0.007, "step": 84920 }, { "epoch": 0.7171476219628042, "grad_norm": 0.21313059329986572, "learning_rate": 8.097888065687874e-06, "loss": 0.0083, "step": 84930 }, { "epoch": 0.7172320618099681, "grad_norm": 0.2640988528728485, "learning_rate": 8.097309627388823e-06, "loss": 0.0065, "step": 84940 }, { "epoch": 0.717316501657132, "grad_norm": 0.5850040912628174, "learning_rate": 8.09673112181684e-06, "loss": 0.0111, "step": 84950 }, { "epoch": 0.7174009415042959, "grad_norm": 0.8225745558738708, "learning_rate": 8.09615254898449e-06, "loss": 0.0234, "step": 84960 }, { "epoch": 0.7174853813514598, "grad_norm": 0.071700818836689, "learning_rate": 8.095573908904344e-06, "loss": 0.0214, "step": 84970 }, { "epoch": 0.7175698211986237, "grad_norm": 0.298577219247818, "learning_rate": 8.094995201588967e-06, "loss": 0.0127, "step": 84980 }, { "epoch": 0.7176542610457876, "grad_norm": 0.1920495182275772, "learning_rate": 8.094416427050926e-06, "loss": 0.0095, "step": 84990 }, { "epoch": 0.7177387008929513, "grad_norm": 0.3384416997432709, "learning_rate": 8.093837585302795e-06, "loss": 0.0104, "step": 85000 }, { "epoch": 0.7178231407401152, "grad_norm": 0.44173458218574524, "learning_rate": 8.093258676357148e-06, "loss": 0.0172, "step": 85010 }, { "epoch": 0.7179075805872791, "grad_norm": 0.15733839571475983, "learning_rate": 8.092679700226553e-06, "loss": 0.0127, "step": 85020 }, { "epoch": 0.717992020434443, "grad_norm": 0.07593591511249542, "learning_rate": 8.092100656923592e-06, "loss": 0.0136, "step": 85030 }, { "epoch": 0.7180764602816069, "grad_norm": 1.0184425115585327, "learning_rate": 8.091521546460837e-06, "loss": 0.0124, "step": 85040 }, { "epoch": 0.7181609001287708, "grad_norm": 1.0254560708999634, "learning_rate": 8.090942368850865e-06, "loss": 0.0158, "step": 85050 }, { "epoch": 0.7182453399759346, "grad_norm": 0.3367123603820801, "learning_rate": 8.090363124106261e-06, "loss": 0.0087, "step": 85060 }, { "epoch": 0.7183297798230985, "grad_norm": 0.6699317693710327, "learning_rate": 8.089783812239604e-06, "loss": 0.0107, "step": 85070 }, { "epoch": 0.7184142196702624, "grad_norm": 0.5408841371536255, "learning_rate": 8.089204433263475e-06, "loss": 0.0181, "step": 85080 }, { "epoch": 0.7184986595174263, "grad_norm": 0.33722952008247375, "learning_rate": 8.088624987190457e-06, "loss": 0.013, "step": 85090 }, { "epoch": 0.7185830993645902, "grad_norm": 0.5393622517585754, "learning_rate": 8.08804547403314e-06, "loss": 0.0099, "step": 85100 }, { "epoch": 0.7186675392117541, "grad_norm": 0.24194864928722382, "learning_rate": 8.087465893804105e-06, "loss": 0.0121, "step": 85110 }, { "epoch": 0.7187519790589179, "grad_norm": 0.15442828834056854, "learning_rate": 8.086886246515946e-06, "loss": 0.0097, "step": 85120 }, { "epoch": 0.7188364189060817, "grad_norm": 0.061928313225507736, "learning_rate": 8.086306532181248e-06, "loss": 0.0073, "step": 85130 }, { "epoch": 0.7189208587532456, "grad_norm": 0.004319028463214636, "learning_rate": 8.085726750812605e-06, "loss": 0.0146, "step": 85140 }, { "epoch": 0.7190052986004095, "grad_norm": 0.523847222328186, "learning_rate": 8.085146902422611e-06, "loss": 0.0136, "step": 85150 }, { "epoch": 0.7190897384475734, "grad_norm": 0.29421138763427734, "learning_rate": 8.084566987023859e-06, "loss": 0.0071, "step": 85160 }, { "epoch": 0.7191741782947373, "grad_norm": 0.33267733454704285, "learning_rate": 8.083987004628942e-06, "loss": 0.0165, "step": 85170 }, { "epoch": 0.7192586181419012, "grad_norm": 1.1568958759307861, "learning_rate": 8.08340695525046e-06, "loss": 0.015, "step": 85180 }, { "epoch": 0.7193430579890651, "grad_norm": 0.19423629343509674, "learning_rate": 8.082826838901012e-06, "loss": 0.0084, "step": 85190 }, { "epoch": 0.719427497836229, "grad_norm": 0.3013705313205719, "learning_rate": 8.082246655593194e-06, "loss": 0.0183, "step": 85200 }, { "epoch": 0.7195119376833928, "grad_norm": 0.29131561517715454, "learning_rate": 8.081666405339612e-06, "loss": 0.0129, "step": 85210 }, { "epoch": 0.7195963775305567, "grad_norm": 0.7938793301582336, "learning_rate": 8.081086088152868e-06, "loss": 0.0125, "step": 85220 }, { "epoch": 0.7196808173777205, "grad_norm": 0.5482991933822632, "learning_rate": 8.080505704045563e-06, "loss": 0.0142, "step": 85230 }, { "epoch": 0.7197652572248844, "grad_norm": 0.25563672184944153, "learning_rate": 8.079925253030307e-06, "loss": 0.02, "step": 85240 }, { "epoch": 0.7198496970720483, "grad_norm": 0.1270466446876526, "learning_rate": 8.079344735119704e-06, "loss": 0.011, "step": 85250 }, { "epoch": 0.7199341369192122, "grad_norm": 0.10451961308717728, "learning_rate": 8.078764150326369e-06, "loss": 0.0137, "step": 85260 }, { "epoch": 0.720018576766376, "grad_norm": 0.33227017521858215, "learning_rate": 8.078183498662904e-06, "loss": 0.0138, "step": 85270 }, { "epoch": 0.7201030166135399, "grad_norm": 0.1961441934108734, "learning_rate": 8.077602780141924e-06, "loss": 0.0077, "step": 85280 }, { "epoch": 0.7201874564607038, "grad_norm": 0.41062313318252563, "learning_rate": 8.077021994776044e-06, "loss": 0.0115, "step": 85290 }, { "epoch": 0.7202718963078677, "grad_norm": 0.47229263186454773, "learning_rate": 8.076441142577876e-06, "loss": 0.0192, "step": 85300 }, { "epoch": 0.7203563361550316, "grad_norm": 0.3207506239414215, "learning_rate": 8.075860223560034e-06, "loss": 0.0055, "step": 85310 }, { "epoch": 0.7204407760021955, "grad_norm": 0.47417151927948, "learning_rate": 8.075279237735141e-06, "loss": 0.0073, "step": 85320 }, { "epoch": 0.7205252158493594, "grad_norm": 0.34896981716156006, "learning_rate": 8.074698185115815e-06, "loss": 0.0118, "step": 85330 }, { "epoch": 0.7206096556965232, "grad_norm": 0.7357413172721863, "learning_rate": 8.07411706571467e-06, "loss": 0.0094, "step": 85340 }, { "epoch": 0.720694095543687, "grad_norm": 0.5830125212669373, "learning_rate": 8.073535879544336e-06, "loss": 0.0142, "step": 85350 }, { "epoch": 0.7207785353908509, "grad_norm": 0.5236531496047974, "learning_rate": 8.07295462661743e-06, "loss": 0.01, "step": 85360 }, { "epoch": 0.7208629752380148, "grad_norm": 0.41168296337127686, "learning_rate": 8.072373306946582e-06, "loss": 0.0117, "step": 85370 }, { "epoch": 0.7209474150851787, "grad_norm": 0.44381633400917053, "learning_rate": 8.071791920544414e-06, "loss": 0.0108, "step": 85380 }, { "epoch": 0.7210318549323426, "grad_norm": 0.25728580355644226, "learning_rate": 8.071210467423554e-06, "loss": 0.0089, "step": 85390 }, { "epoch": 0.7211162947795065, "grad_norm": 0.2742183804512024, "learning_rate": 8.070628947596633e-06, "loss": 0.0192, "step": 85400 }, { "epoch": 0.7212007346266703, "grad_norm": 0.3509615659713745, "learning_rate": 8.07004736107628e-06, "loss": 0.0095, "step": 85410 }, { "epoch": 0.7212851744738342, "grad_norm": 0.22207757830619812, "learning_rate": 8.069465707875128e-06, "loss": 0.008, "step": 85420 }, { "epoch": 0.7213696143209981, "grad_norm": 0.1688796728849411, "learning_rate": 8.06888398800581e-06, "loss": 0.0147, "step": 85430 }, { "epoch": 0.721454054168162, "grad_norm": 0.27780041098594666, "learning_rate": 8.06830220148096e-06, "loss": 0.0125, "step": 85440 }, { "epoch": 0.7215384940153259, "grad_norm": 0.40577054023742676, "learning_rate": 8.067720348313215e-06, "loss": 0.0208, "step": 85450 }, { "epoch": 0.7216229338624897, "grad_norm": 0.9617760181427002, "learning_rate": 8.067138428515211e-06, "loss": 0.0171, "step": 85460 }, { "epoch": 0.7217073737096535, "grad_norm": 0.5232282876968384, "learning_rate": 8.066556442099592e-06, "loss": 0.0068, "step": 85470 }, { "epoch": 0.7217918135568174, "grad_norm": 0.13281525671482086, "learning_rate": 8.065974389078995e-06, "loss": 0.0078, "step": 85480 }, { "epoch": 0.7218762534039813, "grad_norm": 0.8056148886680603, "learning_rate": 8.06539226946606e-06, "loss": 0.0108, "step": 85490 }, { "epoch": 0.7219606932511452, "grad_norm": 0.2530115842819214, "learning_rate": 8.064810083273435e-06, "loss": 0.0093, "step": 85500 }, { "epoch": 0.7220451330983091, "grad_norm": 0.21843232214450836, "learning_rate": 8.064227830513762e-06, "loss": 0.0082, "step": 85510 }, { "epoch": 0.722129572945473, "grad_norm": 0.19473440945148468, "learning_rate": 8.06364551119969e-06, "loss": 0.0067, "step": 85520 }, { "epoch": 0.7222140127926369, "grad_norm": 0.754980206489563, "learning_rate": 8.063063125343866e-06, "loss": 0.0154, "step": 85530 }, { "epoch": 0.7222984526398007, "grad_norm": 0.08886639773845673, "learning_rate": 8.062480672958935e-06, "loss": 0.0062, "step": 85540 }, { "epoch": 0.7223828924869646, "grad_norm": 0.37018343806266785, "learning_rate": 8.061898154057554e-06, "loss": 0.0072, "step": 85550 }, { "epoch": 0.7224673323341285, "grad_norm": 0.41866928339004517, "learning_rate": 8.061315568652372e-06, "loss": 0.0096, "step": 85560 }, { "epoch": 0.7225517721812924, "grad_norm": 0.362199068069458, "learning_rate": 8.060732916756044e-06, "loss": 0.0164, "step": 85570 }, { "epoch": 0.7226362120284562, "grad_norm": 0.2464665323495865, "learning_rate": 8.060150198381223e-06, "loss": 0.0096, "step": 85580 }, { "epoch": 0.7227206518756201, "grad_norm": 0.22493013739585876, "learning_rate": 8.059567413540566e-06, "loss": 0.0061, "step": 85590 }, { "epoch": 0.722805091722784, "grad_norm": 0.34853604435920715, "learning_rate": 8.058984562246733e-06, "loss": 0.0122, "step": 85600 }, { "epoch": 0.7228895315699478, "grad_norm": 0.2745780944824219, "learning_rate": 8.058401644512382e-06, "loss": 0.0117, "step": 85610 }, { "epoch": 0.7229739714171117, "grad_norm": 0.44302526116371155, "learning_rate": 8.057818660350174e-06, "loss": 0.0173, "step": 85620 }, { "epoch": 0.7230584112642756, "grad_norm": 0.1298123300075531, "learning_rate": 8.057235609772771e-06, "loss": 0.0051, "step": 85630 }, { "epoch": 0.7231428511114395, "grad_norm": 0.31133562326431274, "learning_rate": 8.056652492792838e-06, "loss": 0.0108, "step": 85640 }, { "epoch": 0.7232272909586034, "grad_norm": 0.22008110582828522, "learning_rate": 8.056069309423039e-06, "loss": 0.012, "step": 85650 }, { "epoch": 0.7233117308057673, "grad_norm": 0.3759043514728546, "learning_rate": 8.05548605967604e-06, "loss": 0.0196, "step": 85660 }, { "epoch": 0.7233961706529312, "grad_norm": 0.34301143884658813, "learning_rate": 8.05490274356451e-06, "loss": 0.0103, "step": 85670 }, { "epoch": 0.723480610500095, "grad_norm": 0.15693090856075287, "learning_rate": 8.05431936110112e-06, "loss": 0.0078, "step": 85680 }, { "epoch": 0.7235650503472588, "grad_norm": 0.34568995237350464, "learning_rate": 8.053735912298536e-06, "loss": 0.0112, "step": 85690 }, { "epoch": 0.7236494901944227, "grad_norm": 0.1896832436323166, "learning_rate": 8.053152397169437e-06, "loss": 0.0132, "step": 85700 }, { "epoch": 0.7237339300415866, "grad_norm": 0.24686206877231598, "learning_rate": 8.052568815726493e-06, "loss": 0.0195, "step": 85710 }, { "epoch": 0.7238183698887505, "grad_norm": 0.6799970269203186, "learning_rate": 8.051985167982379e-06, "loss": 0.012, "step": 85720 }, { "epoch": 0.7239028097359144, "grad_norm": 0.2640422284603119, "learning_rate": 8.051401453949773e-06, "loss": 0.009, "step": 85730 }, { "epoch": 0.7239872495830783, "grad_norm": 0.3491508662700653, "learning_rate": 8.050817673641353e-06, "loss": 0.0135, "step": 85740 }, { "epoch": 0.7240716894302421, "grad_norm": 0.20984958112239838, "learning_rate": 8.050233827069798e-06, "loss": 0.0127, "step": 85750 }, { "epoch": 0.724156129277406, "grad_norm": 0.28878840804100037, "learning_rate": 8.049649914247787e-06, "loss": 0.0096, "step": 85760 }, { "epoch": 0.7242405691245699, "grad_norm": 0.3454396724700928, "learning_rate": 8.04906593518801e-06, "loss": 0.0124, "step": 85770 }, { "epoch": 0.7243250089717338, "grad_norm": 0.39108607172966003, "learning_rate": 8.048481889903142e-06, "loss": 0.0239, "step": 85780 }, { "epoch": 0.7244094488188977, "grad_norm": 0.41004934906959534, "learning_rate": 8.047897778405873e-06, "loss": 0.0167, "step": 85790 }, { "epoch": 0.7244938886660616, "grad_norm": 0.17878763377666473, "learning_rate": 8.047313600708889e-06, "loss": 0.0084, "step": 85800 }, { "epoch": 0.7245783285132253, "grad_norm": 0.8966588377952576, "learning_rate": 8.046729356824877e-06, "loss": 0.0212, "step": 85810 }, { "epoch": 0.7246627683603892, "grad_norm": 1.014836311340332, "learning_rate": 8.046145046766528e-06, "loss": 0.0124, "step": 85820 }, { "epoch": 0.7247472082075531, "grad_norm": 0.40486589074134827, "learning_rate": 8.045560670546533e-06, "loss": 0.0101, "step": 85830 }, { "epoch": 0.724831648054717, "grad_norm": 0.8413609862327576, "learning_rate": 8.044976228177585e-06, "loss": 0.0196, "step": 85840 }, { "epoch": 0.7249160879018809, "grad_norm": 0.47427797317504883, "learning_rate": 8.044391719672376e-06, "loss": 0.0103, "step": 85850 }, { "epoch": 0.7250005277490448, "grad_norm": 0.40349137783050537, "learning_rate": 8.043807145043604e-06, "loss": 0.0065, "step": 85860 }, { "epoch": 0.7250849675962087, "grad_norm": 0.28823673725128174, "learning_rate": 8.043222504303965e-06, "loss": 0.0091, "step": 85870 }, { "epoch": 0.7251694074433725, "grad_norm": 0.10774367302656174, "learning_rate": 8.042637797466156e-06, "loss": 0.0081, "step": 85880 }, { "epoch": 0.7252538472905364, "grad_norm": 0.31429797410964966, "learning_rate": 8.042053024542876e-06, "loss": 0.0097, "step": 85890 }, { "epoch": 0.7253382871377003, "grad_norm": 0.8428884744644165, "learning_rate": 8.04146818554683e-06, "loss": 0.012, "step": 85900 }, { "epoch": 0.7254227269848642, "grad_norm": 0.6725168228149414, "learning_rate": 8.040883280490717e-06, "loss": 0.0145, "step": 85910 }, { "epoch": 0.725507166832028, "grad_norm": 0.5400595664978027, "learning_rate": 8.040298309387242e-06, "loss": 0.0144, "step": 85920 }, { "epoch": 0.7255916066791919, "grad_norm": 0.39759233593940735, "learning_rate": 8.039713272249113e-06, "loss": 0.0118, "step": 85930 }, { "epoch": 0.7256760465263558, "grad_norm": 0.12411327660083771, "learning_rate": 8.039128169089032e-06, "loss": 0.0128, "step": 85940 }, { "epoch": 0.7257604863735196, "grad_norm": 0.5521024465560913, "learning_rate": 8.03854299991971e-06, "loss": 0.0201, "step": 85950 }, { "epoch": 0.7258449262206835, "grad_norm": 0.5866869688034058, "learning_rate": 8.037957764753858e-06, "loss": 0.0215, "step": 85960 }, { "epoch": 0.7259293660678474, "grad_norm": 0.27389636635780334, "learning_rate": 8.037372463604185e-06, "loss": 0.0125, "step": 85970 }, { "epoch": 0.7260138059150113, "grad_norm": 0.23521006107330322, "learning_rate": 8.036787096483406e-06, "loss": 0.0075, "step": 85980 }, { "epoch": 0.7260982457621752, "grad_norm": 0.30681154131889343, "learning_rate": 8.036201663404233e-06, "loss": 0.0206, "step": 85990 }, { "epoch": 0.7261826856093391, "grad_norm": 0.07811274379491806, "learning_rate": 8.035616164379381e-06, "loss": 0.006, "step": 86000 }, { "epoch": 0.726267125456503, "grad_norm": 0.10073135793209076, "learning_rate": 8.03503059942157e-06, "loss": 0.006, "step": 86010 }, { "epoch": 0.7263515653036668, "grad_norm": 0.2114654779434204, "learning_rate": 8.034444968543514e-06, "loss": 0.0121, "step": 86020 }, { "epoch": 0.7264360051508306, "grad_norm": 0.2194860279560089, "learning_rate": 8.033859271757935e-06, "loss": 0.0067, "step": 86030 }, { "epoch": 0.7265204449979945, "grad_norm": 0.26286423206329346, "learning_rate": 8.033273509077556e-06, "loss": 0.0141, "step": 86040 }, { "epoch": 0.7266048848451584, "grad_norm": 0.41872280836105347, "learning_rate": 8.032687680515097e-06, "loss": 0.0124, "step": 86050 }, { "epoch": 0.7266893246923223, "grad_norm": 0.16034497320652008, "learning_rate": 8.032101786083284e-06, "loss": 0.0082, "step": 86060 }, { "epoch": 0.7267737645394862, "grad_norm": 0.13794715702533722, "learning_rate": 8.03151582579484e-06, "loss": 0.0138, "step": 86070 }, { "epoch": 0.72685820438665, "grad_norm": 0.15802542865276337, "learning_rate": 8.030929799662495e-06, "loss": 0.0076, "step": 86080 }, { "epoch": 0.7269426442338139, "grad_norm": 0.3665356934070587, "learning_rate": 8.030343707698974e-06, "loss": 0.011, "step": 86090 }, { "epoch": 0.7270270840809778, "grad_norm": 0.36986279487609863, "learning_rate": 8.02975754991701e-06, "loss": 0.0106, "step": 86100 }, { "epoch": 0.7271115239281417, "grad_norm": 0.22106075286865234, "learning_rate": 8.029171326329333e-06, "loss": 0.0066, "step": 86110 }, { "epoch": 0.7271959637753056, "grad_norm": 0.40880176424980164, "learning_rate": 8.028585036948677e-06, "loss": 0.0093, "step": 86120 }, { "epoch": 0.7272804036224695, "grad_norm": 0.45817482471466064, "learning_rate": 8.027998681787773e-06, "loss": 0.0149, "step": 86130 }, { "epoch": 0.7273648434696334, "grad_norm": 0.881200909614563, "learning_rate": 8.027412260859358e-06, "loss": 0.0125, "step": 86140 }, { "epoch": 0.7274492833167971, "grad_norm": 0.4477532207965851, "learning_rate": 8.026825774176168e-06, "loss": 0.0161, "step": 86150 }, { "epoch": 0.727533723163961, "grad_norm": 0.2666178345680237, "learning_rate": 8.026239221750943e-06, "loss": 0.0112, "step": 86160 }, { "epoch": 0.7276181630111249, "grad_norm": 0.4555070400238037, "learning_rate": 8.025652603596424e-06, "loss": 0.0157, "step": 86170 }, { "epoch": 0.7277026028582888, "grad_norm": 0.5475372672080994, "learning_rate": 8.025065919725348e-06, "loss": 0.0152, "step": 86180 }, { "epoch": 0.7277870427054527, "grad_norm": 0.4671514928340912, "learning_rate": 8.02447917015046e-06, "loss": 0.0083, "step": 86190 }, { "epoch": 0.7278714825526166, "grad_norm": 0.3681669235229492, "learning_rate": 8.023892354884505e-06, "loss": 0.0085, "step": 86200 }, { "epoch": 0.7279559223997805, "grad_norm": 0.8480948209762573, "learning_rate": 8.02330547394023e-06, "loss": 0.0093, "step": 86210 }, { "epoch": 0.7280403622469444, "grad_norm": 0.5730652213096619, "learning_rate": 8.022718527330378e-06, "loss": 0.0127, "step": 86220 }, { "epoch": 0.7281248020941082, "grad_norm": 0.13070325553417206, "learning_rate": 8.022131515067698e-06, "loss": 0.0074, "step": 86230 }, { "epoch": 0.7282092419412721, "grad_norm": 0.1251034140586853, "learning_rate": 8.021544437164944e-06, "loss": 0.0092, "step": 86240 }, { "epoch": 0.728293681788436, "grad_norm": 0.5395075678825378, "learning_rate": 8.02095729363486e-06, "loss": 0.0208, "step": 86250 }, { "epoch": 0.7283781216355998, "grad_norm": 2.343357563018799, "learning_rate": 8.020370084490205e-06, "loss": 0.0149, "step": 86260 }, { "epoch": 0.7284625614827637, "grad_norm": 0.3141193687915802, "learning_rate": 8.019782809743731e-06, "loss": 0.0093, "step": 86270 }, { "epoch": 0.7285470013299276, "grad_norm": 0.4492335319519043, "learning_rate": 8.019195469408191e-06, "loss": 0.0086, "step": 86280 }, { "epoch": 0.7286314411770914, "grad_norm": 0.1651882380247116, "learning_rate": 8.018608063496344e-06, "loss": 0.0064, "step": 86290 }, { "epoch": 0.7287158810242553, "grad_norm": 0.11647457629442215, "learning_rate": 8.01802059202095e-06, "loss": 0.0063, "step": 86300 }, { "epoch": 0.7288003208714192, "grad_norm": 0.4057936668395996, "learning_rate": 8.017433054994767e-06, "loss": 0.0063, "step": 86310 }, { "epoch": 0.7288847607185831, "grad_norm": 0.05450845882296562, "learning_rate": 8.016845452430557e-06, "loss": 0.0138, "step": 86320 }, { "epoch": 0.728969200565747, "grad_norm": 0.43753594160079956, "learning_rate": 8.01625778434108e-06, "loss": 0.0086, "step": 86330 }, { "epoch": 0.7290536404129109, "grad_norm": 0.13659709692001343, "learning_rate": 8.015670050739105e-06, "loss": 0.0143, "step": 86340 }, { "epoch": 0.7291380802600748, "grad_norm": 0.5033588409423828, "learning_rate": 8.015082251637391e-06, "loss": 0.0167, "step": 86350 }, { "epoch": 0.7292225201072386, "grad_norm": 0.30610159039497375, "learning_rate": 8.014494387048711e-06, "loss": 0.0174, "step": 86360 }, { "epoch": 0.7293069599544025, "grad_norm": 0.14252686500549316, "learning_rate": 8.01390645698583e-06, "loss": 0.0166, "step": 86370 }, { "epoch": 0.7293913998015663, "grad_norm": 0.11205604672431946, "learning_rate": 8.013318461461518e-06, "loss": 0.0216, "step": 86380 }, { "epoch": 0.7294758396487302, "grad_norm": 0.38881754875183105, "learning_rate": 8.012730400488547e-06, "loss": 0.0086, "step": 86390 }, { "epoch": 0.7295602794958941, "grad_norm": 0.1604357659816742, "learning_rate": 8.012142274079688e-06, "loss": 0.006, "step": 86400 }, { "epoch": 0.729644719343058, "grad_norm": 0.42966166138648987, "learning_rate": 8.011554082247716e-06, "loss": 0.013, "step": 86410 }, { "epoch": 0.7297291591902219, "grad_norm": 0.6144130229949951, "learning_rate": 8.010965825005408e-06, "loss": 0.0128, "step": 86420 }, { "epoch": 0.7298135990373857, "grad_norm": 0.18380992114543915, "learning_rate": 8.010377502365538e-06, "loss": 0.0105, "step": 86430 }, { "epoch": 0.7298980388845496, "grad_norm": 0.4115404188632965, "learning_rate": 8.009789114340887e-06, "loss": 0.0164, "step": 86440 }, { "epoch": 0.7299824787317135, "grad_norm": 0.0942569151520729, "learning_rate": 8.009200660944231e-06, "loss": 0.0113, "step": 86450 }, { "epoch": 0.7300669185788774, "grad_norm": 0.33643779158592224, "learning_rate": 8.008612142188354e-06, "loss": 0.0178, "step": 86460 }, { "epoch": 0.7301513584260413, "grad_norm": 0.18069158494472504, "learning_rate": 8.00802355808604e-06, "loss": 0.0118, "step": 86470 }, { "epoch": 0.7302357982732052, "grad_norm": 0.39502057433128357, "learning_rate": 8.00743490865007e-06, "loss": 0.0119, "step": 86480 }, { "epoch": 0.730320238120369, "grad_norm": 0.1223309189081192, "learning_rate": 8.006846193893228e-06, "loss": 0.0114, "step": 86490 }, { "epoch": 0.7304046779675328, "grad_norm": 0.5155000686645508, "learning_rate": 8.006257413828303e-06, "loss": 0.0117, "step": 86500 }, { "epoch": 0.7304891178146967, "grad_norm": 0.30243825912475586, "learning_rate": 8.005668568468083e-06, "loss": 0.0126, "step": 86510 }, { "epoch": 0.7305735576618606, "grad_norm": 0.15759891271591187, "learning_rate": 8.005079657825358e-06, "loss": 0.0069, "step": 86520 }, { "epoch": 0.7306579975090245, "grad_norm": 0.5937212109565735, "learning_rate": 8.00449068191292e-06, "loss": 0.0117, "step": 86530 }, { "epoch": 0.7307424373561884, "grad_norm": 0.08832506090402603, "learning_rate": 8.003901640743559e-06, "loss": 0.0132, "step": 86540 }, { "epoch": 0.7308268772033523, "grad_norm": 0.6225007176399231, "learning_rate": 8.003312534330072e-06, "loss": 0.0172, "step": 86550 }, { "epoch": 0.7309113170505162, "grad_norm": 0.422209769487381, "learning_rate": 8.002723362685249e-06, "loss": 0.0114, "step": 86560 }, { "epoch": 0.73099575689768, "grad_norm": 0.8875448703765869, "learning_rate": 8.002134125821891e-06, "loss": 0.01, "step": 86570 }, { "epoch": 0.7310801967448439, "grad_norm": 0.20466001331806183, "learning_rate": 8.001544823752795e-06, "loss": 0.0106, "step": 86580 }, { "epoch": 0.7311646365920078, "grad_norm": 0.5818507671356201, "learning_rate": 8.000955456490762e-06, "loss": 0.0102, "step": 86590 }, { "epoch": 0.7312490764391717, "grad_norm": 0.06305008381605148, "learning_rate": 8.00036602404859e-06, "loss": 0.0128, "step": 86600 }, { "epoch": 0.7313335162863355, "grad_norm": 0.6419315934181213, "learning_rate": 7.999776526439083e-06, "loss": 0.0149, "step": 86610 }, { "epoch": 0.7314179561334994, "grad_norm": 0.4392645061016083, "learning_rate": 7.999186963675044e-06, "loss": 0.0142, "step": 86620 }, { "epoch": 0.7315023959806632, "grad_norm": 0.071088045835495, "learning_rate": 7.99859733576928e-06, "loss": 0.0116, "step": 86630 }, { "epoch": 0.7315868358278271, "grad_norm": 0.41851702332496643, "learning_rate": 7.998007642734593e-06, "loss": 0.0078, "step": 86640 }, { "epoch": 0.731671275674991, "grad_norm": 0.13291271030902863, "learning_rate": 7.997417884583799e-06, "loss": 0.009, "step": 86650 }, { "epoch": 0.7317557155221549, "grad_norm": 0.47474798560142517, "learning_rate": 7.9968280613297e-06, "loss": 0.0101, "step": 86660 }, { "epoch": 0.7318401553693188, "grad_norm": 0.4114578664302826, "learning_rate": 7.996238172985111e-06, "loss": 0.0103, "step": 86670 }, { "epoch": 0.7319245952164827, "grad_norm": 0.036234598606824875, "learning_rate": 7.995648219562842e-06, "loss": 0.0083, "step": 86680 }, { "epoch": 0.7320090350636466, "grad_norm": 0.07190628349781036, "learning_rate": 7.995058201075707e-06, "loss": 0.0143, "step": 86690 }, { "epoch": 0.7320934749108104, "grad_norm": 0.08885630965232849, "learning_rate": 7.994468117536523e-06, "loss": 0.009, "step": 86700 }, { "epoch": 0.7321779147579743, "grad_norm": 0.644585371017456, "learning_rate": 7.993877968958106e-06, "loss": 0.0164, "step": 86710 }, { "epoch": 0.7322623546051381, "grad_norm": 0.10861950367689133, "learning_rate": 7.99328775535327e-06, "loss": 0.0074, "step": 86720 }, { "epoch": 0.732346794452302, "grad_norm": 0.5537658333778381, "learning_rate": 7.992697476734838e-06, "loss": 0.0095, "step": 86730 }, { "epoch": 0.7324312342994659, "grad_norm": 0.15781839191913605, "learning_rate": 7.992107133115632e-06, "loss": 0.0088, "step": 86740 }, { "epoch": 0.7325156741466298, "grad_norm": 0.4882103502750397, "learning_rate": 7.99151672450847e-06, "loss": 0.0062, "step": 86750 }, { "epoch": 0.7326001139937937, "grad_norm": 0.44902563095092773, "learning_rate": 7.990926250926178e-06, "loss": 0.0139, "step": 86760 }, { "epoch": 0.7326845538409575, "grad_norm": 0.6307436227798462, "learning_rate": 7.990335712381583e-06, "loss": 0.0177, "step": 86770 }, { "epoch": 0.7327689936881214, "grad_norm": 0.406148225069046, "learning_rate": 7.989745108887507e-06, "loss": 0.0109, "step": 86780 }, { "epoch": 0.7328534335352853, "grad_norm": 0.35128000378608704, "learning_rate": 7.98915444045678e-06, "loss": 0.0079, "step": 86790 }, { "epoch": 0.7329378733824492, "grad_norm": 0.2594864070415497, "learning_rate": 7.988563707102232e-06, "loss": 0.0069, "step": 86800 }, { "epoch": 0.7330223132296131, "grad_norm": 0.3583991527557373, "learning_rate": 7.987972908836692e-06, "loss": 0.012, "step": 86810 }, { "epoch": 0.733106753076777, "grad_norm": 0.5409294962882996, "learning_rate": 7.987382045672994e-06, "loss": 0.0123, "step": 86820 }, { "epoch": 0.7331911929239409, "grad_norm": 0.23331373929977417, "learning_rate": 7.986791117623969e-06, "loss": 0.0162, "step": 86830 }, { "epoch": 0.7332756327711046, "grad_norm": 0.11279310286045074, "learning_rate": 7.986200124702454e-06, "loss": 0.0093, "step": 86840 }, { "epoch": 0.7333600726182685, "grad_norm": 0.3450140357017517, "learning_rate": 7.985609066921283e-06, "loss": 0.0086, "step": 86850 }, { "epoch": 0.7334445124654324, "grad_norm": 0.604846179485321, "learning_rate": 7.985017944293296e-06, "loss": 0.0231, "step": 86860 }, { "epoch": 0.7335289523125963, "grad_norm": 0.25405818223953247, "learning_rate": 7.98442675683133e-06, "loss": 0.0074, "step": 86870 }, { "epoch": 0.7336133921597602, "grad_norm": 0.2183246910572052, "learning_rate": 7.983835504548225e-06, "loss": 0.0168, "step": 86880 }, { "epoch": 0.7336978320069241, "grad_norm": 0.16477885842323303, "learning_rate": 7.983244187456826e-06, "loss": 0.0154, "step": 86890 }, { "epoch": 0.733782271854088, "grad_norm": 0.48725515604019165, "learning_rate": 7.982652805569974e-06, "loss": 0.0081, "step": 86900 }, { "epoch": 0.7338667117012518, "grad_norm": 0.1912836879491806, "learning_rate": 7.982061358900515e-06, "loss": 0.0073, "step": 86910 }, { "epoch": 0.7339511515484157, "grad_norm": 0.7524123191833496, "learning_rate": 7.981469847461293e-06, "loss": 0.0102, "step": 86920 }, { "epoch": 0.7340355913955796, "grad_norm": 0.28615593910217285, "learning_rate": 7.98087827126516e-06, "loss": 0.0101, "step": 86930 }, { "epoch": 0.7341200312427435, "grad_norm": 0.41718989610671997, "learning_rate": 7.980286630324959e-06, "loss": 0.0058, "step": 86940 }, { "epoch": 0.7342044710899073, "grad_norm": 0.11382339894771576, "learning_rate": 7.979694924653545e-06, "loss": 0.0071, "step": 86950 }, { "epoch": 0.7342889109370712, "grad_norm": 0.40984633564949036, "learning_rate": 7.979103154263766e-06, "loss": 0.0123, "step": 86960 }, { "epoch": 0.734373350784235, "grad_norm": 0.15764182806015015, "learning_rate": 7.978511319168477e-06, "loss": 0.0211, "step": 86970 }, { "epoch": 0.7344577906313989, "grad_norm": 0.35391783714294434, "learning_rate": 7.977919419380534e-06, "loss": 0.0161, "step": 86980 }, { "epoch": 0.7345422304785628, "grad_norm": 0.569646954536438, "learning_rate": 7.977327454912791e-06, "loss": 0.0123, "step": 86990 }, { "epoch": 0.7346266703257267, "grad_norm": 0.45986929535865784, "learning_rate": 7.976735425778104e-06, "loss": 0.013, "step": 87000 }, { "epoch": 0.7347111101728906, "grad_norm": 0.0034051020629704, "learning_rate": 7.976143331989337e-06, "loss": 0.0129, "step": 87010 }, { "epoch": 0.7347955500200545, "grad_norm": 0.5452712178230286, "learning_rate": 7.975551173559344e-06, "loss": 0.0111, "step": 87020 }, { "epoch": 0.7348799898672184, "grad_norm": 0.327096164226532, "learning_rate": 7.974958950500989e-06, "loss": 0.0089, "step": 87030 }, { "epoch": 0.7349644297143823, "grad_norm": 0.5414690971374512, "learning_rate": 7.974366662827137e-06, "loss": 0.0184, "step": 87040 }, { "epoch": 0.7350488695615461, "grad_norm": 0.34357932209968567, "learning_rate": 7.97377431055065e-06, "loss": 0.0082, "step": 87050 }, { "epoch": 0.73513330940871, "grad_norm": 0.855033814907074, "learning_rate": 7.973181893684393e-06, "loss": 0.0178, "step": 87060 }, { "epoch": 0.7352177492558738, "grad_norm": 0.3110605478286743, "learning_rate": 7.972589412241236e-06, "loss": 0.0131, "step": 87070 }, { "epoch": 0.7353021891030377, "grad_norm": 0.22737771272659302, "learning_rate": 7.971996866234046e-06, "loss": 0.0131, "step": 87080 }, { "epoch": 0.7353866289502016, "grad_norm": 0.389635294675827, "learning_rate": 7.971404255675693e-06, "loss": 0.0092, "step": 87090 }, { "epoch": 0.7354710687973655, "grad_norm": 0.27716371417045593, "learning_rate": 7.970811580579047e-06, "loss": 0.0065, "step": 87100 }, { "epoch": 0.7355555086445293, "grad_norm": 0.3259392976760864, "learning_rate": 7.970218840956985e-06, "loss": 0.014, "step": 87110 }, { "epoch": 0.7356399484916932, "grad_norm": 0.6497021913528442, "learning_rate": 7.969626036822377e-06, "loss": 0.0259, "step": 87120 }, { "epoch": 0.7357243883388571, "grad_norm": 0.25126558542251587, "learning_rate": 7.9690331681881e-06, "loss": 0.0183, "step": 87130 }, { "epoch": 0.735808828186021, "grad_norm": 0.8622070550918579, "learning_rate": 7.96844023506703e-06, "loss": 0.0169, "step": 87140 }, { "epoch": 0.7358932680331849, "grad_norm": 0.476689875125885, "learning_rate": 7.967847237472047e-06, "loss": 0.0102, "step": 87150 }, { "epoch": 0.7359777078803488, "grad_norm": 0.2581923305988312, "learning_rate": 7.96725417541603e-06, "loss": 0.0111, "step": 87160 }, { "epoch": 0.7360621477275127, "grad_norm": 0.14051605761051178, "learning_rate": 7.966661048911861e-06, "loss": 0.0068, "step": 87170 }, { "epoch": 0.7361465875746764, "grad_norm": 0.26263126730918884, "learning_rate": 7.966067857972423e-06, "loss": 0.0074, "step": 87180 }, { "epoch": 0.7362310274218403, "grad_norm": 0.8073062896728516, "learning_rate": 7.965474602610598e-06, "loss": 0.0099, "step": 87190 }, { "epoch": 0.7363154672690042, "grad_norm": 0.2580782175064087, "learning_rate": 7.964881282839272e-06, "loss": 0.0113, "step": 87200 }, { "epoch": 0.7363999071161681, "grad_norm": 0.27262434363365173, "learning_rate": 7.964287898671332e-06, "loss": 0.022, "step": 87210 }, { "epoch": 0.736484346963332, "grad_norm": 0.9495628476142883, "learning_rate": 7.963694450119666e-06, "loss": 0.0134, "step": 87220 }, { "epoch": 0.7365687868104959, "grad_norm": 0.2950515151023865, "learning_rate": 7.963100937197165e-06, "loss": 0.0073, "step": 87230 }, { "epoch": 0.7366532266576598, "grad_norm": 0.3620468080043793, "learning_rate": 7.962507359916718e-06, "loss": 0.0059, "step": 87240 }, { "epoch": 0.7367376665048236, "grad_norm": 0.6492351293563843, "learning_rate": 7.961913718291218e-06, "loss": 0.0105, "step": 87250 }, { "epoch": 0.7368221063519875, "grad_norm": 0.4311179518699646, "learning_rate": 7.96132001233356e-06, "loss": 0.0057, "step": 87260 }, { "epoch": 0.7369065461991514, "grad_norm": 0.49871090054512024, "learning_rate": 7.960726242056637e-06, "loss": 0.0113, "step": 87270 }, { "epoch": 0.7369909860463153, "grad_norm": 0.2690944969654083, "learning_rate": 7.960132407473347e-06, "loss": 0.016, "step": 87280 }, { "epoch": 0.7370754258934792, "grad_norm": 0.06369420886039734, "learning_rate": 7.959538508596587e-06, "loss": 0.0117, "step": 87290 }, { "epoch": 0.737159865740643, "grad_norm": 0.3028205633163452, "learning_rate": 7.95894454543926e-06, "loss": 0.013, "step": 87300 }, { "epoch": 0.7372443055878068, "grad_norm": 0.45665016770362854, "learning_rate": 7.958350518014263e-06, "loss": 0.0226, "step": 87310 }, { "epoch": 0.7373287454349707, "grad_norm": 0.1694893091917038, "learning_rate": 7.957756426334496e-06, "loss": 0.0085, "step": 87320 }, { "epoch": 0.7374131852821346, "grad_norm": 0.15013240277767181, "learning_rate": 7.957162270412868e-06, "loss": 0.0141, "step": 87330 }, { "epoch": 0.7374976251292985, "grad_norm": 2.0048294067382812, "learning_rate": 7.956568050262281e-06, "loss": 0.018, "step": 87340 }, { "epoch": 0.7375820649764624, "grad_norm": 0.18960054218769073, "learning_rate": 7.955973765895642e-06, "loss": 0.0166, "step": 87350 }, { "epoch": 0.7376665048236263, "grad_norm": 0.41937628388404846, "learning_rate": 7.955379417325856e-06, "loss": 0.0131, "step": 87360 }, { "epoch": 0.7377509446707902, "grad_norm": 0.6369764804840088, "learning_rate": 7.954785004565837e-06, "loss": 0.0121, "step": 87370 }, { "epoch": 0.737835384517954, "grad_norm": 0.9462361335754395, "learning_rate": 7.954190527628493e-06, "loss": 0.009, "step": 87380 }, { "epoch": 0.7379198243651179, "grad_norm": 0.40336576104164124, "learning_rate": 7.953595986526737e-06, "loss": 0.009, "step": 87390 }, { "epoch": 0.7380042642122818, "grad_norm": 0.3289884030818939, "learning_rate": 7.95300138127348e-06, "loss": 0.0097, "step": 87400 }, { "epoch": 0.7380887040594456, "grad_norm": 0.5430086851119995, "learning_rate": 7.952406711881639e-06, "loss": 0.0111, "step": 87410 }, { "epoch": 0.7381731439066095, "grad_norm": 0.45659253001213074, "learning_rate": 7.951811978364131e-06, "loss": 0.0148, "step": 87420 }, { "epoch": 0.7382575837537734, "grad_norm": 0.16008736193180084, "learning_rate": 7.951217180733867e-06, "loss": 0.0098, "step": 87430 }, { "epoch": 0.7383420236009373, "grad_norm": 0.11975302547216415, "learning_rate": 7.950622319003773e-06, "loss": 0.0036, "step": 87440 }, { "epoch": 0.7384264634481011, "grad_norm": 0.35898277163505554, "learning_rate": 7.950027393186767e-06, "loss": 0.0085, "step": 87450 }, { "epoch": 0.738510903295265, "grad_norm": 0.23549386858940125, "learning_rate": 7.94943240329577e-06, "loss": 0.0092, "step": 87460 }, { "epoch": 0.7385953431424289, "grad_norm": 0.40738388895988464, "learning_rate": 7.948837349343708e-06, "loss": 0.0097, "step": 87470 }, { "epoch": 0.7386797829895928, "grad_norm": 0.19575929641723633, "learning_rate": 7.948242231343501e-06, "loss": 0.0186, "step": 87480 }, { "epoch": 0.7387642228367567, "grad_norm": 0.34021908044815063, "learning_rate": 7.947647049308077e-06, "loss": 0.0114, "step": 87490 }, { "epoch": 0.7388486626839206, "grad_norm": 0.24566198885440826, "learning_rate": 7.947051803250363e-06, "loss": 0.0083, "step": 87500 }, { "epoch": 0.7389331025310845, "grad_norm": 0.34893929958343506, "learning_rate": 7.946456493183287e-06, "loss": 0.008, "step": 87510 }, { "epoch": 0.7390175423782483, "grad_norm": 0.2562847435474396, "learning_rate": 7.945861119119783e-06, "loss": 0.0086, "step": 87520 }, { "epoch": 0.7391019822254121, "grad_norm": 0.46123775839805603, "learning_rate": 7.945265681072777e-06, "loss": 0.0116, "step": 87530 }, { "epoch": 0.739186422072576, "grad_norm": 0.45615875720977783, "learning_rate": 7.944670179055203e-06, "loss": 0.0161, "step": 87540 }, { "epoch": 0.7392708619197399, "grad_norm": 0.33569806814193726, "learning_rate": 7.944074613079997e-06, "loss": 0.0097, "step": 87550 }, { "epoch": 0.7393553017669038, "grad_norm": 0.04565145820379257, "learning_rate": 7.943478983160093e-06, "loss": 0.0069, "step": 87560 }, { "epoch": 0.7394397416140677, "grad_norm": 0.1812274307012558, "learning_rate": 7.942883289308431e-06, "loss": 0.0093, "step": 87570 }, { "epoch": 0.7395241814612316, "grad_norm": 0.4132445454597473, "learning_rate": 7.942287531537945e-06, "loss": 0.0117, "step": 87580 }, { "epoch": 0.7396086213083954, "grad_norm": 0.3600015640258789, "learning_rate": 7.941691709861577e-06, "loss": 0.012, "step": 87590 }, { "epoch": 0.7396930611555593, "grad_norm": 0.4545721709728241, "learning_rate": 7.941095824292268e-06, "loss": 0.008, "step": 87600 }, { "epoch": 0.7397775010027232, "grad_norm": 0.37921345233917236, "learning_rate": 7.94049987484296e-06, "loss": 0.0047, "step": 87610 }, { "epoch": 0.7398619408498871, "grad_norm": 0.34250378608703613, "learning_rate": 7.939903861526597e-06, "loss": 0.0117, "step": 87620 }, { "epoch": 0.739946380697051, "grad_norm": 0.9611367583274841, "learning_rate": 7.939307784356125e-06, "loss": 0.0159, "step": 87630 }, { "epoch": 0.7400308205442148, "grad_norm": 0.4732019603252411, "learning_rate": 7.938711643344492e-06, "loss": 0.0103, "step": 87640 }, { "epoch": 0.7401152603913786, "grad_norm": 0.2781355082988739, "learning_rate": 7.93811543850464e-06, "loss": 0.0128, "step": 87650 }, { "epoch": 0.7401997002385425, "grad_norm": 0.34232115745544434, "learning_rate": 7.937519169849529e-06, "loss": 0.0184, "step": 87660 }, { "epoch": 0.7402841400857064, "grad_norm": 0.4524278938770294, "learning_rate": 7.9369228373921e-06, "loss": 0.0074, "step": 87670 }, { "epoch": 0.7403685799328703, "grad_norm": 0.2772931456565857, "learning_rate": 7.936326441145308e-06, "loss": 0.0166, "step": 87680 }, { "epoch": 0.7404530197800342, "grad_norm": 0.12466172128915787, "learning_rate": 7.935729981122109e-06, "loss": 0.0148, "step": 87690 }, { "epoch": 0.7405374596271981, "grad_norm": 0.27791649103164673, "learning_rate": 7.935133457335455e-06, "loss": 0.0149, "step": 87700 }, { "epoch": 0.740621899474362, "grad_norm": 0.21466557681560516, "learning_rate": 7.934536869798305e-06, "loss": 0.0136, "step": 87710 }, { "epoch": 0.7407063393215259, "grad_norm": 0.1839444637298584, "learning_rate": 7.933940218523614e-06, "loss": 0.0085, "step": 87720 }, { "epoch": 0.7407907791686897, "grad_norm": 0.19670678675174713, "learning_rate": 7.933343503524345e-06, "loss": 0.0108, "step": 87730 }, { "epoch": 0.7408752190158536, "grad_norm": 0.3643207550048828, "learning_rate": 7.932746724813454e-06, "loss": 0.009, "step": 87740 }, { "epoch": 0.7409596588630175, "grad_norm": 0.3637222945690155, "learning_rate": 7.932149882403905e-06, "loss": 0.008, "step": 87750 }, { "epoch": 0.7410440987101813, "grad_norm": 0.015356576070189476, "learning_rate": 7.931552976308663e-06, "loss": 0.0088, "step": 87760 }, { "epoch": 0.7411285385573452, "grad_norm": 0.21164332330226898, "learning_rate": 7.93095600654069e-06, "loss": 0.0121, "step": 87770 }, { "epoch": 0.7412129784045091, "grad_norm": 0.32732954621315, "learning_rate": 7.930358973112956e-06, "loss": 0.0041, "step": 87780 }, { "epoch": 0.741297418251673, "grad_norm": 0.3913794457912445, "learning_rate": 7.929761876038421e-06, "loss": 0.0073, "step": 87790 }, { "epoch": 0.7413818580988368, "grad_norm": 0.267338365316391, "learning_rate": 7.929164715330063e-06, "loss": 0.0097, "step": 87800 }, { "epoch": 0.7414662979460007, "grad_norm": 0.07290028035640717, "learning_rate": 7.928567491000844e-06, "loss": 0.0079, "step": 87810 }, { "epoch": 0.7415507377931646, "grad_norm": 0.29384055733680725, "learning_rate": 7.92797020306374e-06, "loss": 0.0099, "step": 87820 }, { "epoch": 0.7416351776403285, "grad_norm": 0.353538453578949, "learning_rate": 7.927372851531725e-06, "loss": 0.0159, "step": 87830 }, { "epoch": 0.7417196174874924, "grad_norm": 0.21766388416290283, "learning_rate": 7.92677543641777e-06, "loss": 0.0166, "step": 87840 }, { "epoch": 0.7418040573346563, "grad_norm": 0.2646865248680115, "learning_rate": 7.926177957734852e-06, "loss": 0.0094, "step": 87850 }, { "epoch": 0.7418884971818202, "grad_norm": 0.04843255877494812, "learning_rate": 7.925580415495949e-06, "loss": 0.0087, "step": 87860 }, { "epoch": 0.7419729370289839, "grad_norm": 0.5243197083473206, "learning_rate": 7.92498280971404e-06, "loss": 0.0179, "step": 87870 }, { "epoch": 0.7420573768761478, "grad_norm": 0.1613481640815735, "learning_rate": 7.924385140402102e-06, "loss": 0.0111, "step": 87880 }, { "epoch": 0.7421418167233117, "grad_norm": 0.36550891399383545, "learning_rate": 7.923787407573118e-06, "loss": 0.0109, "step": 87890 }, { "epoch": 0.7422262565704756, "grad_norm": 0.0780896320939064, "learning_rate": 7.923189611240071e-06, "loss": 0.0164, "step": 87900 }, { "epoch": 0.7423106964176395, "grad_norm": 0.140274778008461, "learning_rate": 7.922591751415944e-06, "loss": 0.0069, "step": 87910 }, { "epoch": 0.7423951362648034, "grad_norm": 0.45007994771003723, "learning_rate": 7.921993828113725e-06, "loss": 0.0095, "step": 87920 }, { "epoch": 0.7424795761119672, "grad_norm": 0.21238046884536743, "learning_rate": 7.921395841346398e-06, "loss": 0.0116, "step": 87930 }, { "epoch": 0.7425640159591311, "grad_norm": 0.5057246685028076, "learning_rate": 7.92079779112695e-06, "loss": 0.0096, "step": 87940 }, { "epoch": 0.742648455806295, "grad_norm": 0.8982477784156799, "learning_rate": 7.920199677468376e-06, "loss": 0.0115, "step": 87950 }, { "epoch": 0.7427328956534589, "grad_norm": 0.17028582096099854, "learning_rate": 7.91960150038366e-06, "loss": 0.0175, "step": 87960 }, { "epoch": 0.7428173355006228, "grad_norm": 0.5256276726722717, "learning_rate": 7.9190032598858e-06, "loss": 0.0098, "step": 87970 }, { "epoch": 0.7429017753477867, "grad_norm": 0.156707763671875, "learning_rate": 7.918404955987788e-06, "loss": 0.0197, "step": 87980 }, { "epoch": 0.7429862151949505, "grad_norm": 0.14858856797218323, "learning_rate": 7.917806588702617e-06, "loss": 0.0104, "step": 87990 }, { "epoch": 0.7430706550421143, "grad_norm": 0.61899733543396, "learning_rate": 7.917208158043283e-06, "loss": 0.0118, "step": 88000 }, { "epoch": 0.7431550948892782, "grad_norm": 0.44958508014678955, "learning_rate": 7.91660966402279e-06, "loss": 0.0105, "step": 88010 }, { "epoch": 0.7432395347364421, "grad_norm": 0.12308090180158615, "learning_rate": 7.916011106654128e-06, "loss": 0.0098, "step": 88020 }, { "epoch": 0.743323974583606, "grad_norm": 0.21056754887104034, "learning_rate": 7.915412485950305e-06, "loss": 0.0103, "step": 88030 }, { "epoch": 0.7434084144307699, "grad_norm": 0.38771864771842957, "learning_rate": 7.914813801924321e-06, "loss": 0.0123, "step": 88040 }, { "epoch": 0.7434928542779338, "grad_norm": 0.5336720943450928, "learning_rate": 7.914215054589177e-06, "loss": 0.0111, "step": 88050 }, { "epoch": 0.7435772941250977, "grad_norm": 0.6987167596817017, "learning_rate": 7.91361624395788e-06, "loss": 0.0132, "step": 88060 }, { "epoch": 0.7436617339722615, "grad_norm": 0.48462584614753723, "learning_rate": 7.913017370043435e-06, "loss": 0.0106, "step": 88070 }, { "epoch": 0.7437461738194254, "grad_norm": 0.10187336057424545, "learning_rate": 7.912418432858849e-06, "loss": 0.0101, "step": 88080 }, { "epoch": 0.7438306136665893, "grad_norm": 0.5871703624725342, "learning_rate": 7.911819432417131e-06, "loss": 0.0127, "step": 88090 }, { "epoch": 0.7439150535137531, "grad_norm": 0.3164505958557129, "learning_rate": 7.911220368731293e-06, "loss": 0.0092, "step": 88100 }, { "epoch": 0.743999493360917, "grad_norm": 0.4733808636665344, "learning_rate": 7.910621241814345e-06, "loss": 0.0136, "step": 88110 }, { "epoch": 0.7440839332080809, "grad_norm": 0.32382121682167053, "learning_rate": 7.910022051679299e-06, "loss": 0.0144, "step": 88120 }, { "epoch": 0.7441683730552447, "grad_norm": 0.3713063597679138, "learning_rate": 7.909422798339172e-06, "loss": 0.0086, "step": 88130 }, { "epoch": 0.7442528129024086, "grad_norm": 0.4584559500217438, "learning_rate": 7.908823481806978e-06, "loss": 0.0096, "step": 88140 }, { "epoch": 0.7443372527495725, "grad_norm": 0.20714685320854187, "learning_rate": 7.908224102095731e-06, "loss": 0.0096, "step": 88150 }, { "epoch": 0.7444216925967364, "grad_norm": 0.495391845703125, "learning_rate": 7.907624659218456e-06, "loss": 0.0117, "step": 88160 }, { "epoch": 0.7445061324439003, "grad_norm": 0.28193429112434387, "learning_rate": 7.907025153188167e-06, "loss": 0.0154, "step": 88170 }, { "epoch": 0.7445905722910642, "grad_norm": 0.1951211541891098, "learning_rate": 7.906425584017888e-06, "loss": 0.0061, "step": 88180 }, { "epoch": 0.7446750121382281, "grad_norm": 1.253271460533142, "learning_rate": 7.90582595172064e-06, "loss": 0.0163, "step": 88190 }, { "epoch": 0.744759451985392, "grad_norm": 0.4807746708393097, "learning_rate": 7.90522625630945e-06, "loss": 0.007, "step": 88200 }, { "epoch": 0.7448438918325558, "grad_norm": 0.36988192796707153, "learning_rate": 7.904626497797341e-06, "loss": 0.0127, "step": 88210 }, { "epoch": 0.7449283316797196, "grad_norm": 0.2683667540550232, "learning_rate": 7.904026676197339e-06, "loss": 0.0066, "step": 88220 }, { "epoch": 0.7450127715268835, "grad_norm": 0.7473675012588501, "learning_rate": 7.903426791522471e-06, "loss": 0.0196, "step": 88230 }, { "epoch": 0.7450972113740474, "grad_norm": 0.4154859483242035, "learning_rate": 7.90282684378577e-06, "loss": 0.0087, "step": 88240 }, { "epoch": 0.7451816512212113, "grad_norm": 0.35767602920532227, "learning_rate": 7.902226833000264e-06, "loss": 0.0096, "step": 88250 }, { "epoch": 0.7452660910683752, "grad_norm": 0.5167533159255981, "learning_rate": 7.901626759178987e-06, "loss": 0.0167, "step": 88260 }, { "epoch": 0.745350530915539, "grad_norm": 0.4552498161792755, "learning_rate": 7.90102662233497e-06, "loss": 0.0072, "step": 88270 }, { "epoch": 0.7454349707627029, "grad_norm": 0.0016732299700379372, "learning_rate": 7.90042642248125e-06, "loss": 0.012, "step": 88280 }, { "epoch": 0.7455194106098668, "grad_norm": 0.19992391765117645, "learning_rate": 7.899826159630862e-06, "loss": 0.0157, "step": 88290 }, { "epoch": 0.7456038504570307, "grad_norm": 0.39547672867774963, "learning_rate": 7.899225833796843e-06, "loss": 0.0165, "step": 88300 }, { "epoch": 0.7456882903041946, "grad_norm": 0.3706544041633606, "learning_rate": 7.898625444992234e-06, "loss": 0.0125, "step": 88310 }, { "epoch": 0.7457727301513585, "grad_norm": 0.26691994071006775, "learning_rate": 7.898024993230075e-06, "loss": 0.0132, "step": 88320 }, { "epoch": 0.7458571699985223, "grad_norm": 0.538378119468689, "learning_rate": 7.897424478523407e-06, "loss": 0.0087, "step": 88330 }, { "epoch": 0.7459416098456861, "grad_norm": 0.527824878692627, "learning_rate": 7.896823900885272e-06, "loss": 0.0094, "step": 88340 }, { "epoch": 0.74602604969285, "grad_norm": 0.13316847383975983, "learning_rate": 7.896223260328716e-06, "loss": 0.0067, "step": 88350 }, { "epoch": 0.7461104895400139, "grad_norm": 0.07664944231510162, "learning_rate": 7.895622556866784e-06, "loss": 0.0085, "step": 88360 }, { "epoch": 0.7461949293871778, "grad_norm": 0.5336474180221558, "learning_rate": 7.895021790512525e-06, "loss": 0.0132, "step": 88370 }, { "epoch": 0.7462793692343417, "grad_norm": 0.3269382119178772, "learning_rate": 7.894420961278986e-06, "loss": 0.0056, "step": 88380 }, { "epoch": 0.7463638090815056, "grad_norm": 0.2566206157207489, "learning_rate": 7.893820069179214e-06, "loss": 0.0106, "step": 88390 }, { "epoch": 0.7464482489286695, "grad_norm": 0.1642303317785263, "learning_rate": 7.893219114226265e-06, "loss": 0.0137, "step": 88400 }, { "epoch": 0.7465326887758333, "grad_norm": 0.26515424251556396, "learning_rate": 7.892618096433189e-06, "loss": 0.0072, "step": 88410 }, { "epoch": 0.7466171286229972, "grad_norm": 0.34303009510040283, "learning_rate": 7.892017015813041e-06, "loss": 0.0145, "step": 88420 }, { "epoch": 0.7467015684701611, "grad_norm": 0.4552285671234131, "learning_rate": 7.891415872378876e-06, "loss": 0.0093, "step": 88430 }, { "epoch": 0.746786008317325, "grad_norm": 0.4641534388065338, "learning_rate": 7.890814666143751e-06, "loss": 0.0122, "step": 88440 }, { "epoch": 0.7468704481644888, "grad_norm": 0.20216473937034607, "learning_rate": 7.890213397120725e-06, "loss": 0.0128, "step": 88450 }, { "epoch": 0.7469548880116527, "grad_norm": 0.351805180311203, "learning_rate": 7.889612065322853e-06, "loss": 0.011, "step": 88460 }, { "epoch": 0.7470393278588165, "grad_norm": 0.07905690371990204, "learning_rate": 7.889010670763201e-06, "loss": 0.0142, "step": 88470 }, { "epoch": 0.7471237677059804, "grad_norm": 0.32494470477104187, "learning_rate": 7.88840921345483e-06, "loss": 0.0171, "step": 88480 }, { "epoch": 0.7472082075531443, "grad_norm": 0.3235076665878296, "learning_rate": 7.887807693410803e-06, "loss": 0.0102, "step": 88490 }, { "epoch": 0.7472926474003082, "grad_norm": 0.34236764907836914, "learning_rate": 7.887206110644183e-06, "loss": 0.0084, "step": 88500 }, { "epoch": 0.7473770872474721, "grad_norm": 0.2380935102701187, "learning_rate": 7.886604465168039e-06, "loss": 0.008, "step": 88510 }, { "epoch": 0.747461527094636, "grad_norm": 0.14925995469093323, "learning_rate": 7.886002756995438e-06, "loss": 0.0111, "step": 88520 }, { "epoch": 0.7475459669417999, "grad_norm": 0.4157719016075134, "learning_rate": 7.885400986139448e-06, "loss": 0.0241, "step": 88530 }, { "epoch": 0.7476304067889638, "grad_norm": 0.7118277549743652, "learning_rate": 7.884799152613138e-06, "loss": 0.0116, "step": 88540 }, { "epoch": 0.7477148466361276, "grad_norm": 0.24267444014549255, "learning_rate": 7.884197256429584e-06, "loss": 0.0071, "step": 88550 }, { "epoch": 0.7477992864832914, "grad_norm": 0.46073344349861145, "learning_rate": 7.883595297601856e-06, "loss": 0.0129, "step": 88560 }, { "epoch": 0.7478837263304553, "grad_norm": 0.16120943427085876, "learning_rate": 7.882993276143029e-06, "loss": 0.0083, "step": 88570 }, { "epoch": 0.7479681661776192, "grad_norm": 0.3644396960735321, "learning_rate": 7.882391192066179e-06, "loss": 0.0106, "step": 88580 }, { "epoch": 0.7480526060247831, "grad_norm": 0.5161688327789307, "learning_rate": 7.881789045384382e-06, "loss": 0.0204, "step": 88590 }, { "epoch": 0.748137045871947, "grad_norm": 0.36267465353012085, "learning_rate": 7.881186836110718e-06, "loss": 0.0097, "step": 88600 }, { "epoch": 0.7482214857191108, "grad_norm": 0.19664345681667328, "learning_rate": 7.880584564258267e-06, "loss": 0.0098, "step": 88610 }, { "epoch": 0.7483059255662747, "grad_norm": 0.35704636573791504, "learning_rate": 7.879982229840108e-06, "loss": 0.0065, "step": 88620 }, { "epoch": 0.7483903654134386, "grad_norm": 0.51207435131073, "learning_rate": 7.879379832869327e-06, "loss": 0.0187, "step": 88630 }, { "epoch": 0.7484748052606025, "grad_norm": 0.27125680446624756, "learning_rate": 7.878777373359006e-06, "loss": 0.011, "step": 88640 }, { "epoch": 0.7485592451077664, "grad_norm": 0.5489634871482849, "learning_rate": 7.87817485132223e-06, "loss": 0.024, "step": 88650 }, { "epoch": 0.7486436849549303, "grad_norm": 0.39806032180786133, "learning_rate": 7.877572266772084e-06, "loss": 0.0204, "step": 88660 }, { "epoch": 0.748728124802094, "grad_norm": 0.09005700796842575, "learning_rate": 7.876969619721659e-06, "loss": 0.0074, "step": 88670 }, { "epoch": 0.7488125646492579, "grad_norm": 0.23346322774887085, "learning_rate": 7.876366910184046e-06, "loss": 0.0115, "step": 88680 }, { "epoch": 0.7488970044964218, "grad_norm": 0.11299215257167816, "learning_rate": 7.87576413817233e-06, "loss": 0.0143, "step": 88690 }, { "epoch": 0.7489814443435857, "grad_norm": 0.34680330753326416, "learning_rate": 7.875161303699608e-06, "loss": 0.0099, "step": 88700 }, { "epoch": 0.7490658841907496, "grad_norm": 0.5930879712104797, "learning_rate": 7.87455840677897e-06, "loss": 0.0091, "step": 88710 }, { "epoch": 0.7491503240379135, "grad_norm": 0.312641441822052, "learning_rate": 7.873955447423514e-06, "loss": 0.0067, "step": 88720 }, { "epoch": 0.7492347638850774, "grad_norm": 0.19481134414672852, "learning_rate": 7.873352425646335e-06, "loss": 0.0157, "step": 88730 }, { "epoch": 0.7493192037322413, "grad_norm": 0.28677111864089966, "learning_rate": 7.872749341460529e-06, "loss": 0.0092, "step": 88740 }, { "epoch": 0.7494036435794051, "grad_norm": 0.16967524588108063, "learning_rate": 7.872146194879196e-06, "loss": 0.0184, "step": 88750 }, { "epoch": 0.749488083426569, "grad_norm": 0.14488136768341064, "learning_rate": 7.871542985915436e-06, "loss": 0.0115, "step": 88760 }, { "epoch": 0.7495725232737329, "grad_norm": 1.3550702333450317, "learning_rate": 7.87093971458235e-06, "loss": 0.0132, "step": 88770 }, { "epoch": 0.7496569631208968, "grad_norm": 0.09752330929040909, "learning_rate": 7.870336380893044e-06, "loss": 0.0065, "step": 88780 }, { "epoch": 0.7497414029680606, "grad_norm": 0.2920394241809845, "learning_rate": 7.869732984860618e-06, "loss": 0.0086, "step": 88790 }, { "epoch": 0.7498258428152245, "grad_norm": 0.32339194416999817, "learning_rate": 7.869129526498182e-06, "loss": 0.0116, "step": 88800 }, { "epoch": 0.7499102826623883, "grad_norm": 0.24495074152946472, "learning_rate": 7.868526005818839e-06, "loss": 0.012, "step": 88810 }, { "epoch": 0.7499947225095522, "grad_norm": 0.38200145959854126, "learning_rate": 7.8679224228357e-06, "loss": 0.0103, "step": 88820 }, { "epoch": 0.7500791623567161, "grad_norm": 0.29957419633865356, "learning_rate": 7.867318777561872e-06, "loss": 0.0096, "step": 88830 }, { "epoch": 0.75016360220388, "grad_norm": 0.14282315969467163, "learning_rate": 7.86671507001047e-06, "loss": 0.0071, "step": 88840 }, { "epoch": 0.7502480420510439, "grad_norm": 0.26720353960990906, "learning_rate": 7.866111300194601e-06, "loss": 0.0123, "step": 88850 }, { "epoch": 0.7503324818982078, "grad_norm": 0.09083972126245499, "learning_rate": 7.865507468127383e-06, "loss": 0.0087, "step": 88860 }, { "epoch": 0.7504169217453717, "grad_norm": 0.19372332096099854, "learning_rate": 7.864903573821931e-06, "loss": 0.0093, "step": 88870 }, { "epoch": 0.7505013615925356, "grad_norm": 0.7817671895027161, "learning_rate": 7.86429961729136e-06, "loss": 0.0117, "step": 88880 }, { "epoch": 0.7505858014396994, "grad_norm": 0.3031810522079468, "learning_rate": 7.863695598548789e-06, "loss": 0.0059, "step": 88890 }, { "epoch": 0.7506702412868632, "grad_norm": 0.5106714963912964, "learning_rate": 7.863091517607336e-06, "loss": 0.0127, "step": 88900 }, { "epoch": 0.7507546811340271, "grad_norm": 0.25629374384880066, "learning_rate": 7.862487374480121e-06, "loss": 0.0133, "step": 88910 }, { "epoch": 0.750839120981191, "grad_norm": 0.9147878289222717, "learning_rate": 7.861883169180266e-06, "loss": 0.0143, "step": 88920 }, { "epoch": 0.7509235608283549, "grad_norm": 0.22839848697185516, "learning_rate": 7.861278901720896e-06, "loss": 0.0079, "step": 88930 }, { "epoch": 0.7510080006755188, "grad_norm": 0.30186721682548523, "learning_rate": 7.860674572115136e-06, "loss": 0.0078, "step": 88940 }, { "epoch": 0.7510924405226826, "grad_norm": 0.13304418325424194, "learning_rate": 7.860070180376109e-06, "loss": 0.0108, "step": 88950 }, { "epoch": 0.7511768803698465, "grad_norm": 0.043570905923843384, "learning_rate": 7.859465726516946e-06, "loss": 0.01, "step": 88960 }, { "epoch": 0.7512613202170104, "grad_norm": 0.5093977451324463, "learning_rate": 7.858861210550771e-06, "loss": 0.0134, "step": 88970 }, { "epoch": 0.7513457600641743, "grad_norm": 0.7747193574905396, "learning_rate": 7.858256632490717e-06, "loss": 0.0129, "step": 88980 }, { "epoch": 0.7514301999113382, "grad_norm": 0.31056874990463257, "learning_rate": 7.857651992349915e-06, "loss": 0.0154, "step": 88990 }, { "epoch": 0.7515146397585021, "grad_norm": 0.09119686484336853, "learning_rate": 7.857047290141497e-06, "loss": 0.0092, "step": 89000 }, { "epoch": 0.751599079605666, "grad_norm": 0.16187454760074615, "learning_rate": 7.856442525878597e-06, "loss": 0.0098, "step": 89010 }, { "epoch": 0.7516835194528297, "grad_norm": 0.29079994559288025, "learning_rate": 7.855837699574352e-06, "loss": 0.0067, "step": 89020 }, { "epoch": 0.7517679592999936, "grad_norm": 0.7495241165161133, "learning_rate": 7.855232811241895e-06, "loss": 0.0109, "step": 89030 }, { "epoch": 0.7518523991471575, "grad_norm": 0.38913848996162415, "learning_rate": 7.854627860894369e-06, "loss": 0.013, "step": 89040 }, { "epoch": 0.7519368389943214, "grad_norm": 0.35791322588920593, "learning_rate": 7.85402284854491e-06, "loss": 0.0157, "step": 89050 }, { "epoch": 0.7520212788414853, "grad_norm": 0.06697939336299896, "learning_rate": 7.853417774206657e-06, "loss": 0.0095, "step": 89060 }, { "epoch": 0.7521057186886492, "grad_norm": 0.3949160873889923, "learning_rate": 7.852812637892757e-06, "loss": 0.0127, "step": 89070 }, { "epoch": 0.7521901585358131, "grad_norm": 0.20523495972156525, "learning_rate": 7.852207439616352e-06, "loss": 0.015, "step": 89080 }, { "epoch": 0.752274598382977, "grad_norm": 0.4311440885066986, "learning_rate": 7.851602179390585e-06, "loss": 0.0182, "step": 89090 }, { "epoch": 0.7523590382301408, "grad_norm": 0.29139968752861023, "learning_rate": 7.850996857228604e-06, "loss": 0.0155, "step": 89100 }, { "epoch": 0.7524434780773047, "grad_norm": 0.6924179792404175, "learning_rate": 7.850391473143552e-06, "loss": 0.013, "step": 89110 }, { "epoch": 0.7525279179244686, "grad_norm": 0.012276029214262962, "learning_rate": 7.849786027148584e-06, "loss": 0.0089, "step": 89120 }, { "epoch": 0.7526123577716324, "grad_norm": 0.798468828201294, "learning_rate": 7.849180519256848e-06, "loss": 0.0122, "step": 89130 }, { "epoch": 0.7526967976187963, "grad_norm": 0.7853600382804871, "learning_rate": 7.848574949481493e-06, "loss": 0.0116, "step": 89140 }, { "epoch": 0.7527812374659602, "grad_norm": 0.5061559677124023, "learning_rate": 7.847969317835675e-06, "loss": 0.0113, "step": 89150 }, { "epoch": 0.752865677313124, "grad_norm": 0.4045909345149994, "learning_rate": 7.847363624332547e-06, "loss": 0.0105, "step": 89160 }, { "epoch": 0.7529501171602879, "grad_norm": 0.17245930433273315, "learning_rate": 7.846757868985265e-06, "loss": 0.0092, "step": 89170 }, { "epoch": 0.7530345570074518, "grad_norm": 0.6876760721206665, "learning_rate": 7.846152051806985e-06, "loss": 0.0143, "step": 89180 }, { "epoch": 0.7531189968546157, "grad_norm": 0.18891696631908417, "learning_rate": 7.845546172810865e-06, "loss": 0.0092, "step": 89190 }, { "epoch": 0.7532034367017796, "grad_norm": 0.5576713681221008, "learning_rate": 7.844940232010067e-06, "loss": 0.0118, "step": 89200 }, { "epoch": 0.7532878765489435, "grad_norm": 0.017253611236810684, "learning_rate": 7.844334229417749e-06, "loss": 0.0053, "step": 89210 }, { "epoch": 0.7533723163961074, "grad_norm": 0.5943980813026428, "learning_rate": 7.843728165047074e-06, "loss": 0.0124, "step": 89220 }, { "epoch": 0.7534567562432712, "grad_norm": 0.18126900494098663, "learning_rate": 7.843122038911206e-06, "loss": 0.0138, "step": 89230 }, { "epoch": 0.7535411960904351, "grad_norm": 0.180501788854599, "learning_rate": 7.842515851023312e-06, "loss": 0.0121, "step": 89240 }, { "epoch": 0.7536256359375989, "grad_norm": 0.11516611278057098, "learning_rate": 7.841909601396554e-06, "loss": 0.008, "step": 89250 }, { "epoch": 0.7537100757847628, "grad_norm": 0.2972223162651062, "learning_rate": 7.841303290044102e-06, "loss": 0.0073, "step": 89260 }, { "epoch": 0.7537945156319267, "grad_norm": 0.4505378007888794, "learning_rate": 7.840696916979124e-06, "loss": 0.014, "step": 89270 }, { "epoch": 0.7538789554790906, "grad_norm": 0.39686131477355957, "learning_rate": 7.840090482214792e-06, "loss": 0.0099, "step": 89280 }, { "epoch": 0.7539633953262544, "grad_norm": 0.3548177480697632, "learning_rate": 7.839483985764278e-06, "loss": 0.0101, "step": 89290 }, { "epoch": 0.7540478351734183, "grad_norm": 0.03706886246800423, "learning_rate": 7.838877427640752e-06, "loss": 0.0112, "step": 89300 }, { "epoch": 0.7541322750205822, "grad_norm": 0.12058810144662857, "learning_rate": 7.838270807857391e-06, "loss": 0.0068, "step": 89310 }, { "epoch": 0.7542167148677461, "grad_norm": 0.329941987991333, "learning_rate": 7.83766412642737e-06, "loss": 0.0162, "step": 89320 }, { "epoch": 0.75430115471491, "grad_norm": 0.365945041179657, "learning_rate": 7.837057383363863e-06, "loss": 0.0093, "step": 89330 }, { "epoch": 0.7543855945620739, "grad_norm": 0.14962348341941833, "learning_rate": 7.836450578680055e-06, "loss": 0.0159, "step": 89340 }, { "epoch": 0.7544700344092378, "grad_norm": 0.11854171752929688, "learning_rate": 7.835843712389118e-06, "loss": 0.0046, "step": 89350 }, { "epoch": 0.7545544742564015, "grad_norm": 0.22734513878822327, "learning_rate": 7.835236784504239e-06, "loss": 0.0145, "step": 89360 }, { "epoch": 0.7546389141035654, "grad_norm": 0.2686118483543396, "learning_rate": 7.834629795038598e-06, "loss": 0.0141, "step": 89370 }, { "epoch": 0.7547233539507293, "grad_norm": 0.08939797431230545, "learning_rate": 7.834022744005377e-06, "loss": 0.0091, "step": 89380 }, { "epoch": 0.7548077937978932, "grad_norm": 0.9570135474205017, "learning_rate": 7.833415631417764e-06, "loss": 0.0124, "step": 89390 }, { "epoch": 0.7548922336450571, "grad_norm": 0.2442178726196289, "learning_rate": 7.832808457288944e-06, "loss": 0.0103, "step": 89400 }, { "epoch": 0.754976673492221, "grad_norm": 0.3196796774864197, "learning_rate": 7.832201221632104e-06, "loss": 0.0191, "step": 89410 }, { "epoch": 0.7550611133393849, "grad_norm": 0.535068690776825, "learning_rate": 7.831593924460435e-06, "loss": 0.0105, "step": 89420 }, { "epoch": 0.7551455531865487, "grad_norm": 0.4327767491340637, "learning_rate": 7.830986565787126e-06, "loss": 0.0108, "step": 89430 }, { "epoch": 0.7552299930337126, "grad_norm": 0.21491201221942902, "learning_rate": 7.830379145625368e-06, "loss": 0.0105, "step": 89440 }, { "epoch": 0.7553144328808765, "grad_norm": 0.1696225106716156, "learning_rate": 7.829771663988356e-06, "loss": 0.015, "step": 89450 }, { "epoch": 0.7553988727280404, "grad_norm": 0.020074976608157158, "learning_rate": 7.829164120889284e-06, "loss": 0.0076, "step": 89460 }, { "epoch": 0.7554833125752043, "grad_norm": 0.24342477321624756, "learning_rate": 7.828556516341345e-06, "loss": 0.0083, "step": 89470 }, { "epoch": 0.7555677524223681, "grad_norm": 0.7913918495178223, "learning_rate": 7.827948850357741e-06, "loss": 0.0151, "step": 89480 }, { "epoch": 0.755652192269532, "grad_norm": 0.3931572437286377, "learning_rate": 7.827341122951663e-06, "loss": 0.0066, "step": 89490 }, { "epoch": 0.7557366321166958, "grad_norm": 0.42775487899780273, "learning_rate": 7.826733334136318e-06, "loss": 0.0079, "step": 89500 }, { "epoch": 0.7558210719638597, "grad_norm": 0.15754498541355133, "learning_rate": 7.826125483924905e-06, "loss": 0.0082, "step": 89510 }, { "epoch": 0.7559055118110236, "grad_norm": 0.4388115406036377, "learning_rate": 7.825517572330625e-06, "loss": 0.0145, "step": 89520 }, { "epoch": 0.7559899516581875, "grad_norm": 0.439988911151886, "learning_rate": 7.824909599366681e-06, "loss": 0.0098, "step": 89530 }, { "epoch": 0.7560743915053514, "grad_norm": 0.6359113454818726, "learning_rate": 7.82430156504628e-06, "loss": 0.0084, "step": 89540 }, { "epoch": 0.7561588313525153, "grad_norm": 0.6839362978935242, "learning_rate": 7.823693469382629e-06, "loss": 0.0151, "step": 89550 }, { "epoch": 0.7562432711996792, "grad_norm": 0.1276668906211853, "learning_rate": 7.823085312388934e-06, "loss": 0.0117, "step": 89560 }, { "epoch": 0.756327711046843, "grad_norm": 0.8621481657028198, "learning_rate": 7.822477094078405e-06, "loss": 0.0169, "step": 89570 }, { "epoch": 0.7564121508940069, "grad_norm": 0.1863410919904709, "learning_rate": 7.82186881446425e-06, "loss": 0.0096, "step": 89580 }, { "epoch": 0.7564965907411707, "grad_norm": 0.4008706510066986, "learning_rate": 7.821260473559683e-06, "loss": 0.008, "step": 89590 }, { "epoch": 0.7565810305883346, "grad_norm": 0.3094399571418762, "learning_rate": 7.820652071377918e-06, "loss": 0.0159, "step": 89600 }, { "epoch": 0.7566654704354985, "grad_norm": 0.22847333550453186, "learning_rate": 7.820043607932166e-06, "loss": 0.0107, "step": 89610 }, { "epoch": 0.7567499102826624, "grad_norm": 0.3020540773868561, "learning_rate": 7.819435083235647e-06, "loss": 0.0164, "step": 89620 }, { "epoch": 0.7568343501298262, "grad_norm": 0.5323520302772522, "learning_rate": 7.818826497301575e-06, "loss": 0.0101, "step": 89630 }, { "epoch": 0.7569187899769901, "grad_norm": 0.008896374143660069, "learning_rate": 7.818217850143168e-06, "loss": 0.0087, "step": 89640 }, { "epoch": 0.757003229824154, "grad_norm": 0.17978402972221375, "learning_rate": 7.817609141773648e-06, "loss": 0.0086, "step": 89650 }, { "epoch": 0.7570876696713179, "grad_norm": 0.37977105379104614, "learning_rate": 7.817000372206233e-06, "loss": 0.0068, "step": 89660 }, { "epoch": 0.7571721095184818, "grad_norm": 0.8069555163383484, "learning_rate": 7.81639154145415e-06, "loss": 0.0151, "step": 89670 }, { "epoch": 0.7572565493656457, "grad_norm": 0.08917049318552017, "learning_rate": 7.815782649530619e-06, "loss": 0.0057, "step": 89680 }, { "epoch": 0.7573409892128096, "grad_norm": 0.6197305917739868, "learning_rate": 7.815173696448865e-06, "loss": 0.0105, "step": 89690 }, { "epoch": 0.7574254290599735, "grad_norm": 0.26093780994415283, "learning_rate": 7.814564682222116e-06, "loss": 0.0141, "step": 89700 }, { "epoch": 0.7575098689071372, "grad_norm": 0.27875733375549316, "learning_rate": 7.813955606863598e-06, "loss": 0.0117, "step": 89710 }, { "epoch": 0.7575943087543011, "grad_norm": 0.3964492082595825, "learning_rate": 7.81334647038654e-06, "loss": 0.0151, "step": 89720 }, { "epoch": 0.757678748601465, "grad_norm": 0.781352162361145, "learning_rate": 7.812737272804176e-06, "loss": 0.0131, "step": 89730 }, { "epoch": 0.7577631884486289, "grad_norm": 0.2918989956378937, "learning_rate": 7.812128014129734e-06, "loss": 0.0092, "step": 89740 }, { "epoch": 0.7578476282957928, "grad_norm": 0.41652992367744446, "learning_rate": 7.811518694376445e-06, "loss": 0.0087, "step": 89750 }, { "epoch": 0.7579320681429567, "grad_norm": 0.19544050097465515, "learning_rate": 7.810909313557548e-06, "loss": 0.0177, "step": 89760 }, { "epoch": 0.7580165079901205, "grad_norm": 0.45422425866127014, "learning_rate": 7.810299871686278e-06, "loss": 0.0099, "step": 89770 }, { "epoch": 0.7581009478372844, "grad_norm": 0.1523500233888626, "learning_rate": 7.809690368775868e-06, "loss": 0.0046, "step": 89780 }, { "epoch": 0.7581853876844483, "grad_norm": 0.5750439763069153, "learning_rate": 7.809080804839561e-06, "loss": 0.0107, "step": 89790 }, { "epoch": 0.7582698275316122, "grad_norm": 0.5538508892059326, "learning_rate": 7.808471179890595e-06, "loss": 0.016, "step": 89800 }, { "epoch": 0.7583542673787761, "grad_norm": 0.5191295742988586, "learning_rate": 7.80786149394221e-06, "loss": 0.0232, "step": 89810 }, { "epoch": 0.7584387072259399, "grad_norm": 0.5136842727661133, "learning_rate": 7.807251747007646e-06, "loss": 0.0106, "step": 89820 }, { "epoch": 0.7585231470731038, "grad_norm": 0.5871850848197937, "learning_rate": 7.806641939100153e-06, "loss": 0.0113, "step": 89830 }, { "epoch": 0.7586075869202676, "grad_norm": 0.3480895459651947, "learning_rate": 7.806032070232972e-06, "loss": 0.0091, "step": 89840 }, { "epoch": 0.7586920267674315, "grad_norm": 0.5352044701576233, "learning_rate": 7.805422140419348e-06, "loss": 0.008, "step": 89850 }, { "epoch": 0.7587764666145954, "grad_norm": 0.22786912322044373, "learning_rate": 7.804812149672529e-06, "loss": 0.0086, "step": 89860 }, { "epoch": 0.7588609064617593, "grad_norm": 0.6555438041687012, "learning_rate": 7.804202098005767e-06, "loss": 0.0151, "step": 89870 }, { "epoch": 0.7589453463089232, "grad_norm": 0.20243315398693085, "learning_rate": 7.803591985432308e-06, "loss": 0.0127, "step": 89880 }, { "epoch": 0.7590297861560871, "grad_norm": 0.5741400122642517, "learning_rate": 7.802981811965407e-06, "loss": 0.0137, "step": 89890 }, { "epoch": 0.759114226003251, "grad_norm": 0.4167797267436981, "learning_rate": 7.802371577618317e-06, "loss": 0.0205, "step": 89900 }, { "epoch": 0.7591986658504148, "grad_norm": 0.4420183300971985, "learning_rate": 7.801761282404288e-06, "loss": 0.0045, "step": 89910 }, { "epoch": 0.7592831056975787, "grad_norm": 0.42534661293029785, "learning_rate": 7.801150926336579e-06, "loss": 0.0231, "step": 89920 }, { "epoch": 0.7593675455447426, "grad_norm": 0.1483539193868637, "learning_rate": 7.800540509428446e-06, "loss": 0.0078, "step": 89930 }, { "epoch": 0.7594519853919064, "grad_norm": 0.34618064761161804, "learning_rate": 7.799930031693146e-06, "loss": 0.0111, "step": 89940 }, { "epoch": 0.7595364252390703, "grad_norm": 0.4129757285118103, "learning_rate": 7.79931949314394e-06, "loss": 0.0079, "step": 89950 }, { "epoch": 0.7596208650862342, "grad_norm": 0.2676324248313904, "learning_rate": 7.79870889379409e-06, "loss": 0.016, "step": 89960 }, { "epoch": 0.759705304933398, "grad_norm": 0.3946526348590851, "learning_rate": 7.798098233656854e-06, "loss": 0.0111, "step": 89970 }, { "epoch": 0.7597897447805619, "grad_norm": 0.7065615057945251, "learning_rate": 7.797487512745498e-06, "loss": 0.0214, "step": 89980 }, { "epoch": 0.7598741846277258, "grad_norm": 0.6522395610809326, "learning_rate": 7.796876731073287e-06, "loss": 0.0165, "step": 89990 }, { "epoch": 0.7599586244748897, "grad_norm": 0.26651108264923096, "learning_rate": 7.796265888653488e-06, "loss": 0.0076, "step": 90000 }, { "epoch": 0.7600430643220536, "grad_norm": 0.539939284324646, "learning_rate": 7.795654985499364e-06, "loss": 0.0166, "step": 90010 }, { "epoch": 0.7601275041692175, "grad_norm": 0.6289499402046204, "learning_rate": 7.79504402162419e-06, "loss": 0.016, "step": 90020 }, { "epoch": 0.7602119440163814, "grad_norm": 0.005043610464781523, "learning_rate": 7.79443299704123e-06, "loss": 0.0076, "step": 90030 }, { "epoch": 0.7602963838635453, "grad_norm": 0.27895137667655945, "learning_rate": 7.793821911763758e-06, "loss": 0.01, "step": 90040 }, { "epoch": 0.760380823710709, "grad_norm": 0.6691794991493225, "learning_rate": 7.793210765805048e-06, "loss": 0.0201, "step": 90050 }, { "epoch": 0.7604652635578729, "grad_norm": 0.2068815976381302, "learning_rate": 7.792599559178371e-06, "loss": 0.0069, "step": 90060 }, { "epoch": 0.7605497034050368, "grad_norm": 0.31127724051475525, "learning_rate": 7.791988291897005e-06, "loss": 0.0133, "step": 90070 }, { "epoch": 0.7606341432522007, "grad_norm": 0.25620728731155396, "learning_rate": 7.791376963974223e-06, "loss": 0.0083, "step": 90080 }, { "epoch": 0.7607185830993646, "grad_norm": 0.18761341273784637, "learning_rate": 7.790765575423309e-06, "loss": 0.0108, "step": 90090 }, { "epoch": 0.7608030229465285, "grad_norm": 0.28384703397750854, "learning_rate": 7.790154126257536e-06, "loss": 0.0157, "step": 90100 }, { "epoch": 0.7608874627936923, "grad_norm": 0.1597013771533966, "learning_rate": 7.789542616490188e-06, "loss": 0.0073, "step": 90110 }, { "epoch": 0.7609719026408562, "grad_norm": 0.4798146188259125, "learning_rate": 7.788931046134546e-06, "loss": 0.0104, "step": 90120 }, { "epoch": 0.7610563424880201, "grad_norm": 0.6900001168251038, "learning_rate": 7.788319415203894e-06, "loss": 0.0154, "step": 90130 }, { "epoch": 0.761140782335184, "grad_norm": 0.4429200291633606, "learning_rate": 7.787707723711515e-06, "loss": 0.0101, "step": 90140 }, { "epoch": 0.7612252221823479, "grad_norm": 0.1990441083908081, "learning_rate": 7.787095971670696e-06, "loss": 0.0064, "step": 90150 }, { "epoch": 0.7613096620295118, "grad_norm": 0.8437617421150208, "learning_rate": 7.786484159094723e-06, "loss": 0.0088, "step": 90160 }, { "epoch": 0.7613941018766756, "grad_norm": 0.38079550862312317, "learning_rate": 7.785872285996885e-06, "loss": 0.0074, "step": 90170 }, { "epoch": 0.7614785417238394, "grad_norm": 0.2846391499042511, "learning_rate": 7.785260352390472e-06, "loss": 0.0082, "step": 90180 }, { "epoch": 0.7615629815710033, "grad_norm": 0.3402736186981201, "learning_rate": 7.784648358288775e-06, "loss": 0.0089, "step": 90190 }, { "epoch": 0.7616474214181672, "grad_norm": 0.3236958086490631, "learning_rate": 7.784036303705087e-06, "loss": 0.0117, "step": 90200 }, { "epoch": 0.7617318612653311, "grad_norm": 0.2203202098608017, "learning_rate": 7.783424188652699e-06, "loss": 0.0132, "step": 90210 }, { "epoch": 0.761816301112495, "grad_norm": 0.653739869594574, "learning_rate": 7.782812013144908e-06, "loss": 0.0098, "step": 90220 }, { "epoch": 0.7619007409596589, "grad_norm": 0.29294928908348083, "learning_rate": 7.782199777195013e-06, "loss": 0.0117, "step": 90230 }, { "epoch": 0.7619851808068228, "grad_norm": 0.2976645827293396, "learning_rate": 7.781587480816307e-06, "loss": 0.0079, "step": 90240 }, { "epoch": 0.7620696206539866, "grad_norm": 0.24025699496269226, "learning_rate": 7.780975124022092e-06, "loss": 0.0085, "step": 90250 }, { "epoch": 0.7621540605011505, "grad_norm": 0.6049690842628479, "learning_rate": 7.780362706825667e-06, "loss": 0.0065, "step": 90260 }, { "epoch": 0.7622385003483144, "grad_norm": 0.5311673879623413, "learning_rate": 7.779750229240332e-06, "loss": 0.009, "step": 90270 }, { "epoch": 0.7623229401954782, "grad_norm": 0.13145050406455994, "learning_rate": 7.779137691279395e-06, "loss": 0.0055, "step": 90280 }, { "epoch": 0.7624073800426421, "grad_norm": 0.22085072100162506, "learning_rate": 7.778525092956154e-06, "loss": 0.0082, "step": 90290 }, { "epoch": 0.762491819889806, "grad_norm": 0.7466312646865845, "learning_rate": 7.77791243428392e-06, "loss": 0.0127, "step": 90300 }, { "epoch": 0.7625762597369699, "grad_norm": 0.40993842482566833, "learning_rate": 7.777299715275993e-06, "loss": 0.0156, "step": 90310 }, { "epoch": 0.7626606995841337, "grad_norm": 0.28517305850982666, "learning_rate": 7.776686935945687e-06, "loss": 0.0101, "step": 90320 }, { "epoch": 0.7627451394312976, "grad_norm": 0.5348262190818787, "learning_rate": 7.77607409630631e-06, "loss": 0.008, "step": 90330 }, { "epoch": 0.7628295792784615, "grad_norm": 0.4010855555534363, "learning_rate": 7.775461196371174e-06, "loss": 0.0132, "step": 90340 }, { "epoch": 0.7629140191256254, "grad_norm": 0.48390740156173706, "learning_rate": 7.774848236153589e-06, "loss": 0.011, "step": 90350 }, { "epoch": 0.7629984589727893, "grad_norm": 0.39709341526031494, "learning_rate": 7.774235215666867e-06, "loss": 0.0102, "step": 90360 }, { "epoch": 0.7630828988199532, "grad_norm": 0.10485260933637619, "learning_rate": 7.773622134924327e-06, "loss": 0.0165, "step": 90370 }, { "epoch": 0.7631673386671171, "grad_norm": 0.4760212004184723, "learning_rate": 7.773008993939282e-06, "loss": 0.0065, "step": 90380 }, { "epoch": 0.7632517785142809, "grad_norm": 0.3920793831348419, "learning_rate": 7.772395792725049e-06, "loss": 0.018, "step": 90390 }, { "epoch": 0.7633362183614447, "grad_norm": 0.31366023421287537, "learning_rate": 7.77178253129495e-06, "loss": 0.0233, "step": 90400 }, { "epoch": 0.7634206582086086, "grad_norm": 0.12234637886285782, "learning_rate": 7.771169209662299e-06, "loss": 0.0123, "step": 90410 }, { "epoch": 0.7635050980557725, "grad_norm": 0.48293063044548035, "learning_rate": 7.770555827840422e-06, "loss": 0.0107, "step": 90420 }, { "epoch": 0.7635895379029364, "grad_norm": 0.47712594270706177, "learning_rate": 7.76994238584264e-06, "loss": 0.02, "step": 90430 }, { "epoch": 0.7636739777501003, "grad_norm": 0.19398075342178345, "learning_rate": 7.769328883682276e-06, "loss": 0.0079, "step": 90440 }, { "epoch": 0.7637584175972641, "grad_norm": 0.31003591418266296, "learning_rate": 7.768715321372659e-06, "loss": 0.0122, "step": 90450 }, { "epoch": 0.763842857444428, "grad_norm": 0.3096877634525299, "learning_rate": 7.76810169892711e-06, "loss": 0.0162, "step": 90460 }, { "epoch": 0.7639272972915919, "grad_norm": 0.273796409368515, "learning_rate": 7.76748801635896e-06, "loss": 0.0084, "step": 90470 }, { "epoch": 0.7640117371387558, "grad_norm": 0.3468668758869171, "learning_rate": 7.766874273681538e-06, "loss": 0.0164, "step": 90480 }, { "epoch": 0.7640961769859197, "grad_norm": 0.33907467126846313, "learning_rate": 7.766260470908173e-06, "loss": 0.015, "step": 90490 }, { "epoch": 0.7641806168330836, "grad_norm": 0.21840721368789673, "learning_rate": 7.7656466080522e-06, "loss": 0.014, "step": 90500 }, { "epoch": 0.7642650566802474, "grad_norm": 1.0549236536026, "learning_rate": 7.765032685126945e-06, "loss": 0.0179, "step": 90510 }, { "epoch": 0.7643494965274112, "grad_norm": 0.37718889117240906, "learning_rate": 7.76441870214575e-06, "loss": 0.0062, "step": 90520 }, { "epoch": 0.7644339363745751, "grad_norm": 0.6700727939605713, "learning_rate": 7.763804659121947e-06, "loss": 0.012, "step": 90530 }, { "epoch": 0.764518376221739, "grad_norm": 1.0435210466384888, "learning_rate": 7.763190556068871e-06, "loss": 0.0189, "step": 90540 }, { "epoch": 0.7646028160689029, "grad_norm": 0.3960329294204712, "learning_rate": 7.762576392999864e-06, "loss": 0.0154, "step": 90550 }, { "epoch": 0.7646872559160668, "grad_norm": 0.2714386284351349, "learning_rate": 7.761962169928262e-06, "loss": 0.0157, "step": 90560 }, { "epoch": 0.7647716957632307, "grad_norm": 0.2360585331916809, "learning_rate": 7.76134788686741e-06, "loss": 0.0077, "step": 90570 }, { "epoch": 0.7648561356103946, "grad_norm": 0.1510002315044403, "learning_rate": 7.760733543830648e-06, "loss": 0.0115, "step": 90580 }, { "epoch": 0.7649405754575584, "grad_norm": 0.23425166308879852, "learning_rate": 7.760119140831317e-06, "loss": 0.0082, "step": 90590 }, { "epoch": 0.7650250153047223, "grad_norm": 0.5660430788993835, "learning_rate": 7.759504677882765e-06, "loss": 0.0179, "step": 90600 }, { "epoch": 0.7651094551518862, "grad_norm": 0.2383926808834076, "learning_rate": 7.75889015499834e-06, "loss": 0.0114, "step": 90610 }, { "epoch": 0.7651938949990501, "grad_norm": 0.3136890232563019, "learning_rate": 7.758275572191381e-06, "loss": 0.0059, "step": 90620 }, { "epoch": 0.7652783348462139, "grad_norm": 0.22256216406822205, "learning_rate": 7.757660929475244e-06, "loss": 0.0087, "step": 90630 }, { "epoch": 0.7653627746933778, "grad_norm": 0.3575599789619446, "learning_rate": 7.757046226863277e-06, "loss": 0.0133, "step": 90640 }, { "epoch": 0.7654472145405417, "grad_norm": 0.39906764030456543, "learning_rate": 7.75643146436883e-06, "loss": 0.009, "step": 90650 }, { "epoch": 0.7655316543877055, "grad_norm": 0.45095551013946533, "learning_rate": 7.755816642005258e-06, "loss": 0.0091, "step": 90660 }, { "epoch": 0.7656160942348694, "grad_norm": 0.35351282358169556, "learning_rate": 7.75520175978591e-06, "loss": 0.0169, "step": 90670 }, { "epoch": 0.7657005340820333, "grad_norm": 0.4657156765460968, "learning_rate": 7.754586817724147e-06, "loss": 0.0184, "step": 90680 }, { "epoch": 0.7657849739291972, "grad_norm": 0.10823739320039749, "learning_rate": 7.753971815833323e-06, "loss": 0.0115, "step": 90690 }, { "epoch": 0.7658694137763611, "grad_norm": 0.15676623582839966, "learning_rate": 7.753356754126795e-06, "loss": 0.0089, "step": 90700 }, { "epoch": 0.765953853623525, "grad_norm": 0.18925480544567108, "learning_rate": 7.752741632617922e-06, "loss": 0.0121, "step": 90710 }, { "epoch": 0.7660382934706889, "grad_norm": 0.41310548782348633, "learning_rate": 7.752126451320065e-06, "loss": 0.0109, "step": 90720 }, { "epoch": 0.7661227333178527, "grad_norm": 0.31565770506858826, "learning_rate": 7.751511210246587e-06, "loss": 0.0182, "step": 90730 }, { "epoch": 0.7662071731650165, "grad_norm": 0.800092875957489, "learning_rate": 7.750895909410847e-06, "loss": 0.0156, "step": 90740 }, { "epoch": 0.7662916130121804, "grad_norm": 0.15218140184879303, "learning_rate": 7.750280548826212e-06, "loss": 0.0114, "step": 90750 }, { "epoch": 0.7663760528593443, "grad_norm": 0.18917784094810486, "learning_rate": 7.749665128506047e-06, "loss": 0.0142, "step": 90760 }, { "epoch": 0.7664604927065082, "grad_norm": 0.14547403156757355, "learning_rate": 7.74904964846372e-06, "loss": 0.0102, "step": 90770 }, { "epoch": 0.7665449325536721, "grad_norm": 0.3626301884651184, "learning_rate": 7.748434108712598e-06, "loss": 0.0143, "step": 90780 }, { "epoch": 0.766629372400836, "grad_norm": 0.01596161723136902, "learning_rate": 7.74781850926605e-06, "loss": 0.0059, "step": 90790 }, { "epoch": 0.7667138122479998, "grad_norm": 0.22212953865528107, "learning_rate": 7.747202850137448e-06, "loss": 0.0081, "step": 90800 }, { "epoch": 0.7667982520951637, "grad_norm": 0.22956250607967377, "learning_rate": 7.74658713134016e-06, "loss": 0.007, "step": 90810 }, { "epoch": 0.7668826919423276, "grad_norm": 0.37648099660873413, "learning_rate": 7.745971352887565e-06, "loss": 0.0085, "step": 90820 }, { "epoch": 0.7669671317894915, "grad_norm": 0.20187298953533173, "learning_rate": 7.745355514793034e-06, "loss": 0.0125, "step": 90830 }, { "epoch": 0.7670515716366554, "grad_norm": 0.45831796526908875, "learning_rate": 7.744739617069944e-06, "loss": 0.0224, "step": 90840 }, { "epoch": 0.7671360114838193, "grad_norm": 0.25900962948799133, "learning_rate": 7.744123659731673e-06, "loss": 0.0115, "step": 90850 }, { "epoch": 0.767220451330983, "grad_norm": 0.598459005355835, "learning_rate": 7.743507642791596e-06, "loss": 0.0115, "step": 90860 }, { "epoch": 0.7673048911781469, "grad_norm": 0.2682592272758484, "learning_rate": 7.742891566263098e-06, "loss": 0.0155, "step": 90870 }, { "epoch": 0.7673893310253108, "grad_norm": 0.343114972114563, "learning_rate": 7.742275430159555e-06, "loss": 0.0065, "step": 90880 }, { "epoch": 0.7674737708724747, "grad_norm": 0.05014533922076225, "learning_rate": 7.741659234494351e-06, "loss": 0.0057, "step": 90890 }, { "epoch": 0.7675582107196386, "grad_norm": 0.09519422054290771, "learning_rate": 7.741042979280873e-06, "loss": 0.0116, "step": 90900 }, { "epoch": 0.7676426505668025, "grad_norm": 0.23969241976737976, "learning_rate": 7.740426664532501e-06, "loss": 0.0137, "step": 90910 }, { "epoch": 0.7677270904139664, "grad_norm": 0.2007368505001068, "learning_rate": 7.739810290262625e-06, "loss": 0.013, "step": 90920 }, { "epoch": 0.7678115302611302, "grad_norm": 0.4270741641521454, "learning_rate": 7.73919385648463e-06, "loss": 0.01, "step": 90930 }, { "epoch": 0.7678959701082941, "grad_norm": 0.26381850242614746, "learning_rate": 7.738577363211905e-06, "loss": 0.0123, "step": 90940 }, { "epoch": 0.767980409955458, "grad_norm": 1.1021881103515625, "learning_rate": 7.737960810457843e-06, "loss": 0.0174, "step": 90950 }, { "epoch": 0.7680648498026219, "grad_norm": 0.6672292351722717, "learning_rate": 7.737344198235832e-06, "loss": 0.015, "step": 90960 }, { "epoch": 0.7681492896497857, "grad_norm": 0.46059104800224304, "learning_rate": 7.736727526559267e-06, "loss": 0.01, "step": 90970 }, { "epoch": 0.7682337294969496, "grad_norm": 0.31848394870758057, "learning_rate": 7.736110795441539e-06, "loss": 0.0108, "step": 90980 }, { "epoch": 0.7683181693441135, "grad_norm": 0.21734851598739624, "learning_rate": 7.735494004896046e-06, "loss": 0.0165, "step": 90990 }, { "epoch": 0.7684026091912773, "grad_norm": 0.23119813203811646, "learning_rate": 7.734877154936185e-06, "loss": 0.0095, "step": 91000 }, { "epoch": 0.7684870490384412, "grad_norm": 0.1704513281583786, "learning_rate": 7.734260245575352e-06, "loss": 0.0097, "step": 91010 }, { "epoch": 0.7685714888856051, "grad_norm": 0.23055967688560486, "learning_rate": 7.733643276826946e-06, "loss": 0.0111, "step": 91020 }, { "epoch": 0.768655928732769, "grad_norm": 0.33509767055511475, "learning_rate": 7.733026248704367e-06, "loss": 0.0089, "step": 91030 }, { "epoch": 0.7687403685799329, "grad_norm": 0.3628711998462677, "learning_rate": 7.73240916122102e-06, "loss": 0.0056, "step": 91040 }, { "epoch": 0.7688248084270968, "grad_norm": 0.39126771688461304, "learning_rate": 7.731792014390306e-06, "loss": 0.0158, "step": 91050 }, { "epoch": 0.7689092482742607, "grad_norm": 0.6945012211799622, "learning_rate": 7.731174808225628e-06, "loss": 0.0104, "step": 91060 }, { "epoch": 0.7689936881214245, "grad_norm": 0.460935115814209, "learning_rate": 7.730557542740394e-06, "loss": 0.0107, "step": 91070 }, { "epoch": 0.7690781279685884, "grad_norm": 0.27671751379966736, "learning_rate": 7.72994021794801e-06, "loss": 0.0138, "step": 91080 }, { "epoch": 0.7691625678157522, "grad_norm": 0.010623154230415821, "learning_rate": 7.729322833861882e-06, "loss": 0.0159, "step": 91090 }, { "epoch": 0.7692470076629161, "grad_norm": 0.02852027863264084, "learning_rate": 7.728705390495424e-06, "loss": 0.0102, "step": 91100 }, { "epoch": 0.76933144751008, "grad_norm": 0.22921745479106903, "learning_rate": 7.728087887862042e-06, "loss": 0.0113, "step": 91110 }, { "epoch": 0.7694158873572439, "grad_norm": 0.26003381609916687, "learning_rate": 7.727470325975151e-06, "loss": 0.0086, "step": 91120 }, { "epoch": 0.7695003272044078, "grad_norm": 0.3854292631149292, "learning_rate": 7.726852704848163e-06, "loss": 0.0151, "step": 91130 }, { "epoch": 0.7695847670515716, "grad_norm": 0.13535092771053314, "learning_rate": 7.726235024494492e-06, "loss": 0.0102, "step": 91140 }, { "epoch": 0.7696692068987355, "grad_norm": 0.38414034247398376, "learning_rate": 7.725617284927556e-06, "loss": 0.0128, "step": 91150 }, { "epoch": 0.7697536467458994, "grad_norm": 0.5334053635597229, "learning_rate": 7.724999486160771e-06, "loss": 0.0119, "step": 91160 }, { "epoch": 0.7698380865930633, "grad_norm": 0.5224205851554871, "learning_rate": 7.724381628207555e-06, "loss": 0.0085, "step": 91170 }, { "epoch": 0.7699225264402272, "grad_norm": 0.1041782945394516, "learning_rate": 7.72376371108133e-06, "loss": 0.0066, "step": 91180 }, { "epoch": 0.7700069662873911, "grad_norm": 0.4178987741470337, "learning_rate": 7.723145734795514e-06, "loss": 0.0148, "step": 91190 }, { "epoch": 0.7700914061345548, "grad_norm": 0.35757869482040405, "learning_rate": 7.72252769936353e-06, "loss": 0.0177, "step": 91200 }, { "epoch": 0.7701758459817187, "grad_norm": 0.4899786114692688, "learning_rate": 7.721909604798805e-06, "loss": 0.0101, "step": 91210 }, { "epoch": 0.7702602858288826, "grad_norm": 0.8973730206489563, "learning_rate": 7.721291451114759e-06, "loss": 0.0142, "step": 91220 }, { "epoch": 0.7703447256760465, "grad_norm": 0.2160560041666031, "learning_rate": 7.72067323832482e-06, "loss": 0.0096, "step": 91230 }, { "epoch": 0.7704291655232104, "grad_norm": 0.47424089908599854, "learning_rate": 7.720054966442417e-06, "loss": 0.0141, "step": 91240 }, { "epoch": 0.7705136053703743, "grad_norm": 0.2649015188217163, "learning_rate": 7.719436635480978e-06, "loss": 0.0165, "step": 91250 }, { "epoch": 0.7705980452175382, "grad_norm": 0.4369942843914032, "learning_rate": 7.718818245453931e-06, "loss": 0.0112, "step": 91260 }, { "epoch": 0.770682485064702, "grad_norm": 0.34450873732566833, "learning_rate": 7.71819979637471e-06, "loss": 0.0122, "step": 91270 }, { "epoch": 0.7707669249118659, "grad_norm": 0.2915295362472534, "learning_rate": 7.717581288256745e-06, "loss": 0.0076, "step": 91280 }, { "epoch": 0.7708513647590298, "grad_norm": 0.5396221280097961, "learning_rate": 7.716962721113472e-06, "loss": 0.0091, "step": 91290 }, { "epoch": 0.7709358046061937, "grad_norm": 0.4422382116317749, "learning_rate": 7.716344094958326e-06, "loss": 0.0064, "step": 91300 }, { "epoch": 0.7710202444533575, "grad_norm": 0.5494523048400879, "learning_rate": 7.715725409804741e-06, "loss": 0.0145, "step": 91310 }, { "epoch": 0.7711046843005214, "grad_norm": 0.26120296120643616, "learning_rate": 7.715106665666161e-06, "loss": 0.0097, "step": 91320 }, { "epoch": 0.7711891241476853, "grad_norm": 0.13232603669166565, "learning_rate": 7.714487862556017e-06, "loss": 0.0138, "step": 91330 }, { "epoch": 0.7712735639948491, "grad_norm": 0.6677103638648987, "learning_rate": 7.713869000487753e-06, "loss": 0.0182, "step": 91340 }, { "epoch": 0.771358003842013, "grad_norm": 0.1806616485118866, "learning_rate": 7.71325007947481e-06, "loss": 0.0085, "step": 91350 }, { "epoch": 0.7714424436891769, "grad_norm": 0.2018352895975113, "learning_rate": 7.712631099530635e-06, "loss": 0.0109, "step": 91360 }, { "epoch": 0.7715268835363408, "grad_norm": 0.21845555305480957, "learning_rate": 7.712012060668665e-06, "loss": 0.0111, "step": 91370 }, { "epoch": 0.7716113233835047, "grad_norm": 0.08816181868314743, "learning_rate": 7.711392962902349e-06, "loss": 0.0089, "step": 91380 }, { "epoch": 0.7716957632306686, "grad_norm": 0.26717379689216614, "learning_rate": 7.710773806245133e-06, "loss": 0.0092, "step": 91390 }, { "epoch": 0.7717802030778325, "grad_norm": 0.4537869989871979, "learning_rate": 7.710154590710468e-06, "loss": 0.0127, "step": 91400 }, { "epoch": 0.7718646429249963, "grad_norm": 0.26153257489204407, "learning_rate": 7.709535316311798e-06, "loss": 0.0072, "step": 91410 }, { "epoch": 0.7719490827721602, "grad_norm": 0.1861245334148407, "learning_rate": 7.708915983062577e-06, "loss": 0.0228, "step": 91420 }, { "epoch": 0.772033522619324, "grad_norm": 0.5391172766685486, "learning_rate": 7.708296590976257e-06, "loss": 0.009, "step": 91430 }, { "epoch": 0.7721179624664879, "grad_norm": 0.6048523783683777, "learning_rate": 7.70767714006629e-06, "loss": 0.0106, "step": 91440 }, { "epoch": 0.7722024023136518, "grad_norm": 0.33606526255607605, "learning_rate": 7.707057630346129e-06, "loss": 0.0142, "step": 91450 }, { "epoch": 0.7722868421608157, "grad_norm": 0.16585925221443176, "learning_rate": 7.706438061829231e-06, "loss": 0.0274, "step": 91460 }, { "epoch": 0.7723712820079796, "grad_norm": 0.2767567038536072, "learning_rate": 7.705818434529053e-06, "loss": 0.0081, "step": 91470 }, { "epoch": 0.7724557218551434, "grad_norm": 0.3924160897731781, "learning_rate": 7.705198748459054e-06, "loss": 0.0096, "step": 91480 }, { "epoch": 0.7725401617023073, "grad_norm": 0.05111759155988693, "learning_rate": 7.704579003632692e-06, "loss": 0.008, "step": 91490 }, { "epoch": 0.7726246015494712, "grad_norm": 0.3000834882259369, "learning_rate": 7.70395920006343e-06, "loss": 0.0043, "step": 91500 }, { "epoch": 0.7727090413966351, "grad_norm": 0.29215043783187866, "learning_rate": 7.703339337764725e-06, "loss": 0.0087, "step": 91510 }, { "epoch": 0.772793481243799, "grad_norm": 1.9322826862335205, "learning_rate": 7.702719416750044e-06, "loss": 0.011, "step": 91520 }, { "epoch": 0.7728779210909629, "grad_norm": 0.18462665379047394, "learning_rate": 7.702099437032852e-06, "loss": 0.0148, "step": 91530 }, { "epoch": 0.7729623609381266, "grad_norm": 0.45262208580970764, "learning_rate": 7.701479398626615e-06, "loss": 0.0122, "step": 91540 }, { "epoch": 0.7730468007852905, "grad_norm": 1.1635643243789673, "learning_rate": 7.700859301544799e-06, "loss": 0.0136, "step": 91550 }, { "epoch": 0.7731312406324544, "grad_norm": 0.07139170169830322, "learning_rate": 7.700239145800872e-06, "loss": 0.0227, "step": 91560 }, { "epoch": 0.7732156804796183, "grad_norm": 0.27320054173469543, "learning_rate": 7.699618931408304e-06, "loss": 0.0086, "step": 91570 }, { "epoch": 0.7733001203267822, "grad_norm": 0.44249552488327026, "learning_rate": 7.698998658380565e-06, "loss": 0.0122, "step": 91580 }, { "epoch": 0.7733845601739461, "grad_norm": 0.5619689226150513, "learning_rate": 7.69837832673113e-06, "loss": 0.0086, "step": 91590 }, { "epoch": 0.77346900002111, "grad_norm": 0.4312300384044647, "learning_rate": 7.697757936473469e-06, "loss": 0.0092, "step": 91600 }, { "epoch": 0.7735534398682739, "grad_norm": 0.33418044447898865, "learning_rate": 7.697137487621058e-06, "loss": 0.0172, "step": 91610 }, { "epoch": 0.7736378797154377, "grad_norm": 0.10100852698087692, "learning_rate": 7.696516980187376e-06, "loss": 0.0066, "step": 91620 }, { "epoch": 0.7737223195626016, "grad_norm": 0.5252349972724915, "learning_rate": 7.695896414185897e-06, "loss": 0.0125, "step": 91630 }, { "epoch": 0.7738067594097655, "grad_norm": 0.4120596647262573, "learning_rate": 7.6952757896301e-06, "loss": 0.0096, "step": 91640 }, { "epoch": 0.7738911992569294, "grad_norm": 0.6588190197944641, "learning_rate": 7.694655106533463e-06, "loss": 0.0129, "step": 91650 }, { "epoch": 0.7739756391040932, "grad_norm": 0.6628325581550598, "learning_rate": 7.69403436490947e-06, "loss": 0.0146, "step": 91660 }, { "epoch": 0.7740600789512571, "grad_norm": 0.3697108328342438, "learning_rate": 7.693413564771605e-06, "loss": 0.0075, "step": 91670 }, { "epoch": 0.7741445187984209, "grad_norm": 0.6749153733253479, "learning_rate": 7.69279270613335e-06, "loss": 0.0117, "step": 91680 }, { "epoch": 0.7742289586455848, "grad_norm": 0.09355434775352478, "learning_rate": 7.692171789008186e-06, "loss": 0.0152, "step": 91690 }, { "epoch": 0.7743133984927487, "grad_norm": 0.2220313847064972, "learning_rate": 7.691550813409603e-06, "loss": 0.0106, "step": 91700 }, { "epoch": 0.7743978383399126, "grad_norm": 0.4156566262245178, "learning_rate": 7.69092977935109e-06, "loss": 0.016, "step": 91710 }, { "epoch": 0.7744822781870765, "grad_norm": 0.8983721733093262, "learning_rate": 7.69030868684613e-06, "loss": 0.009, "step": 91720 }, { "epoch": 0.7745667180342404, "grad_norm": 0.3294638693332672, "learning_rate": 7.68968753590822e-06, "loss": 0.0129, "step": 91730 }, { "epoch": 0.7746511578814043, "grad_norm": 0.08946523815393448, "learning_rate": 7.689066326550847e-06, "loss": 0.0096, "step": 91740 }, { "epoch": 0.7747355977285681, "grad_norm": 0.7444349527359009, "learning_rate": 7.688445058787504e-06, "loss": 0.011, "step": 91750 }, { "epoch": 0.774820037575732, "grad_norm": 0.2017284780740738, "learning_rate": 7.687823732631685e-06, "loss": 0.0093, "step": 91760 }, { "epoch": 0.7749044774228958, "grad_norm": 0.30601397156715393, "learning_rate": 7.687202348096886e-06, "loss": 0.0103, "step": 91770 }, { "epoch": 0.7749889172700597, "grad_norm": 0.02184474840760231, "learning_rate": 7.686580905196601e-06, "loss": 0.0162, "step": 91780 }, { "epoch": 0.7750733571172236, "grad_norm": 0.3244497776031494, "learning_rate": 7.685959403944332e-06, "loss": 0.0129, "step": 91790 }, { "epoch": 0.7751577969643875, "grad_norm": 0.4902385473251343, "learning_rate": 7.685337844353576e-06, "loss": 0.0118, "step": 91800 }, { "epoch": 0.7752422368115514, "grad_norm": 0.458231657743454, "learning_rate": 7.684716226437828e-06, "loss": 0.0102, "step": 91810 }, { "epoch": 0.7753266766587152, "grad_norm": 0.2707454264163971, "learning_rate": 7.684094550210595e-06, "loss": 0.0087, "step": 91820 }, { "epoch": 0.7754111165058791, "grad_norm": 0.1802917718887329, "learning_rate": 7.68347281568538e-06, "loss": 0.0115, "step": 91830 }, { "epoch": 0.775495556353043, "grad_norm": 0.4130752682685852, "learning_rate": 7.682851022875685e-06, "loss": 0.0144, "step": 91840 }, { "epoch": 0.7755799962002069, "grad_norm": 0.08309891074895859, "learning_rate": 7.682229171795014e-06, "loss": 0.0103, "step": 91850 }, { "epoch": 0.7756644360473708, "grad_norm": 0.04340028390288353, "learning_rate": 7.681607262456876e-06, "loss": 0.0188, "step": 91860 }, { "epoch": 0.7757488758945347, "grad_norm": 0.4734213948249817, "learning_rate": 7.680985294874778e-06, "loss": 0.0064, "step": 91870 }, { "epoch": 0.7758333157416986, "grad_norm": 0.07051458209753036, "learning_rate": 7.68036326906223e-06, "loss": 0.0103, "step": 91880 }, { "epoch": 0.7759177555888623, "grad_norm": 0.19621126353740692, "learning_rate": 7.679741185032739e-06, "loss": 0.0112, "step": 91890 }, { "epoch": 0.7760021954360262, "grad_norm": 0.28436291217803955, "learning_rate": 7.679119042799819e-06, "loss": 0.0157, "step": 91900 }, { "epoch": 0.7760866352831901, "grad_norm": 0.22782233357429504, "learning_rate": 7.678496842376983e-06, "loss": 0.0148, "step": 91910 }, { "epoch": 0.776171075130354, "grad_norm": 0.2907313406467438, "learning_rate": 7.677874583777745e-06, "loss": 0.011, "step": 91920 }, { "epoch": 0.7762555149775179, "grad_norm": 0.587626576423645, "learning_rate": 7.677252267015618e-06, "loss": 0.0169, "step": 91930 }, { "epoch": 0.7763399548246818, "grad_norm": 0.3801296353340149, "learning_rate": 7.676629892104122e-06, "loss": 0.034, "step": 91940 }, { "epoch": 0.7764243946718457, "grad_norm": 0.5232008695602417, "learning_rate": 7.676007459056771e-06, "loss": 0.014, "step": 91950 }, { "epoch": 0.7765088345190095, "grad_norm": 0.40655356645584106, "learning_rate": 7.675384967887089e-06, "loss": 0.018, "step": 91960 }, { "epoch": 0.7765932743661734, "grad_norm": 0.19578850269317627, "learning_rate": 7.674762418608593e-06, "loss": 0.0157, "step": 91970 }, { "epoch": 0.7766777142133373, "grad_norm": 0.6736367344856262, "learning_rate": 7.674139811234804e-06, "loss": 0.0129, "step": 91980 }, { "epoch": 0.7767621540605012, "grad_norm": 0.38765019178390503, "learning_rate": 7.673517145779248e-06, "loss": 0.0181, "step": 91990 }, { "epoch": 0.776846593907665, "grad_norm": 0.1304485946893692, "learning_rate": 7.672894422255447e-06, "loss": 0.0183, "step": 92000 }, { "epoch": 0.7769310337548289, "grad_norm": 0.5298327207565308, "learning_rate": 7.672271640676927e-06, "loss": 0.0132, "step": 92010 }, { "epoch": 0.7770154736019927, "grad_norm": 0.3705943822860718, "learning_rate": 7.671648801057213e-06, "loss": 0.0196, "step": 92020 }, { "epoch": 0.7770999134491566, "grad_norm": 0.3064890503883362, "learning_rate": 7.671025903409837e-06, "loss": 0.0132, "step": 92030 }, { "epoch": 0.7771843532963205, "grad_norm": 0.1612306833267212, "learning_rate": 7.670402947748325e-06, "loss": 0.0081, "step": 92040 }, { "epoch": 0.7772687931434844, "grad_norm": 0.5830971598625183, "learning_rate": 7.669779934086208e-06, "loss": 0.0174, "step": 92050 }, { "epoch": 0.7773532329906483, "grad_norm": 0.20800185203552246, "learning_rate": 7.669156862437018e-06, "loss": 0.006, "step": 92060 }, { "epoch": 0.7774376728378122, "grad_norm": 0.31928834319114685, "learning_rate": 7.668533732814284e-06, "loss": 0.0186, "step": 92070 }, { "epoch": 0.7775221126849761, "grad_norm": 0.33794206380844116, "learning_rate": 7.66791054523155e-06, "loss": 0.0099, "step": 92080 }, { "epoch": 0.77760655253214, "grad_norm": 0.23924978077411652, "learning_rate": 7.667287299702344e-06, "loss": 0.0085, "step": 92090 }, { "epoch": 0.7776909923793038, "grad_norm": 0.35619643330574036, "learning_rate": 7.666663996240203e-06, "loss": 0.0128, "step": 92100 }, { "epoch": 0.7777754322264677, "grad_norm": 0.14347180724143982, "learning_rate": 7.666040634858667e-06, "loss": 0.0094, "step": 92110 }, { "epoch": 0.7778598720736315, "grad_norm": 0.18157550692558289, "learning_rate": 7.665417215571275e-06, "loss": 0.0102, "step": 92120 }, { "epoch": 0.7779443119207954, "grad_norm": 1.0554441213607788, "learning_rate": 7.664793738391567e-06, "loss": 0.0147, "step": 92130 }, { "epoch": 0.7780287517679593, "grad_norm": 0.41960951685905457, "learning_rate": 7.664170203333085e-06, "loss": 0.0069, "step": 92140 }, { "epoch": 0.7781131916151232, "grad_norm": 0.7570197582244873, "learning_rate": 7.663546610409372e-06, "loss": 0.0133, "step": 92150 }, { "epoch": 0.778197631462287, "grad_norm": 0.4793967008590698, "learning_rate": 7.662922959633974e-06, "loss": 0.0097, "step": 92160 }, { "epoch": 0.7782820713094509, "grad_norm": 0.15441332757472992, "learning_rate": 7.662299251020433e-06, "loss": 0.0161, "step": 92170 }, { "epoch": 0.7783665111566148, "grad_norm": 0.20105057954788208, "learning_rate": 7.661675484582299e-06, "loss": 0.0097, "step": 92180 }, { "epoch": 0.7784509510037787, "grad_norm": 0.26621440052986145, "learning_rate": 7.66105166033312e-06, "loss": 0.0153, "step": 92190 }, { "epoch": 0.7785353908509426, "grad_norm": 0.05608467757701874, "learning_rate": 7.660427778286441e-06, "loss": 0.0091, "step": 92200 }, { "epoch": 0.7786198306981065, "grad_norm": 0.4211657643318176, "learning_rate": 7.659803838455818e-06, "loss": 0.0101, "step": 92210 }, { "epoch": 0.7787042705452704, "grad_norm": 0.44481614232063293, "learning_rate": 7.6591798408548e-06, "loss": 0.0137, "step": 92220 }, { "epoch": 0.7787887103924341, "grad_norm": 0.22423310577869415, "learning_rate": 7.658555785496942e-06, "loss": 0.0071, "step": 92230 }, { "epoch": 0.778873150239598, "grad_norm": 0.3999490737915039, "learning_rate": 7.657931672395795e-06, "loss": 0.0137, "step": 92240 }, { "epoch": 0.7789575900867619, "grad_norm": 0.0834035873413086, "learning_rate": 7.65730750156492e-06, "loss": 0.0098, "step": 92250 }, { "epoch": 0.7790420299339258, "grad_norm": 0.4439631402492523, "learning_rate": 7.656683273017866e-06, "loss": 0.0177, "step": 92260 }, { "epoch": 0.7791264697810897, "grad_norm": 0.36717018485069275, "learning_rate": 7.6560589867682e-06, "loss": 0.0164, "step": 92270 }, { "epoch": 0.7792109096282536, "grad_norm": 0.09099413454532623, "learning_rate": 7.655434642829474e-06, "loss": 0.0073, "step": 92280 }, { "epoch": 0.7792953494754175, "grad_norm": 0.3759254813194275, "learning_rate": 7.654810241215253e-06, "loss": 0.0089, "step": 92290 }, { "epoch": 0.7793797893225813, "grad_norm": 0.1468728482723236, "learning_rate": 7.654185781939098e-06, "loss": 0.0152, "step": 92300 }, { "epoch": 0.7794642291697452, "grad_norm": 0.5336178541183472, "learning_rate": 7.653561265014572e-06, "loss": 0.0104, "step": 92310 }, { "epoch": 0.7795486690169091, "grad_norm": 0.2289794385433197, "learning_rate": 7.652936690455239e-06, "loss": 0.0105, "step": 92320 }, { "epoch": 0.779633108864073, "grad_norm": 0.22005896270275116, "learning_rate": 7.652312058274663e-06, "loss": 0.0072, "step": 92330 }, { "epoch": 0.7797175487112369, "grad_norm": 0.18161815404891968, "learning_rate": 7.651687368486413e-06, "loss": 0.0096, "step": 92340 }, { "epoch": 0.7798019885584007, "grad_norm": 0.21744754910469055, "learning_rate": 7.651062621104059e-06, "loss": 0.0082, "step": 92350 }, { "epoch": 0.7798864284055645, "grad_norm": 0.12863494455814362, "learning_rate": 7.650437816141166e-06, "loss": 0.014, "step": 92360 }, { "epoch": 0.7799708682527284, "grad_norm": 0.3103063404560089, "learning_rate": 7.649812953611307e-06, "loss": 0.0121, "step": 92370 }, { "epoch": 0.7800553080998923, "grad_norm": 0.34901806712150574, "learning_rate": 7.649188033528054e-06, "loss": 0.0126, "step": 92380 }, { "epoch": 0.7801397479470562, "grad_norm": 0.18845202028751373, "learning_rate": 7.648563055904981e-06, "loss": 0.0073, "step": 92390 }, { "epoch": 0.7802241877942201, "grad_norm": 0.02973482944071293, "learning_rate": 7.647938020755659e-06, "loss": 0.0138, "step": 92400 }, { "epoch": 0.780308627641384, "grad_norm": 0.12570495903491974, "learning_rate": 7.647312928093668e-06, "loss": 0.0072, "step": 92410 }, { "epoch": 0.7803930674885479, "grad_norm": 0.37117502093315125, "learning_rate": 7.64668777793258e-06, "loss": 0.0141, "step": 92420 }, { "epoch": 0.7804775073357118, "grad_norm": 0.8328421115875244, "learning_rate": 7.646062570285978e-06, "loss": 0.0094, "step": 92430 }, { "epoch": 0.7805619471828756, "grad_norm": 0.09852832555770874, "learning_rate": 7.645437305167439e-06, "loss": 0.0119, "step": 92440 }, { "epoch": 0.7806463870300395, "grad_norm": 0.8303796648979187, "learning_rate": 7.644811982590543e-06, "loss": 0.013, "step": 92450 }, { "epoch": 0.7807308268772033, "grad_norm": 0.36145079135894775, "learning_rate": 7.644186602568873e-06, "loss": 0.0058, "step": 92460 }, { "epoch": 0.7808152667243672, "grad_norm": 0.04628080874681473, "learning_rate": 7.643561165116011e-06, "loss": 0.0114, "step": 92470 }, { "epoch": 0.7808997065715311, "grad_norm": 0.11998693645000458, "learning_rate": 7.642935670245543e-06, "loss": 0.0075, "step": 92480 }, { "epoch": 0.780984146418695, "grad_norm": 0.4226720929145813, "learning_rate": 7.642310117971054e-06, "loss": 0.0115, "step": 92490 }, { "epoch": 0.7810685862658588, "grad_norm": 0.24212166666984558, "learning_rate": 7.641684508306131e-06, "loss": 0.0123, "step": 92500 }, { "epoch": 0.7811530261130227, "grad_norm": 0.13113617897033691, "learning_rate": 7.641058841264362e-06, "loss": 0.0108, "step": 92510 }, { "epoch": 0.7812374659601866, "grad_norm": 0.35156768560409546, "learning_rate": 7.640433116859336e-06, "loss": 0.0095, "step": 92520 }, { "epoch": 0.7813219058073505, "grad_norm": 0.2519185543060303, "learning_rate": 7.639807335104643e-06, "loss": 0.0151, "step": 92530 }, { "epoch": 0.7814063456545144, "grad_norm": 0.3552111089229584, "learning_rate": 7.639181496013877e-06, "loss": 0.0118, "step": 92540 }, { "epoch": 0.7814907855016783, "grad_norm": 0.7151720523834229, "learning_rate": 7.638555599600627e-06, "loss": 0.018, "step": 92550 }, { "epoch": 0.7815752253488422, "grad_norm": 0.2941640317440033, "learning_rate": 7.637929645878492e-06, "loss": 0.0141, "step": 92560 }, { "epoch": 0.781659665196006, "grad_norm": 0.40609216690063477, "learning_rate": 7.637303634861066e-06, "loss": 0.0078, "step": 92570 }, { "epoch": 0.7817441050431698, "grad_norm": 0.5135184526443481, "learning_rate": 7.636677566561946e-06, "loss": 0.0111, "step": 92580 }, { "epoch": 0.7818285448903337, "grad_norm": 0.2250560075044632, "learning_rate": 7.636051440994728e-06, "loss": 0.0083, "step": 92590 }, { "epoch": 0.7819129847374976, "grad_norm": 0.2336665838956833, "learning_rate": 7.635425258173013e-06, "loss": 0.0171, "step": 92600 }, { "epoch": 0.7819974245846615, "grad_norm": 0.3799213171005249, "learning_rate": 7.634799018110402e-06, "loss": 0.012, "step": 92610 }, { "epoch": 0.7820818644318254, "grad_norm": 1.4092297554016113, "learning_rate": 7.634172720820497e-06, "loss": 0.0121, "step": 92620 }, { "epoch": 0.7821663042789893, "grad_norm": 0.03507556393742561, "learning_rate": 7.6335463663169e-06, "loss": 0.0086, "step": 92630 }, { "epoch": 0.7822507441261531, "grad_norm": 0.0722632110118866, "learning_rate": 7.632919954613214e-06, "loss": 0.0103, "step": 92640 }, { "epoch": 0.782335183973317, "grad_norm": 0.47798067331314087, "learning_rate": 7.632293485723047e-06, "loss": 0.0073, "step": 92650 }, { "epoch": 0.7824196238204809, "grad_norm": 0.026301546022295952, "learning_rate": 7.631666959660005e-06, "loss": 0.0064, "step": 92660 }, { "epoch": 0.7825040636676448, "grad_norm": 0.49876928329467773, "learning_rate": 7.631040376437694e-06, "loss": 0.017, "step": 92670 }, { "epoch": 0.7825885035148087, "grad_norm": 0.2755151391029358, "learning_rate": 7.630413736069728e-06, "loss": 0.0102, "step": 92680 }, { "epoch": 0.7826729433619725, "grad_norm": 0.2199830710887909, "learning_rate": 7.629787038569713e-06, "loss": 0.0086, "step": 92690 }, { "epoch": 0.7827573832091363, "grad_norm": 0.1600845754146576, "learning_rate": 7.629160283951264e-06, "loss": 0.0073, "step": 92700 }, { "epoch": 0.7828418230563002, "grad_norm": 0.3414298892021179, "learning_rate": 7.628533472227991e-06, "loss": 0.0102, "step": 92710 }, { "epoch": 0.7829262629034641, "grad_norm": 0.37188881635665894, "learning_rate": 7.627906603413511e-06, "loss": 0.0158, "step": 92720 }, { "epoch": 0.783010702750628, "grad_norm": 0.42989712953567505, "learning_rate": 7.627279677521437e-06, "loss": 0.0094, "step": 92730 }, { "epoch": 0.7830951425977919, "grad_norm": 0.002790963277220726, "learning_rate": 7.6266526945653885e-06, "loss": 0.0075, "step": 92740 }, { "epoch": 0.7831795824449558, "grad_norm": 0.29228445887565613, "learning_rate": 7.6260256545589795e-06, "loss": 0.0071, "step": 92750 }, { "epoch": 0.7832640222921197, "grad_norm": 0.25953611731529236, "learning_rate": 7.625398557515832e-06, "loss": 0.0127, "step": 92760 }, { "epoch": 0.7833484621392836, "grad_norm": 0.24243462085723877, "learning_rate": 7.624771403449567e-06, "loss": 0.0096, "step": 92770 }, { "epoch": 0.7834329019864474, "grad_norm": 0.44395703077316284, "learning_rate": 7.624144192373804e-06, "loss": 0.0083, "step": 92780 }, { "epoch": 0.7835173418336113, "grad_norm": 0.7715117335319519, "learning_rate": 7.623516924302165e-06, "loss": 0.0083, "step": 92790 }, { "epoch": 0.7836017816807752, "grad_norm": 0.5818072557449341, "learning_rate": 7.622889599248278e-06, "loss": 0.0093, "step": 92800 }, { "epoch": 0.783686221527939, "grad_norm": 0.14918120205402374, "learning_rate": 7.622262217225767e-06, "loss": 0.0118, "step": 92810 }, { "epoch": 0.7837706613751029, "grad_norm": 0.37954798340797424, "learning_rate": 7.621634778248257e-06, "loss": 0.0192, "step": 92820 }, { "epoch": 0.7838551012222668, "grad_norm": 0.3165844678878784, "learning_rate": 7.621007282329377e-06, "loss": 0.0036, "step": 92830 }, { "epoch": 0.7839395410694306, "grad_norm": 0.23056848347187042, "learning_rate": 7.620379729482755e-06, "loss": 0.0132, "step": 92840 }, { "epoch": 0.7840239809165945, "grad_norm": 0.3318866789340973, "learning_rate": 7.6197521197220235e-06, "loss": 0.0063, "step": 92850 }, { "epoch": 0.7841084207637584, "grad_norm": 0.35486042499542236, "learning_rate": 7.6191244530608145e-06, "loss": 0.0136, "step": 92860 }, { "epoch": 0.7841928606109223, "grad_norm": 0.24538035690784454, "learning_rate": 7.618496729512757e-06, "loss": 0.0109, "step": 92870 }, { "epoch": 0.7842773004580862, "grad_norm": 0.18133951723575592, "learning_rate": 7.617868949091487e-06, "loss": 0.0104, "step": 92880 }, { "epoch": 0.7843617403052501, "grad_norm": 0.14267057180404663, "learning_rate": 7.6172411118106404e-06, "loss": 0.0077, "step": 92890 }, { "epoch": 0.784446180152414, "grad_norm": 0.1712391972541809, "learning_rate": 7.616613217683853e-06, "loss": 0.0096, "step": 92900 }, { "epoch": 0.7845306199995778, "grad_norm": 0.5580012798309326, "learning_rate": 7.615985266724761e-06, "loss": 0.0127, "step": 92910 }, { "epoch": 0.7846150598467416, "grad_norm": 0.3116091787815094, "learning_rate": 7.615357258947007e-06, "loss": 0.0119, "step": 92920 }, { "epoch": 0.7846994996939055, "grad_norm": 0.4133319854736328, "learning_rate": 7.61472919436423e-06, "loss": 0.0216, "step": 92930 }, { "epoch": 0.7847839395410694, "grad_norm": 0.4823334515094757, "learning_rate": 7.61410107299007e-06, "loss": 0.0109, "step": 92940 }, { "epoch": 0.7848683793882333, "grad_norm": 0.4149227440357208, "learning_rate": 7.61347289483817e-06, "loss": 0.0121, "step": 92950 }, { "epoch": 0.7849528192353972, "grad_norm": 0.37153780460357666, "learning_rate": 7.612844659922174e-06, "loss": 0.017, "step": 92960 }, { "epoch": 0.7850372590825611, "grad_norm": 0.5179486274719238, "learning_rate": 7.612216368255727e-06, "loss": 0.0125, "step": 92970 }, { "epoch": 0.7851216989297249, "grad_norm": 0.7750375270843506, "learning_rate": 7.6115880198524785e-06, "loss": 0.0183, "step": 92980 }, { "epoch": 0.7852061387768888, "grad_norm": 0.4370419979095459, "learning_rate": 7.61095961472607e-06, "loss": 0.0139, "step": 92990 }, { "epoch": 0.7852905786240527, "grad_norm": 0.31423819065093994, "learning_rate": 7.6103311528901555e-06, "loss": 0.0104, "step": 93000 }, { "epoch": 0.7853750184712166, "grad_norm": 0.31756505370140076, "learning_rate": 7.609702634358382e-06, "loss": 0.0089, "step": 93010 }, { "epoch": 0.7854594583183805, "grad_norm": 0.13729295134544373, "learning_rate": 7.609074059144404e-06, "loss": 0.0126, "step": 93020 }, { "epoch": 0.7855438981655444, "grad_norm": 0.5920224785804749, "learning_rate": 7.60844542726187e-06, "loss": 0.0125, "step": 93030 }, { "epoch": 0.7856283380127081, "grad_norm": 0.6550472378730774, "learning_rate": 7.6078167387244365e-06, "loss": 0.0094, "step": 93040 }, { "epoch": 0.785712777859872, "grad_norm": 0.13790519535541534, "learning_rate": 7.607187993545759e-06, "loss": 0.0105, "step": 93050 }, { "epoch": 0.7857972177070359, "grad_norm": 0.5655728578567505, "learning_rate": 7.606559191739491e-06, "loss": 0.0154, "step": 93060 }, { "epoch": 0.7858816575541998, "grad_norm": 0.2500191628932953, "learning_rate": 7.605930333319293e-06, "loss": 0.0098, "step": 93070 }, { "epoch": 0.7859660974013637, "grad_norm": 0.03315924480557442, "learning_rate": 7.60530141829882e-06, "loss": 0.0134, "step": 93080 }, { "epoch": 0.7860505372485276, "grad_norm": 0.23060481250286102, "learning_rate": 7.604672446691737e-06, "loss": 0.008, "step": 93090 }, { "epoch": 0.7861349770956915, "grad_norm": 0.2497147023677826, "learning_rate": 7.604043418511702e-06, "loss": 0.0092, "step": 93100 }, { "epoch": 0.7862194169428554, "grad_norm": 0.2190706580877304, "learning_rate": 7.603414333772374e-06, "loss": 0.0094, "step": 93110 }, { "epoch": 0.7863038567900192, "grad_norm": 0.3109746277332306, "learning_rate": 7.602785192487423e-06, "loss": 0.0152, "step": 93120 }, { "epoch": 0.7863882966371831, "grad_norm": 0.3975871205329895, "learning_rate": 7.6021559946705105e-06, "loss": 0.0136, "step": 93130 }, { "epoch": 0.786472736484347, "grad_norm": 0.1577702760696411, "learning_rate": 7.601526740335303e-06, "loss": 0.0069, "step": 93140 }, { "epoch": 0.7865571763315108, "grad_norm": 0.08247702568769455, "learning_rate": 7.600897429495467e-06, "loss": 0.0067, "step": 93150 }, { "epoch": 0.7866416161786747, "grad_norm": 0.6185604333877563, "learning_rate": 7.600268062164673e-06, "loss": 0.0112, "step": 93160 }, { "epoch": 0.7867260560258386, "grad_norm": 0.5028877854347229, "learning_rate": 7.599638638356588e-06, "loss": 0.0122, "step": 93170 }, { "epoch": 0.7868104958730024, "grad_norm": 0.13952219486236572, "learning_rate": 7.599009158084887e-06, "loss": 0.0079, "step": 93180 }, { "epoch": 0.7868949357201663, "grad_norm": 0.5087472796440125, "learning_rate": 7.598379621363238e-06, "loss": 0.0234, "step": 93190 }, { "epoch": 0.7869793755673302, "grad_norm": 0.4549327790737152, "learning_rate": 7.597750028205315e-06, "loss": 0.0091, "step": 93200 }, { "epoch": 0.7870638154144941, "grad_norm": 0.4036915898323059, "learning_rate": 7.597120378624796e-06, "loss": 0.0124, "step": 93210 }, { "epoch": 0.787148255261658, "grad_norm": 0.3146668076515198, "learning_rate": 7.596490672635355e-06, "loss": 0.0088, "step": 93220 }, { "epoch": 0.7872326951088219, "grad_norm": 0.18465450406074524, "learning_rate": 7.595860910250666e-06, "loss": 0.0066, "step": 93230 }, { "epoch": 0.7873171349559858, "grad_norm": 0.32030099630355835, "learning_rate": 7.595231091484412e-06, "loss": 0.008, "step": 93240 }, { "epoch": 0.7874015748031497, "grad_norm": 0.46571090817451477, "learning_rate": 7.594601216350268e-06, "loss": 0.0183, "step": 93250 }, { "epoch": 0.7874860146503135, "grad_norm": 0.16561070084571838, "learning_rate": 7.5939712848619205e-06, "loss": 0.0102, "step": 93260 }, { "epoch": 0.7875704544974773, "grad_norm": 0.528951108455658, "learning_rate": 7.593341297033047e-06, "loss": 0.0093, "step": 93270 }, { "epoch": 0.7876548943446412, "grad_norm": 0.6138884425163269, "learning_rate": 7.59271125287733e-06, "loss": 0.0096, "step": 93280 }, { "epoch": 0.7877393341918051, "grad_norm": 0.32585588097572327, "learning_rate": 7.592081152408458e-06, "loss": 0.0077, "step": 93290 }, { "epoch": 0.787823774038969, "grad_norm": 0.2065032720565796, "learning_rate": 7.591450995640115e-06, "loss": 0.0074, "step": 93300 }, { "epoch": 0.7879082138861329, "grad_norm": 0.12538880109786987, "learning_rate": 7.5908207825859855e-06, "loss": 0.0105, "step": 93310 }, { "epoch": 0.7879926537332967, "grad_norm": 0.1617845594882965, "learning_rate": 7.590190513259761e-06, "loss": 0.0082, "step": 93320 }, { "epoch": 0.7880770935804606, "grad_norm": 0.24632735550403595, "learning_rate": 7.589560187675129e-06, "loss": 0.0134, "step": 93330 }, { "epoch": 0.7881615334276245, "grad_norm": 0.4193480908870697, "learning_rate": 7.588929805845782e-06, "loss": 0.0072, "step": 93340 }, { "epoch": 0.7882459732747884, "grad_norm": 0.35894641280174255, "learning_rate": 7.588299367785408e-06, "loss": 0.0091, "step": 93350 }, { "epoch": 0.7883304131219523, "grad_norm": 0.47187328338623047, "learning_rate": 7.587668873507701e-06, "loss": 0.0092, "step": 93360 }, { "epoch": 0.7884148529691162, "grad_norm": 0.47996577620506287, "learning_rate": 7.5870383230263586e-06, "loss": 0.0126, "step": 93370 }, { "epoch": 0.78849929281628, "grad_norm": 0.18253521621227264, "learning_rate": 7.586407716355073e-06, "loss": 0.0096, "step": 93380 }, { "epoch": 0.7885837326634438, "grad_norm": 0.604133665561676, "learning_rate": 7.585777053507541e-06, "loss": 0.0081, "step": 93390 }, { "epoch": 0.7886681725106077, "grad_norm": 0.6225335597991943, "learning_rate": 7.585146334497463e-06, "loss": 0.0081, "step": 93400 }, { "epoch": 0.7887526123577716, "grad_norm": 0.13635045289993286, "learning_rate": 7.5845155593385355e-06, "loss": 0.0074, "step": 93410 }, { "epoch": 0.7888370522049355, "grad_norm": 0.44634902477264404, "learning_rate": 7.583884728044461e-06, "loss": 0.0126, "step": 93420 }, { "epoch": 0.7889214920520994, "grad_norm": 0.1259358674287796, "learning_rate": 7.5832538406289365e-06, "loss": 0.0067, "step": 93430 }, { "epoch": 0.7890059318992633, "grad_norm": 0.3736894726753235, "learning_rate": 7.582622897105671e-06, "loss": 0.0175, "step": 93440 }, { "epoch": 0.7890903717464272, "grad_norm": 0.09553200751543045, "learning_rate": 7.581991897488365e-06, "loss": 0.0056, "step": 93450 }, { "epoch": 0.789174811593591, "grad_norm": 0.13772791624069214, "learning_rate": 7.5813608417907236e-06, "loss": 0.0192, "step": 93460 }, { "epoch": 0.7892592514407549, "grad_norm": 0.5897870063781738, "learning_rate": 7.580729730026453e-06, "loss": 0.014, "step": 93470 }, { "epoch": 0.7893436912879188, "grad_norm": 0.1386595368385315, "learning_rate": 7.580098562209261e-06, "loss": 0.0117, "step": 93480 }, { "epoch": 0.7894281311350827, "grad_norm": 0.33240804076194763, "learning_rate": 7.5794673383528594e-06, "loss": 0.0121, "step": 93490 }, { "epoch": 0.7895125709822465, "grad_norm": 0.5695833563804626, "learning_rate": 7.578836058470954e-06, "loss": 0.0251, "step": 93500 }, { "epoch": 0.7895970108294104, "grad_norm": 0.19856132566928864, "learning_rate": 7.578204722577258e-06, "loss": 0.0057, "step": 93510 }, { "epoch": 0.7896814506765742, "grad_norm": 0.1890757977962494, "learning_rate": 7.577573330685484e-06, "loss": 0.0145, "step": 93520 }, { "epoch": 0.7897658905237381, "grad_norm": 0.5046980381011963, "learning_rate": 7.576941882809345e-06, "loss": 0.0141, "step": 93530 }, { "epoch": 0.789850330370902, "grad_norm": 0.25453880429267883, "learning_rate": 7.576310378962557e-06, "loss": 0.0089, "step": 93540 }, { "epoch": 0.7899347702180659, "grad_norm": 0.14169050753116608, "learning_rate": 7.575678819158836e-06, "loss": 0.0079, "step": 93550 }, { "epoch": 0.7900192100652298, "grad_norm": 0.10576298832893372, "learning_rate": 7.575047203411898e-06, "loss": 0.017, "step": 93560 }, { "epoch": 0.7901036499123937, "grad_norm": 0.28465962409973145, "learning_rate": 7.574415531735463e-06, "loss": 0.0184, "step": 93570 }, { "epoch": 0.7901880897595576, "grad_norm": 0.1073550432920456, "learning_rate": 7.573783804143249e-06, "loss": 0.0081, "step": 93580 }, { "epoch": 0.7902725296067215, "grad_norm": 0.2739550471305847, "learning_rate": 7.5731520206489795e-06, "loss": 0.0192, "step": 93590 }, { "epoch": 0.7903569694538853, "grad_norm": 0.8307336568832397, "learning_rate": 7.572520181266375e-06, "loss": 0.0093, "step": 93600 }, { "epoch": 0.7904414093010491, "grad_norm": 0.21213534474372864, "learning_rate": 7.571888286009159e-06, "loss": 0.0089, "step": 93610 }, { "epoch": 0.790525849148213, "grad_norm": 0.16065707802772522, "learning_rate": 7.571256334891056e-06, "loss": 0.0102, "step": 93620 }, { "epoch": 0.7906102889953769, "grad_norm": 0.2935096323490143, "learning_rate": 7.570624327925793e-06, "loss": 0.0093, "step": 93630 }, { "epoch": 0.7906947288425408, "grad_norm": 0.22578324377536774, "learning_rate": 7.569992265127097e-06, "loss": 0.0094, "step": 93640 }, { "epoch": 0.7907791686897047, "grad_norm": 0.18845371901988983, "learning_rate": 7.569360146508695e-06, "loss": 0.0025, "step": 93650 }, { "epoch": 0.7908636085368685, "grad_norm": 1.0866483449935913, "learning_rate": 7.568727972084317e-06, "loss": 0.0088, "step": 93660 }, { "epoch": 0.7909480483840324, "grad_norm": 0.4045524597167969, "learning_rate": 7.568095741867694e-06, "loss": 0.0194, "step": 93670 }, { "epoch": 0.7910324882311963, "grad_norm": 0.18659792840480804, "learning_rate": 7.567463455872557e-06, "loss": 0.0076, "step": 93680 }, { "epoch": 0.7911169280783602, "grad_norm": 0.007644291967153549, "learning_rate": 7.566831114112642e-06, "loss": 0.0162, "step": 93690 }, { "epoch": 0.7912013679255241, "grad_norm": 0.21810296177864075, "learning_rate": 7.5661987166016785e-06, "loss": 0.0161, "step": 93700 }, { "epoch": 0.791285807772688, "grad_norm": 0.08779656887054443, "learning_rate": 7.565566263353407e-06, "loss": 0.0116, "step": 93710 }, { "epoch": 0.7913702476198519, "grad_norm": 0.497681200504303, "learning_rate": 7.56493375438156e-06, "loss": 0.0078, "step": 93720 }, { "epoch": 0.7914546874670156, "grad_norm": 0.07761617004871368, "learning_rate": 7.5643011896998785e-06, "loss": 0.0094, "step": 93730 }, { "epoch": 0.7915391273141795, "grad_norm": 0.5674681067466736, "learning_rate": 7.5636685693221e-06, "loss": 0.0098, "step": 93740 }, { "epoch": 0.7916235671613434, "grad_norm": 0.12654902040958405, "learning_rate": 7.563035893261968e-06, "loss": 0.0142, "step": 93750 }, { "epoch": 0.7917080070085073, "grad_norm": 0.19640430808067322, "learning_rate": 7.562403161533218e-06, "loss": 0.0108, "step": 93760 }, { "epoch": 0.7917924468556712, "grad_norm": 0.08371733874082565, "learning_rate": 7.561770374149598e-06, "loss": 0.0068, "step": 93770 }, { "epoch": 0.7918768867028351, "grad_norm": 0.2399720698595047, "learning_rate": 7.56113753112485e-06, "loss": 0.0128, "step": 93780 }, { "epoch": 0.791961326549999, "grad_norm": 0.21049301326274872, "learning_rate": 7.560504632472721e-06, "loss": 0.0062, "step": 93790 }, { "epoch": 0.7920457663971628, "grad_norm": 0.30416521430015564, "learning_rate": 7.559871678206956e-06, "loss": 0.0075, "step": 93800 }, { "epoch": 0.7921302062443267, "grad_norm": 0.3637678027153015, "learning_rate": 7.559238668341302e-06, "loss": 0.0077, "step": 93810 }, { "epoch": 0.7922146460914906, "grad_norm": 0.541766881942749, "learning_rate": 7.558605602889508e-06, "loss": 0.0165, "step": 93820 }, { "epoch": 0.7922990859386545, "grad_norm": 0.27971988916397095, "learning_rate": 7.557972481865325e-06, "loss": 0.0111, "step": 93830 }, { "epoch": 0.7923835257858183, "grad_norm": 0.454416960477829, "learning_rate": 7.557339305282505e-06, "loss": 0.0097, "step": 93840 }, { "epoch": 0.7924679656329822, "grad_norm": 0.7421544790267944, "learning_rate": 7.556706073154798e-06, "loss": 0.011, "step": 93850 }, { "epoch": 0.792552405480146, "grad_norm": 0.37656885385513306, "learning_rate": 7.556072785495958e-06, "loss": 0.0126, "step": 93860 }, { "epoch": 0.7926368453273099, "grad_norm": 0.0052185095846652985, "learning_rate": 7.555439442319743e-06, "loss": 0.0071, "step": 93870 }, { "epoch": 0.7927212851744738, "grad_norm": 0.3611525297164917, "learning_rate": 7.554806043639906e-06, "loss": 0.0148, "step": 93880 }, { "epoch": 0.7928057250216377, "grad_norm": 0.5791281461715698, "learning_rate": 7.554172589470206e-06, "loss": 0.0124, "step": 93890 }, { "epoch": 0.7928901648688016, "grad_norm": 0.2422795295715332, "learning_rate": 7.5535390798244e-06, "loss": 0.0078, "step": 93900 }, { "epoch": 0.7929746047159655, "grad_norm": 0.4179578423500061, "learning_rate": 7.552905514716249e-06, "loss": 0.0117, "step": 93910 }, { "epoch": 0.7930590445631294, "grad_norm": 0.04004930332303047, "learning_rate": 7.5522718941595145e-06, "loss": 0.0117, "step": 93920 }, { "epoch": 0.7931434844102933, "grad_norm": 0.19127926230430603, "learning_rate": 7.551638218167956e-06, "loss": 0.0106, "step": 93930 }, { "epoch": 0.7932279242574571, "grad_norm": 0.39861640334129333, "learning_rate": 7.551004486755338e-06, "loss": 0.0124, "step": 93940 }, { "epoch": 0.7933123641046209, "grad_norm": 0.2208762764930725, "learning_rate": 7.550370699935425e-06, "loss": 0.0089, "step": 93950 }, { "epoch": 0.7933968039517848, "grad_norm": 0.7882731556892395, "learning_rate": 7.549736857721983e-06, "loss": 0.0138, "step": 93960 }, { "epoch": 0.7934812437989487, "grad_norm": 0.15059928596019745, "learning_rate": 7.54910296012878e-06, "loss": 0.0102, "step": 93970 }, { "epoch": 0.7935656836461126, "grad_norm": 0.12203349173069, "learning_rate": 7.548469007169583e-06, "loss": 0.0067, "step": 93980 }, { "epoch": 0.7936501234932765, "grad_norm": 0.5932982563972473, "learning_rate": 7.547834998858161e-06, "loss": 0.0119, "step": 93990 }, { "epoch": 0.7937345633404403, "grad_norm": 0.460191011428833, "learning_rate": 7.5472009352082854e-06, "loss": 0.011, "step": 94000 }, { "epoch": 0.7938190031876042, "grad_norm": 0.3894971013069153, "learning_rate": 7.546566816233727e-06, "loss": 0.0087, "step": 94010 }, { "epoch": 0.7939034430347681, "grad_norm": 0.7164345383644104, "learning_rate": 7.545932641948258e-06, "loss": 0.0057, "step": 94020 }, { "epoch": 0.793987882881932, "grad_norm": 0.5456240177154541, "learning_rate": 7.545298412365656e-06, "loss": 0.0106, "step": 94030 }, { "epoch": 0.7940723227290959, "grad_norm": 0.37087127566337585, "learning_rate": 7.5446641274996935e-06, "loss": 0.0117, "step": 94040 }, { "epoch": 0.7941567625762598, "grad_norm": 0.3447071313858032, "learning_rate": 7.544029787364147e-06, "loss": 0.0103, "step": 94050 }, { "epoch": 0.7942412024234237, "grad_norm": 0.07706523686647415, "learning_rate": 7.543395391972794e-06, "loss": 0.0118, "step": 94060 }, { "epoch": 0.7943256422705874, "grad_norm": 0.13125337660312653, "learning_rate": 7.542760941339415e-06, "loss": 0.0162, "step": 94070 }, { "epoch": 0.7944100821177513, "grad_norm": 0.32681095600128174, "learning_rate": 7.54212643547779e-06, "loss": 0.0154, "step": 94080 }, { "epoch": 0.7944945219649152, "grad_norm": 0.1952637881040573, "learning_rate": 7.5414918744017e-06, "loss": 0.0102, "step": 94090 }, { "epoch": 0.7945789618120791, "grad_norm": 0.06178969144821167, "learning_rate": 7.540857258124926e-06, "loss": 0.0102, "step": 94100 }, { "epoch": 0.794663401659243, "grad_norm": 0.18494820594787598, "learning_rate": 7.540222586661255e-06, "loss": 0.0152, "step": 94110 }, { "epoch": 0.7947478415064069, "grad_norm": 0.46390286087989807, "learning_rate": 7.539587860024468e-06, "loss": 0.0172, "step": 94120 }, { "epoch": 0.7948322813535708, "grad_norm": 0.13143780827522278, "learning_rate": 7.538953078228353e-06, "loss": 0.0081, "step": 94130 }, { "epoch": 0.7949167212007346, "grad_norm": 0.5162650346755981, "learning_rate": 7.538318241286698e-06, "loss": 0.0147, "step": 94140 }, { "epoch": 0.7950011610478985, "grad_norm": 0.5761793255805969, "learning_rate": 7.537683349213292e-06, "loss": 0.0127, "step": 94150 }, { "epoch": 0.7950856008950624, "grad_norm": 0.20878168940544128, "learning_rate": 7.537048402021924e-06, "loss": 0.0093, "step": 94160 }, { "epoch": 0.7951700407422263, "grad_norm": 0.062027327716350555, "learning_rate": 7.536413399726384e-06, "loss": 0.0147, "step": 94170 }, { "epoch": 0.7952544805893901, "grad_norm": 0.9682698249816895, "learning_rate": 7.535778342340463e-06, "loss": 0.0226, "step": 94180 }, { "epoch": 0.795338920436554, "grad_norm": 0.1838347464799881, "learning_rate": 7.535143229877957e-06, "loss": 0.0159, "step": 94190 }, { "epoch": 0.7954233602837179, "grad_norm": 0.16113419830799103, "learning_rate": 7.534508062352659e-06, "loss": 0.0064, "step": 94200 }, { "epoch": 0.7955078001308817, "grad_norm": 0.3056977689266205, "learning_rate": 7.533872839778367e-06, "loss": 0.0108, "step": 94210 }, { "epoch": 0.7955922399780456, "grad_norm": 0.11084245145320892, "learning_rate": 7.533237562168874e-06, "loss": 0.0115, "step": 94220 }, { "epoch": 0.7956766798252095, "grad_norm": 0.556053876876831, "learning_rate": 7.532602229537982e-06, "loss": 0.0123, "step": 94230 }, { "epoch": 0.7957611196723734, "grad_norm": 0.4178261458873749, "learning_rate": 7.5319668418994875e-06, "loss": 0.0086, "step": 94240 }, { "epoch": 0.7958455595195373, "grad_norm": 0.17079825699329376, "learning_rate": 7.531331399267192e-06, "loss": 0.0104, "step": 94250 }, { "epoch": 0.7959299993667012, "grad_norm": 0.08201871812343597, "learning_rate": 7.530695901654899e-06, "loss": 0.0093, "step": 94260 }, { "epoch": 0.796014439213865, "grad_norm": 0.21525989472866058, "learning_rate": 7.530060349076408e-06, "loss": 0.0102, "step": 94270 }, { "epoch": 0.7960988790610289, "grad_norm": 0.4617508351802826, "learning_rate": 7.529424741545526e-06, "loss": 0.0114, "step": 94280 }, { "epoch": 0.7961833189081928, "grad_norm": 0.19670818746089935, "learning_rate": 7.528789079076056e-06, "loss": 0.0159, "step": 94290 }, { "epoch": 0.7962677587553566, "grad_norm": 0.556908369064331, "learning_rate": 7.528153361681805e-06, "loss": 0.0147, "step": 94300 }, { "epoch": 0.7963521986025205, "grad_norm": 0.006940606515854597, "learning_rate": 7.52751758937658e-06, "loss": 0.0076, "step": 94310 }, { "epoch": 0.7964366384496844, "grad_norm": 0.00326887471601367, "learning_rate": 7.526881762174193e-06, "loss": 0.0088, "step": 94320 }, { "epoch": 0.7965210782968483, "grad_norm": 0.3910163938999176, "learning_rate": 7.526245880088451e-06, "loss": 0.0105, "step": 94330 }, { "epoch": 0.7966055181440121, "grad_norm": 0.12289077043533325, "learning_rate": 7.525609943133165e-06, "loss": 0.0061, "step": 94340 }, { "epoch": 0.796689957991176, "grad_norm": 0.4912528693675995, "learning_rate": 7.524973951322151e-06, "loss": 0.0155, "step": 94350 }, { "epoch": 0.7967743978383399, "grad_norm": 0.4692286550998688, "learning_rate": 7.524337904669219e-06, "loss": 0.0079, "step": 94360 }, { "epoch": 0.7968588376855038, "grad_norm": 0.23781397938728333, "learning_rate": 7.523701803188186e-06, "loss": 0.0095, "step": 94370 }, { "epoch": 0.7969432775326677, "grad_norm": 0.3708050847053528, "learning_rate": 7.523065646892865e-06, "loss": 0.0138, "step": 94380 }, { "epoch": 0.7970277173798316, "grad_norm": 0.28409186005592346, "learning_rate": 7.522429435797076e-06, "loss": 0.0071, "step": 94390 }, { "epoch": 0.7971121572269955, "grad_norm": 0.8939093947410583, "learning_rate": 7.521793169914639e-06, "loss": 0.0164, "step": 94400 }, { "epoch": 0.7971965970741592, "grad_norm": 0.42054200172424316, "learning_rate": 7.521156849259368e-06, "loss": 0.0084, "step": 94410 }, { "epoch": 0.7972810369213231, "grad_norm": 0.21510668098926544, "learning_rate": 7.520520473845088e-06, "loss": 0.008, "step": 94420 }, { "epoch": 0.797365476768487, "grad_norm": 0.8170930743217468, "learning_rate": 7.519884043685619e-06, "loss": 0.0095, "step": 94430 }, { "epoch": 0.7974499166156509, "grad_norm": 0.510037362575531, "learning_rate": 7.519247558794787e-06, "loss": 0.0063, "step": 94440 }, { "epoch": 0.7975343564628148, "grad_norm": 0.06768634170293808, "learning_rate": 7.518611019186412e-06, "loss": 0.0065, "step": 94450 }, { "epoch": 0.7976187963099787, "grad_norm": 0.2934291362762451, "learning_rate": 7.517974424874323e-06, "loss": 0.0078, "step": 94460 }, { "epoch": 0.7977032361571426, "grad_norm": 0.6526767611503601, "learning_rate": 7.517337775872344e-06, "loss": 0.0114, "step": 94470 }, { "epoch": 0.7977876760043064, "grad_norm": 0.2788064777851105, "learning_rate": 7.516701072194306e-06, "loss": 0.0104, "step": 94480 }, { "epoch": 0.7978721158514703, "grad_norm": 0.21715465188026428, "learning_rate": 7.516064313854037e-06, "loss": 0.0053, "step": 94490 }, { "epoch": 0.7979565556986342, "grad_norm": 0.26677653193473816, "learning_rate": 7.515427500865365e-06, "loss": 0.0084, "step": 94500 }, { "epoch": 0.7980409955457981, "grad_norm": 0.28707385063171387, "learning_rate": 7.514790633242124e-06, "loss": 0.015, "step": 94510 }, { "epoch": 0.798125435392962, "grad_norm": 0.6240189671516418, "learning_rate": 7.5141537109981445e-06, "loss": 0.0119, "step": 94520 }, { "epoch": 0.7982098752401258, "grad_norm": 0.32355567812919617, "learning_rate": 7.5135167341472635e-06, "loss": 0.0121, "step": 94530 }, { "epoch": 0.7982943150872897, "grad_norm": 0.024698840454220772, "learning_rate": 7.512879702703313e-06, "loss": 0.0102, "step": 94540 }, { "epoch": 0.7983787549344535, "grad_norm": 0.34574541449546814, "learning_rate": 7.51224261668013e-06, "loss": 0.0211, "step": 94550 }, { "epoch": 0.7984631947816174, "grad_norm": 0.3594900667667389, "learning_rate": 7.5116054760915534e-06, "loss": 0.0134, "step": 94560 }, { "epoch": 0.7985476346287813, "grad_norm": 0.315005362033844, "learning_rate": 7.5109682809514195e-06, "loss": 0.0091, "step": 94570 }, { "epoch": 0.7986320744759452, "grad_norm": 0.5209236741065979, "learning_rate": 7.51033103127357e-06, "loss": 0.0199, "step": 94580 }, { "epoch": 0.7987165143231091, "grad_norm": 0.7144836783409119, "learning_rate": 7.509693727071843e-06, "loss": 0.0097, "step": 94590 }, { "epoch": 0.798800954170273, "grad_norm": 0.15445846319198608, "learning_rate": 7.509056368360085e-06, "loss": 0.0054, "step": 94600 }, { "epoch": 0.7988853940174369, "grad_norm": 0.604568362236023, "learning_rate": 7.508418955152135e-06, "loss": 0.0166, "step": 94610 }, { "epoch": 0.7989698338646007, "grad_norm": 0.16489277780056, "learning_rate": 7.50778148746184e-06, "loss": 0.013, "step": 94620 }, { "epoch": 0.7990542737117646, "grad_norm": 0.3852769136428833, "learning_rate": 7.507143965303045e-06, "loss": 0.0071, "step": 94630 }, { "epoch": 0.7991387135589284, "grad_norm": 0.49151986837387085, "learning_rate": 7.506506388689597e-06, "loss": 0.0169, "step": 94640 }, { "epoch": 0.7992231534060923, "grad_norm": 0.2592375874519348, "learning_rate": 7.505868757635344e-06, "loss": 0.0086, "step": 94650 }, { "epoch": 0.7993075932532562, "grad_norm": 0.27933576703071594, "learning_rate": 7.505231072154135e-06, "loss": 0.0158, "step": 94660 }, { "epoch": 0.7993920331004201, "grad_norm": 0.2189301997423172, "learning_rate": 7.50459333225982e-06, "loss": 0.0142, "step": 94670 }, { "epoch": 0.799476472947584, "grad_norm": 0.24131405353546143, "learning_rate": 7.503955537966251e-06, "loss": 0.0124, "step": 94680 }, { "epoch": 0.7995609127947478, "grad_norm": 0.6023988127708435, "learning_rate": 7.503317689287281e-06, "loss": 0.0168, "step": 94690 }, { "epoch": 0.7996453526419117, "grad_norm": 0.5323228240013123, "learning_rate": 7.502679786236764e-06, "loss": 0.0138, "step": 94700 }, { "epoch": 0.7997297924890756, "grad_norm": 0.5573225021362305, "learning_rate": 7.502041828828555e-06, "loss": 0.0091, "step": 94710 }, { "epoch": 0.7998142323362395, "grad_norm": 0.1956973671913147, "learning_rate": 7.501403817076511e-06, "loss": 0.009, "step": 94720 }, { "epoch": 0.7998986721834034, "grad_norm": 0.6698556542396545, "learning_rate": 7.500765750994488e-06, "loss": 0.0083, "step": 94730 }, { "epoch": 0.7999831120305673, "grad_norm": 0.03462044149637222, "learning_rate": 7.5001276305963434e-06, "loss": 0.0105, "step": 94740 }, { "epoch": 0.8000675518777312, "grad_norm": 0.30313730239868164, "learning_rate": 7.499489455895942e-06, "loss": 0.0072, "step": 94750 }, { "epoch": 0.8001519917248949, "grad_norm": 0.7156240344047546, "learning_rate": 7.49885122690714e-06, "loss": 0.0133, "step": 94760 }, { "epoch": 0.8002364315720588, "grad_norm": 0.19417715072631836, "learning_rate": 7.498212943643802e-06, "loss": 0.0094, "step": 94770 }, { "epoch": 0.8003208714192227, "grad_norm": 0.2920446991920471, "learning_rate": 7.49757460611979e-06, "loss": 0.0105, "step": 94780 }, { "epoch": 0.8004053112663866, "grad_norm": 0.6674739122390747, "learning_rate": 7.4969362143489696e-06, "loss": 0.0169, "step": 94790 }, { "epoch": 0.8004897511135505, "grad_norm": 0.4082460105419159, "learning_rate": 7.496297768345207e-06, "loss": 0.0107, "step": 94800 }, { "epoch": 0.8005741909607144, "grad_norm": 0.3701830208301544, "learning_rate": 7.4956592681223684e-06, "loss": 0.0146, "step": 94810 }, { "epoch": 0.8006586308078782, "grad_norm": 0.45091259479522705, "learning_rate": 7.495020713694322e-06, "loss": 0.0095, "step": 94820 }, { "epoch": 0.8007430706550421, "grad_norm": 0.3462470769882202, "learning_rate": 7.494382105074936e-06, "loss": 0.0139, "step": 94830 }, { "epoch": 0.800827510502206, "grad_norm": 0.7274608016014099, "learning_rate": 7.4937434422780815e-06, "loss": 0.0082, "step": 94840 }, { "epoch": 0.8009119503493699, "grad_norm": 0.02983037754893303, "learning_rate": 7.493104725317633e-06, "loss": 0.0123, "step": 94850 }, { "epoch": 0.8009963901965338, "grad_norm": 0.11604012548923492, "learning_rate": 7.4924659542074585e-06, "loss": 0.0191, "step": 94860 }, { "epoch": 0.8010808300436976, "grad_norm": 0.42684802412986755, "learning_rate": 7.491827128961435e-06, "loss": 0.0116, "step": 94870 }, { "epoch": 0.8011652698908615, "grad_norm": 0.1265929937362671, "learning_rate": 7.491188249593438e-06, "loss": 0.0085, "step": 94880 }, { "epoch": 0.8012497097380253, "grad_norm": 0.3329291045665741, "learning_rate": 7.4905493161173414e-06, "loss": 0.0079, "step": 94890 }, { "epoch": 0.8013341495851892, "grad_norm": 0.21639902889728546, "learning_rate": 7.4899103285470235e-06, "loss": 0.0071, "step": 94900 }, { "epoch": 0.8014185894323531, "grad_norm": 0.8127478361129761, "learning_rate": 7.489271286896363e-06, "loss": 0.0116, "step": 94910 }, { "epoch": 0.801503029279517, "grad_norm": 0.4569375813007355, "learning_rate": 7.488632191179242e-06, "loss": 0.0128, "step": 94920 }, { "epoch": 0.8015874691266809, "grad_norm": 0.3400607705116272, "learning_rate": 7.487993041409539e-06, "loss": 0.0182, "step": 94930 }, { "epoch": 0.8016719089738448, "grad_norm": 0.5390621423721313, "learning_rate": 7.4873538376011366e-06, "loss": 0.0113, "step": 94940 }, { "epoch": 0.8017563488210087, "grad_norm": 0.42572021484375, "learning_rate": 7.4867145797679195e-06, "loss": 0.0111, "step": 94950 }, { "epoch": 0.8018407886681725, "grad_norm": 0.6180387139320374, "learning_rate": 7.48607526792377e-06, "loss": 0.0135, "step": 94960 }, { "epoch": 0.8019252285153364, "grad_norm": 0.5369223952293396, "learning_rate": 7.485435902082576e-06, "loss": 0.0101, "step": 94970 }, { "epoch": 0.8020096683625003, "grad_norm": 0.17742270231246948, "learning_rate": 7.484796482258224e-06, "loss": 0.0123, "step": 94980 }, { "epoch": 0.8020941082096641, "grad_norm": 0.2703416645526886, "learning_rate": 7.484157008464602e-06, "loss": 0.0064, "step": 94990 }, { "epoch": 0.802178548056828, "grad_norm": 0.4636884033679962, "learning_rate": 7.4835174807155975e-06, "loss": 0.0101, "step": 95000 }, { "epoch": 0.8022629879039919, "grad_norm": 0.3264748454093933, "learning_rate": 7.482877899025103e-06, "loss": 0.0156, "step": 95010 }, { "epoch": 0.8023474277511558, "grad_norm": 0.13284392654895782, "learning_rate": 7.482238263407009e-06, "loss": 0.0094, "step": 95020 }, { "epoch": 0.8024318675983196, "grad_norm": 0.19372870028018951, "learning_rate": 7.481598573875208e-06, "loss": 0.0084, "step": 95030 }, { "epoch": 0.8025163074454835, "grad_norm": 0.22240467369556427, "learning_rate": 7.4809588304435945e-06, "loss": 0.0115, "step": 95040 }, { "epoch": 0.8026007472926474, "grad_norm": 0.22320415079593658, "learning_rate": 7.480319033126065e-06, "loss": 0.0112, "step": 95050 }, { "epoch": 0.8026851871398113, "grad_norm": 0.26262572407722473, "learning_rate": 7.479679181936513e-06, "loss": 0.0154, "step": 95060 }, { "epoch": 0.8027696269869752, "grad_norm": 0.2895726263523102, "learning_rate": 7.479039276888838e-06, "loss": 0.0088, "step": 95070 }, { "epoch": 0.8028540668341391, "grad_norm": 0.3139411211013794, "learning_rate": 7.478399317996937e-06, "loss": 0.0114, "step": 95080 }, { "epoch": 0.802938506681303, "grad_norm": 0.6254485249519348, "learning_rate": 7.477759305274712e-06, "loss": 0.0109, "step": 95090 }, { "epoch": 0.8030229465284667, "grad_norm": 0.31747353076934814, "learning_rate": 7.477119238736061e-06, "loss": 0.0148, "step": 95100 }, { "epoch": 0.8031073863756306, "grad_norm": 0.2820737957954407, "learning_rate": 7.47647911839489e-06, "loss": 0.0099, "step": 95110 }, { "epoch": 0.8031918262227945, "grad_norm": 0.5936983227729797, "learning_rate": 7.475838944265099e-06, "loss": 0.0138, "step": 95120 }, { "epoch": 0.8032762660699584, "grad_norm": 0.015841418877243996, "learning_rate": 7.475198716360592e-06, "loss": 0.0128, "step": 95130 }, { "epoch": 0.8033607059171223, "grad_norm": 0.3013682961463928, "learning_rate": 7.474558434695278e-06, "loss": 0.0099, "step": 95140 }, { "epoch": 0.8034451457642862, "grad_norm": 0.19472233951091766, "learning_rate": 7.473918099283062e-06, "loss": 0.0133, "step": 95150 }, { "epoch": 0.80352958561145, "grad_norm": 0.34255480766296387, "learning_rate": 7.47327771013785e-06, "loss": 0.0094, "step": 95160 }, { "epoch": 0.8036140254586139, "grad_norm": 0.6194005608558655, "learning_rate": 7.472637267273554e-06, "loss": 0.0117, "step": 95170 }, { "epoch": 0.8036984653057778, "grad_norm": 0.15496645867824554, "learning_rate": 7.471996770704084e-06, "loss": 0.0075, "step": 95180 }, { "epoch": 0.8037829051529417, "grad_norm": 0.6090143918991089, "learning_rate": 7.47135622044335e-06, "loss": 0.0121, "step": 95190 }, { "epoch": 0.8038673450001056, "grad_norm": 1.0163651704788208, "learning_rate": 7.470715616505264e-06, "loss": 0.0178, "step": 95200 }, { "epoch": 0.8039517848472695, "grad_norm": 0.21888411045074463, "learning_rate": 7.470074958903743e-06, "loss": 0.0099, "step": 95210 }, { "epoch": 0.8040362246944333, "grad_norm": 0.326905220746994, "learning_rate": 7.4694342476527e-06, "loss": 0.0121, "step": 95220 }, { "epoch": 0.8041206645415971, "grad_norm": 0.2374274581670761, "learning_rate": 7.468793482766051e-06, "loss": 0.0095, "step": 95230 }, { "epoch": 0.804205104388761, "grad_norm": 0.3127637207508087, "learning_rate": 7.468152664257713e-06, "loss": 0.0187, "step": 95240 }, { "epoch": 0.8042895442359249, "grad_norm": 0.30584707856178284, "learning_rate": 7.467511792141604e-06, "loss": 0.0088, "step": 95250 }, { "epoch": 0.8043739840830888, "grad_norm": 0.1938660591840744, "learning_rate": 7.466870866431647e-06, "loss": 0.012, "step": 95260 }, { "epoch": 0.8044584239302527, "grad_norm": 0.29999542236328125, "learning_rate": 7.466229887141758e-06, "loss": 0.0096, "step": 95270 }, { "epoch": 0.8045428637774166, "grad_norm": 0.14696501195430756, "learning_rate": 7.465588854285862e-06, "loss": 0.014, "step": 95280 }, { "epoch": 0.8046273036245805, "grad_norm": 0.5462266206741333, "learning_rate": 7.4649477678778815e-06, "loss": 0.0059, "step": 95290 }, { "epoch": 0.8047117434717443, "grad_norm": 0.28252074122428894, "learning_rate": 7.464306627931741e-06, "loss": 0.0146, "step": 95300 }, { "epoch": 0.8047961833189082, "grad_norm": 0.267335444688797, "learning_rate": 7.4636654344613644e-06, "loss": 0.0102, "step": 95310 }, { "epoch": 0.8048806231660721, "grad_norm": 0.2558729648590088, "learning_rate": 7.4630241874806795e-06, "loss": 0.0089, "step": 95320 }, { "epoch": 0.8049650630132359, "grad_norm": 0.7353898286819458, "learning_rate": 7.462382887003615e-06, "loss": 0.011, "step": 95330 }, { "epoch": 0.8050495028603998, "grad_norm": 0.37716612219810486, "learning_rate": 7.4617415330441e-06, "loss": 0.0192, "step": 95340 }, { "epoch": 0.8051339427075637, "grad_norm": 0.4899122416973114, "learning_rate": 7.4611001256160606e-06, "loss": 0.0082, "step": 95350 }, { "epoch": 0.8052183825547276, "grad_norm": 0.13936468958854675, "learning_rate": 7.460458664733432e-06, "loss": 0.0092, "step": 95360 }, { "epoch": 0.8053028224018914, "grad_norm": 0.22858133912086487, "learning_rate": 7.459817150410144e-06, "loss": 0.0139, "step": 95370 }, { "epoch": 0.8053872622490553, "grad_norm": 0.3296854794025421, "learning_rate": 7.4591755826601345e-06, "loss": 0.011, "step": 95380 }, { "epoch": 0.8054717020962192, "grad_norm": 0.23211313784122467, "learning_rate": 7.4585339614973325e-06, "loss": 0.0095, "step": 95390 }, { "epoch": 0.8055561419433831, "grad_norm": 0.0007514585158787668, "learning_rate": 7.457892286935677e-06, "loss": 0.0069, "step": 95400 }, { "epoch": 0.805640581790547, "grad_norm": 0.7552974820137024, "learning_rate": 7.457250558989107e-06, "loss": 0.0143, "step": 95410 }, { "epoch": 0.8057250216377109, "grad_norm": 0.431273490190506, "learning_rate": 7.456608777671556e-06, "loss": 0.013, "step": 95420 }, { "epoch": 0.8058094614848748, "grad_norm": 0.3746233284473419, "learning_rate": 7.455966942996966e-06, "loss": 0.0076, "step": 95430 }, { "epoch": 0.8058939013320386, "grad_norm": 0.42365825176239014, "learning_rate": 7.4553250549792786e-06, "loss": 0.016, "step": 95440 }, { "epoch": 0.8059783411792024, "grad_norm": 0.18192997574806213, "learning_rate": 7.454683113632433e-06, "loss": 0.0072, "step": 95450 }, { "epoch": 0.8060627810263663, "grad_norm": 0.16184985637664795, "learning_rate": 7.454041118970376e-06, "loss": 0.0086, "step": 95460 }, { "epoch": 0.8061472208735302, "grad_norm": 0.43261998891830444, "learning_rate": 7.453399071007045e-06, "loss": 0.0093, "step": 95470 }, { "epoch": 0.8062316607206941, "grad_norm": 0.19869668781757355, "learning_rate": 7.4527569697563895e-06, "loss": 0.0222, "step": 95480 }, { "epoch": 0.806316100567858, "grad_norm": 0.09342189878225327, "learning_rate": 7.452114815232357e-06, "loss": 0.005, "step": 95490 }, { "epoch": 0.8064005404150218, "grad_norm": 0.1662411391735077, "learning_rate": 7.451472607448892e-06, "loss": 0.0075, "step": 95500 }, { "epoch": 0.8064849802621857, "grad_norm": 0.19395971298217773, "learning_rate": 7.450830346419944e-06, "loss": 0.0102, "step": 95510 }, { "epoch": 0.8065694201093496, "grad_norm": 0.09143432229757309, "learning_rate": 7.450188032159464e-06, "loss": 0.0122, "step": 95520 }, { "epoch": 0.8066538599565135, "grad_norm": 0.5774338841438293, "learning_rate": 7.449545664681402e-06, "loss": 0.0092, "step": 95530 }, { "epoch": 0.8067382998036774, "grad_norm": 0.44474342465400696, "learning_rate": 7.44890324399971e-06, "loss": 0.011, "step": 95540 }, { "epoch": 0.8068227396508413, "grad_norm": 0.49952006340026855, "learning_rate": 7.448260770128341e-06, "loss": 0.0115, "step": 95550 }, { "epoch": 0.8069071794980051, "grad_norm": 0.4919716715812683, "learning_rate": 7.447618243081249e-06, "loss": 0.0135, "step": 95560 }, { "epoch": 0.8069916193451689, "grad_norm": 0.3369462788105011, "learning_rate": 7.4469756628723915e-06, "loss": 0.0201, "step": 95570 }, { "epoch": 0.8070760591923328, "grad_norm": 0.18414248526096344, "learning_rate": 7.446333029515724e-06, "loss": 0.0078, "step": 95580 }, { "epoch": 0.8071604990394967, "grad_norm": 0.28925013542175293, "learning_rate": 7.445690343025205e-06, "loss": 0.0093, "step": 95590 }, { "epoch": 0.8072449388866606, "grad_norm": 0.09192784875631332, "learning_rate": 7.445047603414792e-06, "loss": 0.0067, "step": 95600 }, { "epoch": 0.8073293787338245, "grad_norm": 0.012631651014089584, "learning_rate": 7.444404810698445e-06, "loss": 0.0101, "step": 95610 }, { "epoch": 0.8074138185809884, "grad_norm": 0.19201070070266724, "learning_rate": 7.443761964890129e-06, "loss": 0.0048, "step": 95620 }, { "epoch": 0.8074982584281523, "grad_norm": 0.4598638415336609, "learning_rate": 7.443119066003802e-06, "loss": 0.0102, "step": 95630 }, { "epoch": 0.8075826982753161, "grad_norm": 0.3038511574268341, "learning_rate": 7.44247611405343e-06, "loss": 0.0065, "step": 95640 }, { "epoch": 0.80766713812248, "grad_norm": 0.3434123992919922, "learning_rate": 7.441833109052977e-06, "loss": 0.0127, "step": 95650 }, { "epoch": 0.8077515779696439, "grad_norm": 0.1425222009420395, "learning_rate": 7.44119005101641e-06, "loss": 0.0109, "step": 95660 }, { "epoch": 0.8078360178168078, "grad_norm": 0.5815756916999817, "learning_rate": 7.440546939957695e-06, "loss": 0.0202, "step": 95670 }, { "epoch": 0.8079204576639716, "grad_norm": 0.12317383289337158, "learning_rate": 7.4399037758907995e-06, "loss": 0.0126, "step": 95680 }, { "epoch": 0.8080048975111355, "grad_norm": 0.36026352643966675, "learning_rate": 7.439260558829695e-06, "loss": 0.0228, "step": 95690 }, { "epoch": 0.8080893373582994, "grad_norm": 0.4019685983657837, "learning_rate": 7.4386172887883525e-06, "loss": 0.0141, "step": 95700 }, { "epoch": 0.8081737772054632, "grad_norm": 0.16382671892642975, "learning_rate": 7.43797396578074e-06, "loss": 0.0083, "step": 95710 }, { "epoch": 0.8082582170526271, "grad_norm": 0.20739537477493286, "learning_rate": 7.437330589820834e-06, "loss": 0.009, "step": 95720 }, { "epoch": 0.808342656899791, "grad_norm": 0.15083564817905426, "learning_rate": 7.4366871609226055e-06, "loss": 0.0137, "step": 95730 }, { "epoch": 0.8084270967469549, "grad_norm": 0.639661967754364, "learning_rate": 7.436043679100032e-06, "loss": 0.0126, "step": 95740 }, { "epoch": 0.8085115365941188, "grad_norm": 0.11372920125722885, "learning_rate": 7.43540014436709e-06, "loss": 0.0116, "step": 95750 }, { "epoch": 0.8085959764412827, "grad_norm": 0.21278586983680725, "learning_rate": 7.434756556737755e-06, "loss": 0.0085, "step": 95760 }, { "epoch": 0.8086804162884466, "grad_norm": 0.4957725405693054, "learning_rate": 7.434112916226005e-06, "loss": 0.012, "step": 95770 }, { "epoch": 0.8087648561356104, "grad_norm": 0.49409034848213196, "learning_rate": 7.433469222845824e-06, "loss": 0.0056, "step": 95780 }, { "epoch": 0.8088492959827742, "grad_norm": 0.22756916284561157, "learning_rate": 7.432825476611188e-06, "loss": 0.0079, "step": 95790 }, { "epoch": 0.8089337358299381, "grad_norm": 0.38055798411369324, "learning_rate": 7.432181677536082e-06, "loss": 0.0151, "step": 95800 }, { "epoch": 0.809018175677102, "grad_norm": 0.31125739216804504, "learning_rate": 7.43153782563449e-06, "loss": 0.009, "step": 95810 }, { "epoch": 0.8091026155242659, "grad_norm": 0.5452293753623962, "learning_rate": 7.430893920920392e-06, "loss": 0.0121, "step": 95820 }, { "epoch": 0.8091870553714298, "grad_norm": 0.4062604606151581, "learning_rate": 7.4302499634077774e-06, "loss": 0.0102, "step": 95830 }, { "epoch": 0.8092714952185937, "grad_norm": 0.31642213463783264, "learning_rate": 7.429605953110631e-06, "loss": 0.0123, "step": 95840 }, { "epoch": 0.8093559350657575, "grad_norm": 0.4876386523246765, "learning_rate": 7.428961890042942e-06, "loss": 0.0146, "step": 95850 }, { "epoch": 0.8094403749129214, "grad_norm": 0.06335799396038055, "learning_rate": 7.428317774218697e-06, "loss": 0.0157, "step": 95860 }, { "epoch": 0.8095248147600853, "grad_norm": 0.048216562718153, "learning_rate": 7.427673605651891e-06, "loss": 0.0138, "step": 95870 }, { "epoch": 0.8096092546072492, "grad_norm": 0.20977123081684113, "learning_rate": 7.427029384356508e-06, "loss": 0.0065, "step": 95880 }, { "epoch": 0.8096936944544131, "grad_norm": 0.35883429646492004, "learning_rate": 7.426385110346546e-06, "loss": 0.0065, "step": 95890 }, { "epoch": 0.809778134301577, "grad_norm": 0.8547780513763428, "learning_rate": 7.425740783635995e-06, "loss": 0.0163, "step": 95900 }, { "epoch": 0.8098625741487407, "grad_norm": 0.07602819055318832, "learning_rate": 7.425096404238854e-06, "loss": 0.0121, "step": 95910 }, { "epoch": 0.8099470139959046, "grad_norm": 0.35362905263900757, "learning_rate": 7.424451972169113e-06, "loss": 0.0094, "step": 95920 }, { "epoch": 0.8100314538430685, "grad_norm": 0.13130266964435577, "learning_rate": 7.423807487440775e-06, "loss": 0.0179, "step": 95930 }, { "epoch": 0.8101158936902324, "grad_norm": 0.008636479265987873, "learning_rate": 7.423162950067833e-06, "loss": 0.0093, "step": 95940 }, { "epoch": 0.8102003335373963, "grad_norm": 0.35392770171165466, "learning_rate": 7.422518360064289e-06, "loss": 0.022, "step": 95950 }, { "epoch": 0.8102847733845602, "grad_norm": 0.3819930851459503, "learning_rate": 7.421873717444142e-06, "loss": 0.0188, "step": 95960 }, { "epoch": 0.8103692132317241, "grad_norm": 0.5036765336990356, "learning_rate": 7.421229022221395e-06, "loss": 0.0223, "step": 95970 }, { "epoch": 0.810453653078888, "grad_norm": 0.30039718747138977, "learning_rate": 7.420584274410049e-06, "loss": 0.0105, "step": 95980 }, { "epoch": 0.8105380929260518, "grad_norm": 0.08573385328054428, "learning_rate": 7.41993947402411e-06, "loss": 0.0084, "step": 95990 }, { "epoch": 0.8106225327732157, "grad_norm": 0.2976053059101105, "learning_rate": 7.41929462107758e-06, "loss": 0.0157, "step": 96000 }, { "epoch": 0.8107069726203796, "grad_norm": 0.8765338063240051, "learning_rate": 7.418649715584466e-06, "loss": 0.0183, "step": 96010 }, { "epoch": 0.8107914124675434, "grad_norm": 0.1276986449956894, "learning_rate": 7.418004757558777e-06, "loss": 0.0092, "step": 96020 }, { "epoch": 0.8108758523147073, "grad_norm": 0.3424241542816162, "learning_rate": 7.417359747014521e-06, "loss": 0.0115, "step": 96030 }, { "epoch": 0.8109602921618712, "grad_norm": 0.3416970670223236, "learning_rate": 7.4167146839657045e-06, "loss": 0.0102, "step": 96040 }, { "epoch": 0.811044732009035, "grad_norm": 0.4478282332420349, "learning_rate": 7.416069568426343e-06, "loss": 0.0185, "step": 96050 }, { "epoch": 0.8111291718561989, "grad_norm": 0.14378564059734344, "learning_rate": 7.415424400410444e-06, "loss": 0.0067, "step": 96060 }, { "epoch": 0.8112136117033628, "grad_norm": 0.5764828324317932, "learning_rate": 7.414779179932022e-06, "loss": 0.0132, "step": 96070 }, { "epoch": 0.8112980515505267, "grad_norm": 0.31099891662597656, "learning_rate": 7.414133907005091e-06, "loss": 0.0114, "step": 96080 }, { "epoch": 0.8113824913976906, "grad_norm": 0.10473142564296722, "learning_rate": 7.4134885816436665e-06, "loss": 0.0147, "step": 96090 }, { "epoch": 0.8114669312448545, "grad_norm": 0.2212350070476532, "learning_rate": 7.412843203861763e-06, "loss": 0.0111, "step": 96100 }, { "epoch": 0.8115513710920184, "grad_norm": 0.42955806851387024, "learning_rate": 7.412197773673402e-06, "loss": 0.0145, "step": 96110 }, { "epoch": 0.8116358109391822, "grad_norm": 0.3812231719493866, "learning_rate": 7.411552291092599e-06, "loss": 0.0096, "step": 96120 }, { "epoch": 0.8117202507863461, "grad_norm": 0.01742488704621792, "learning_rate": 7.410906756133374e-06, "loss": 0.0111, "step": 96130 }, { "epoch": 0.8118046906335099, "grad_norm": 0.26639002561569214, "learning_rate": 7.410261168809747e-06, "loss": 0.0121, "step": 96140 }, { "epoch": 0.8118891304806738, "grad_norm": 0.4436444044113159, "learning_rate": 7.409615529135744e-06, "loss": 0.0128, "step": 96150 }, { "epoch": 0.8119735703278377, "grad_norm": 0.21828985214233398, "learning_rate": 7.408969837125384e-06, "loss": 0.0096, "step": 96160 }, { "epoch": 0.8120580101750016, "grad_norm": 0.5103669166564941, "learning_rate": 7.408324092792694e-06, "loss": 0.0184, "step": 96170 }, { "epoch": 0.8121424500221655, "grad_norm": 0.12760908901691437, "learning_rate": 7.407678296151695e-06, "loss": 0.0076, "step": 96180 }, { "epoch": 0.8122268898693293, "grad_norm": 0.38988080620765686, "learning_rate": 7.40703244721642e-06, "loss": 0.0098, "step": 96190 }, { "epoch": 0.8123113297164932, "grad_norm": 0.7241970896720886, "learning_rate": 7.406386546000893e-06, "loss": 0.0092, "step": 96200 }, { "epoch": 0.8123957695636571, "grad_norm": 0.383541077375412, "learning_rate": 7.405740592519141e-06, "loss": 0.0103, "step": 96210 }, { "epoch": 0.812480209410821, "grad_norm": 0.4736437499523163, "learning_rate": 7.4050945867852e-06, "loss": 0.0076, "step": 96220 }, { "epoch": 0.8125646492579849, "grad_norm": 0.8306030631065369, "learning_rate": 7.404448528813095e-06, "loss": 0.01, "step": 96230 }, { "epoch": 0.8126490891051488, "grad_norm": 0.4995410740375519, "learning_rate": 7.403802418616861e-06, "loss": 0.0106, "step": 96240 }, { "epoch": 0.8127335289523125, "grad_norm": 0.4733615219593048, "learning_rate": 7.403156256210531e-06, "loss": 0.0075, "step": 96250 }, { "epoch": 0.8128179687994764, "grad_norm": 0.38510066270828247, "learning_rate": 7.402510041608139e-06, "loss": 0.0158, "step": 96260 }, { "epoch": 0.8129024086466403, "grad_norm": 0.4839872717857361, "learning_rate": 7.401863774823723e-06, "loss": 0.0097, "step": 96270 }, { "epoch": 0.8129868484938042, "grad_norm": 0.5382580757141113, "learning_rate": 7.401217455871316e-06, "loss": 0.007, "step": 96280 }, { "epoch": 0.8130712883409681, "grad_norm": 0.4161699116230011, "learning_rate": 7.40057108476496e-06, "loss": 0.0147, "step": 96290 }, { "epoch": 0.813155728188132, "grad_norm": 0.03137718513607979, "learning_rate": 7.399924661518691e-06, "loss": 0.0075, "step": 96300 }, { "epoch": 0.8132401680352959, "grad_norm": 0.33191725611686707, "learning_rate": 7.39927818614655e-06, "loss": 0.0101, "step": 96310 }, { "epoch": 0.8133246078824597, "grad_norm": 0.5979251861572266, "learning_rate": 7.39863165866258e-06, "loss": 0.0155, "step": 96320 }, { "epoch": 0.8134090477296236, "grad_norm": 1.1313775777816772, "learning_rate": 7.3979850790808196e-06, "loss": 0.0095, "step": 96330 }, { "epoch": 0.8134934875767875, "grad_norm": 0.07290250808000565, "learning_rate": 7.397338447415314e-06, "loss": 0.0061, "step": 96340 }, { "epoch": 0.8135779274239514, "grad_norm": 0.44790589809417725, "learning_rate": 7.39669176368011e-06, "loss": 0.0105, "step": 96350 }, { "epoch": 0.8136623672711153, "grad_norm": 0.5369675159454346, "learning_rate": 7.396045027889254e-06, "loss": 0.0182, "step": 96360 }, { "epoch": 0.8137468071182791, "grad_norm": 0.45406365394592285, "learning_rate": 7.395398240056788e-06, "loss": 0.0124, "step": 96370 }, { "epoch": 0.813831246965443, "grad_norm": 0.6635715365409851, "learning_rate": 7.394751400196764e-06, "loss": 0.0106, "step": 96380 }, { "epoch": 0.8139156868126068, "grad_norm": 0.3868912160396576, "learning_rate": 7.394104508323231e-06, "loss": 0.0125, "step": 96390 }, { "epoch": 0.8140001266597707, "grad_norm": 0.5137189626693726, "learning_rate": 7.393457564450238e-06, "loss": 0.0077, "step": 96400 }, { "epoch": 0.8140845665069346, "grad_norm": 0.3060660660266876, "learning_rate": 7.392810568591838e-06, "loss": 0.0149, "step": 96410 }, { "epoch": 0.8141690063540985, "grad_norm": 0.35811686515808105, "learning_rate": 7.392163520762081e-06, "loss": 0.0121, "step": 96420 }, { "epoch": 0.8142534462012624, "grad_norm": 0.26286548376083374, "learning_rate": 7.391516420975025e-06, "loss": 0.0163, "step": 96430 }, { "epoch": 0.8143378860484263, "grad_norm": 0.2074659913778305, "learning_rate": 7.390869269244721e-06, "loss": 0.0105, "step": 96440 }, { "epoch": 0.8144223258955902, "grad_norm": 0.4497930407524109, "learning_rate": 7.390222065585226e-06, "loss": 0.0114, "step": 96450 }, { "epoch": 0.814506765742754, "grad_norm": 0.1963116079568863, "learning_rate": 7.389574810010598e-06, "loss": 0.0066, "step": 96460 }, { "epoch": 0.8145912055899179, "grad_norm": 0.07193198800086975, "learning_rate": 7.388927502534894e-06, "loss": 0.0089, "step": 96470 }, { "epoch": 0.8146756454370817, "grad_norm": 0.28388577699661255, "learning_rate": 7.388280143172175e-06, "loss": 0.0098, "step": 96480 }, { "epoch": 0.8147600852842456, "grad_norm": 0.30274030566215515, "learning_rate": 7.3876327319365004e-06, "loss": 0.0078, "step": 96490 }, { "epoch": 0.8148445251314095, "grad_norm": 0.4374881386756897, "learning_rate": 7.386985268841933e-06, "loss": 0.009, "step": 96500 }, { "epoch": 0.8149289649785734, "grad_norm": 0.6280012726783752, "learning_rate": 7.386337753902534e-06, "loss": 0.0179, "step": 96510 }, { "epoch": 0.8150134048257373, "grad_norm": 0.40002575516700745, "learning_rate": 7.38569018713237e-06, "loss": 0.008, "step": 96520 }, { "epoch": 0.8150978446729011, "grad_norm": 0.38053393363952637, "learning_rate": 7.385042568545501e-06, "loss": 0.0124, "step": 96530 }, { "epoch": 0.815182284520065, "grad_norm": 0.2131030112504959, "learning_rate": 7.384394898155998e-06, "loss": 0.0109, "step": 96540 }, { "epoch": 0.8152667243672289, "grad_norm": 0.13270114362239838, "learning_rate": 7.383747175977925e-06, "loss": 0.0157, "step": 96550 }, { "epoch": 0.8153511642143928, "grad_norm": 0.21262013912200928, "learning_rate": 7.383099402025353e-06, "loss": 0.0075, "step": 96560 }, { "epoch": 0.8154356040615567, "grad_norm": 0.039684634655714035, "learning_rate": 7.382451576312349e-06, "loss": 0.0072, "step": 96570 }, { "epoch": 0.8155200439087206, "grad_norm": 0.2022140473127365, "learning_rate": 7.381803698852985e-06, "loss": 0.0121, "step": 96580 }, { "epoch": 0.8156044837558845, "grad_norm": 0.33067938685417175, "learning_rate": 7.381155769661333e-06, "loss": 0.0134, "step": 96590 }, { "epoch": 0.8156889236030482, "grad_norm": 0.5994898676872253, "learning_rate": 7.380507788751468e-06, "loss": 0.009, "step": 96600 }, { "epoch": 0.8157733634502121, "grad_norm": 0.18940965831279755, "learning_rate": 7.3798597561374594e-06, "loss": 0.0073, "step": 96610 }, { "epoch": 0.815857803297376, "grad_norm": 0.8276520371437073, "learning_rate": 7.379211671833384e-06, "loss": 0.0147, "step": 96620 }, { "epoch": 0.8159422431445399, "grad_norm": 0.35660019516944885, "learning_rate": 7.37856353585332e-06, "loss": 0.0154, "step": 96630 }, { "epoch": 0.8160266829917038, "grad_norm": 0.15258798003196716, "learning_rate": 7.377915348211343e-06, "loss": 0.0109, "step": 96640 }, { "epoch": 0.8161111228388677, "grad_norm": 1.43657648563385, "learning_rate": 7.377267108921533e-06, "loss": 0.0097, "step": 96650 }, { "epoch": 0.8161955626860316, "grad_norm": 0.33841168880462646, "learning_rate": 7.376618817997967e-06, "loss": 0.0069, "step": 96660 }, { "epoch": 0.8162800025331954, "grad_norm": 0.24742253124713898, "learning_rate": 7.375970475454728e-06, "loss": 0.0121, "step": 96670 }, { "epoch": 0.8163644423803593, "grad_norm": 0.41130709648132324, "learning_rate": 7.375322081305898e-06, "loss": 0.0102, "step": 96680 }, { "epoch": 0.8164488822275232, "grad_norm": 0.11119618266820908, "learning_rate": 7.374673635565557e-06, "loss": 0.0106, "step": 96690 }, { "epoch": 0.8165333220746871, "grad_norm": 0.16357094049453735, "learning_rate": 7.374025138247792e-06, "loss": 0.0074, "step": 96700 }, { "epoch": 0.8166177619218509, "grad_norm": 0.12514150142669678, "learning_rate": 7.3733765893666874e-06, "loss": 0.014, "step": 96710 }, { "epoch": 0.8167022017690148, "grad_norm": 0.5787712335586548, "learning_rate": 7.3727279889363314e-06, "loss": 0.0118, "step": 96720 }, { "epoch": 0.8167866416161786, "grad_norm": 0.3265331983566284, "learning_rate": 7.372079336970808e-06, "loss": 0.0137, "step": 96730 }, { "epoch": 0.8168710814633425, "grad_norm": 0.007410634774714708, "learning_rate": 7.371430633484207e-06, "loss": 0.0084, "step": 96740 }, { "epoch": 0.8169555213105064, "grad_norm": 0.7115174531936646, "learning_rate": 7.37078187849062e-06, "loss": 0.0128, "step": 96750 }, { "epoch": 0.8170399611576703, "grad_norm": 0.7017086744308472, "learning_rate": 7.370133072004136e-06, "loss": 0.014, "step": 96760 }, { "epoch": 0.8171244010048342, "grad_norm": 0.25993356108665466, "learning_rate": 7.3694842140388475e-06, "loss": 0.0059, "step": 96770 }, { "epoch": 0.8172088408519981, "grad_norm": 0.505668044090271, "learning_rate": 7.368835304608847e-06, "loss": 0.0072, "step": 96780 }, { "epoch": 0.817293280699162, "grad_norm": 0.30497437715530396, "learning_rate": 7.368186343728228e-06, "loss": 0.0092, "step": 96790 }, { "epoch": 0.8173777205463258, "grad_norm": 0.21205303072929382, "learning_rate": 7.367537331411089e-06, "loss": 0.0088, "step": 96800 }, { "epoch": 0.8174621603934897, "grad_norm": 0.14523111283779144, "learning_rate": 7.366888267671524e-06, "loss": 0.0134, "step": 96810 }, { "epoch": 0.8175466002406535, "grad_norm": 0.3481025993824005, "learning_rate": 7.36623915252363e-06, "loss": 0.0121, "step": 96820 }, { "epoch": 0.8176310400878174, "grad_norm": 0.30915045738220215, "learning_rate": 7.365589985981506e-06, "loss": 0.0107, "step": 96830 }, { "epoch": 0.8177154799349813, "grad_norm": 0.852094829082489, "learning_rate": 7.364940768059254e-06, "loss": 0.0135, "step": 96840 }, { "epoch": 0.8177999197821452, "grad_norm": 0.43036818504333496, "learning_rate": 7.364291498770972e-06, "loss": 0.0069, "step": 96850 }, { "epoch": 0.817884359629309, "grad_norm": 0.5066252946853638, "learning_rate": 7.363642178130763e-06, "loss": 0.0117, "step": 96860 }, { "epoch": 0.8179687994764729, "grad_norm": 0.23659197986125946, "learning_rate": 7.36299280615273e-06, "loss": 0.0121, "step": 96870 }, { "epoch": 0.8180532393236368, "grad_norm": 0.21389086544513702, "learning_rate": 7.362343382850978e-06, "loss": 0.0088, "step": 96880 }, { "epoch": 0.8181376791708007, "grad_norm": 0.18392600119113922, "learning_rate": 7.3616939082396134e-06, "loss": 0.0212, "step": 96890 }, { "epoch": 0.8182221190179646, "grad_norm": 0.2029021680355072, "learning_rate": 7.36104438233274e-06, "loss": 0.0148, "step": 96900 }, { "epoch": 0.8183065588651285, "grad_norm": 0.15022015571594238, "learning_rate": 7.360394805144465e-06, "loss": 0.0088, "step": 96910 }, { "epoch": 0.8183909987122924, "grad_norm": 0.12049415707588196, "learning_rate": 7.359745176688899e-06, "loss": 0.0104, "step": 96920 }, { "epoch": 0.8184754385594563, "grad_norm": 0.24310258030891418, "learning_rate": 7.359095496980153e-06, "loss": 0.0118, "step": 96930 }, { "epoch": 0.81855987840662, "grad_norm": 0.15051817893981934, "learning_rate": 7.358445766032334e-06, "loss": 0.0081, "step": 96940 }, { "epoch": 0.8186443182537839, "grad_norm": 0.4814208447933197, "learning_rate": 7.357795983859557e-06, "loss": 0.0129, "step": 96950 }, { "epoch": 0.8187287581009478, "grad_norm": 0.49341607093811035, "learning_rate": 7.357146150475934e-06, "loss": 0.0108, "step": 96960 }, { "epoch": 0.8188131979481117, "grad_norm": 0.6750840544700623, "learning_rate": 7.356496265895582e-06, "loss": 0.0086, "step": 96970 }, { "epoch": 0.8188976377952756, "grad_norm": 0.3446524441242218, "learning_rate": 7.355846330132612e-06, "loss": 0.0052, "step": 96980 }, { "epoch": 0.8189820776424395, "grad_norm": 0.6163453459739685, "learning_rate": 7.355196343201142e-06, "loss": 0.0096, "step": 96990 }, { "epoch": 0.8190665174896034, "grad_norm": 0.40925225615501404, "learning_rate": 7.354546305115291e-06, "loss": 0.0095, "step": 97000 }, { "epoch": 0.8191509573367672, "grad_norm": 0.2240791916847229, "learning_rate": 7.353896215889178e-06, "loss": 0.0105, "step": 97010 }, { "epoch": 0.8192353971839311, "grad_norm": 0.0705602690577507, "learning_rate": 7.3532460755369195e-06, "loss": 0.006, "step": 97020 }, { "epoch": 0.819319837031095, "grad_norm": 0.3302615284919739, "learning_rate": 7.352595884072639e-06, "loss": 0.013, "step": 97030 }, { "epoch": 0.8194042768782589, "grad_norm": 0.3365897238254547, "learning_rate": 7.351945641510458e-06, "loss": 0.0103, "step": 97040 }, { "epoch": 0.8194887167254227, "grad_norm": 0.09436926990747452, "learning_rate": 7.3512953478645e-06, "loss": 0.0084, "step": 97050 }, { "epoch": 0.8195731565725866, "grad_norm": 0.015660567209124565, "learning_rate": 7.350645003148888e-06, "loss": 0.0091, "step": 97060 }, { "epoch": 0.8196575964197504, "grad_norm": 0.6954323053359985, "learning_rate": 7.349994607377748e-06, "loss": 0.0065, "step": 97070 }, { "epoch": 0.8197420362669143, "grad_norm": 1.4641950130462646, "learning_rate": 7.349344160565208e-06, "loss": 0.0253, "step": 97080 }, { "epoch": 0.8198264761140782, "grad_norm": 0.4434000849723816, "learning_rate": 7.3486936627253945e-06, "loss": 0.0135, "step": 97090 }, { "epoch": 0.8199109159612421, "grad_norm": 0.0077195316553115845, "learning_rate": 7.348043113872435e-06, "loss": 0.0065, "step": 97100 }, { "epoch": 0.819995355808406, "grad_norm": 0.38051503896713257, "learning_rate": 7.347392514020459e-06, "loss": 0.018, "step": 97110 }, { "epoch": 0.8200797956555699, "grad_norm": 0.19583630561828613, "learning_rate": 7.346741863183601e-06, "loss": 0.0088, "step": 97120 }, { "epoch": 0.8201642355027338, "grad_norm": 0.31075093150138855, "learning_rate": 7.3460911613759914e-06, "loss": 0.0109, "step": 97130 }, { "epoch": 0.8202486753498976, "grad_norm": 0.0029655061662197113, "learning_rate": 7.34544040861176e-06, "loss": 0.0081, "step": 97140 }, { "epoch": 0.8203331151970615, "grad_norm": 0.0884464830160141, "learning_rate": 7.344789604905045e-06, "loss": 0.0148, "step": 97150 }, { "epoch": 0.8204175550442254, "grad_norm": 0.6326159834861755, "learning_rate": 7.344138750269978e-06, "loss": 0.0076, "step": 97160 }, { "epoch": 0.8205019948913892, "grad_norm": 0.4649217128753662, "learning_rate": 7.3434878447207e-06, "loss": 0.012, "step": 97170 }, { "epoch": 0.8205864347385531, "grad_norm": 0.196690633893013, "learning_rate": 7.3428368882713455e-06, "loss": 0.0116, "step": 97180 }, { "epoch": 0.820670874585717, "grad_norm": 0.26209139823913574, "learning_rate": 7.342185880936055e-06, "loss": 0.0177, "step": 97190 }, { "epoch": 0.8207553144328809, "grad_norm": 0.15187160670757294, "learning_rate": 7.341534822728965e-06, "loss": 0.0061, "step": 97200 }, { "epoch": 0.8208397542800447, "grad_norm": 0.2297838032245636, "learning_rate": 7.3408837136642195e-06, "loss": 0.0118, "step": 97210 }, { "epoch": 0.8209241941272086, "grad_norm": 0.3917360007762909, "learning_rate": 7.340232553755959e-06, "loss": 0.0172, "step": 97220 }, { "epoch": 0.8210086339743725, "grad_norm": 0.12549348175525665, "learning_rate": 7.3395813430183296e-06, "loss": 0.0099, "step": 97230 }, { "epoch": 0.8210930738215364, "grad_norm": 0.18198688328266144, "learning_rate": 7.3389300814654695e-06, "loss": 0.0083, "step": 97240 }, { "epoch": 0.8211775136687003, "grad_norm": 0.7852096557617188, "learning_rate": 7.338278769111529e-06, "loss": 0.0191, "step": 97250 }, { "epoch": 0.8212619535158642, "grad_norm": 0.3875158727169037, "learning_rate": 7.337627405970652e-06, "loss": 0.0158, "step": 97260 }, { "epoch": 0.8213463933630281, "grad_norm": 0.21004700660705566, "learning_rate": 7.336975992056987e-06, "loss": 0.0089, "step": 97270 }, { "epoch": 0.8214308332101918, "grad_norm": 0.35530105233192444, "learning_rate": 7.336324527384682e-06, "loss": 0.0127, "step": 97280 }, { "epoch": 0.8215152730573557, "grad_norm": 0.08803267776966095, "learning_rate": 7.335673011967889e-06, "loss": 0.0158, "step": 97290 }, { "epoch": 0.8215997129045196, "grad_norm": 0.4011888802051544, "learning_rate": 7.3350214458207544e-06, "loss": 0.0069, "step": 97300 }, { "epoch": 0.8216841527516835, "grad_norm": 0.44501376152038574, "learning_rate": 7.3343698289574325e-06, "loss": 0.0126, "step": 97310 }, { "epoch": 0.8217685925988474, "grad_norm": 0.2167043685913086, "learning_rate": 7.333718161392077e-06, "loss": 0.0129, "step": 97320 }, { "epoch": 0.8218530324460113, "grad_norm": 0.58775395154953, "learning_rate": 7.333066443138844e-06, "loss": 0.0101, "step": 97330 }, { "epoch": 0.8219374722931752, "grad_norm": 0.2838385999202728, "learning_rate": 7.332414674211882e-06, "loss": 0.0108, "step": 97340 }, { "epoch": 0.822021912140339, "grad_norm": 0.03577109053730965, "learning_rate": 7.3317628546253525e-06, "loss": 0.0105, "step": 97350 }, { "epoch": 0.8221063519875029, "grad_norm": 0.530572772026062, "learning_rate": 7.331110984393412e-06, "loss": 0.0105, "step": 97360 }, { "epoch": 0.8221907918346668, "grad_norm": 0.6130296587944031, "learning_rate": 7.330459063530218e-06, "loss": 0.0105, "step": 97370 }, { "epoch": 0.8222752316818307, "grad_norm": 0.570824384689331, "learning_rate": 7.329807092049932e-06, "loss": 0.01, "step": 97380 }, { "epoch": 0.8223596715289946, "grad_norm": 0.3687428832054138, "learning_rate": 7.3291550699667115e-06, "loss": 0.0221, "step": 97390 }, { "epoch": 0.8224441113761584, "grad_norm": 0.1373954564332962, "learning_rate": 7.328502997294721e-06, "loss": 0.0074, "step": 97400 }, { "epoch": 0.8225285512233222, "grad_norm": 0.3924326002597809, "learning_rate": 7.3278508740481225e-06, "loss": 0.0084, "step": 97410 }, { "epoch": 0.8226129910704861, "grad_norm": 0.38918352127075195, "learning_rate": 7.327198700241081e-06, "loss": 0.0102, "step": 97420 }, { "epoch": 0.82269743091765, "grad_norm": 0.4673248529434204, "learning_rate": 7.326546475887759e-06, "loss": 0.0096, "step": 97430 }, { "epoch": 0.8227818707648139, "grad_norm": 0.5192568302154541, "learning_rate": 7.325894201002325e-06, "loss": 0.0148, "step": 97440 }, { "epoch": 0.8228663106119778, "grad_norm": 0.39163878560066223, "learning_rate": 7.3252418755989465e-06, "loss": 0.0099, "step": 97450 }, { "epoch": 0.8229507504591417, "grad_norm": 0.1158166229724884, "learning_rate": 7.32458949969179e-06, "loss": 0.0068, "step": 97460 }, { "epoch": 0.8230351903063056, "grad_norm": 0.4016517698764801, "learning_rate": 7.323937073295026e-06, "loss": 0.0135, "step": 97470 }, { "epoch": 0.8231196301534695, "grad_norm": 0.22922383248806, "learning_rate": 7.323284596422825e-06, "loss": 0.0097, "step": 97480 }, { "epoch": 0.8232040700006333, "grad_norm": 0.26825833320617676, "learning_rate": 7.322632069089358e-06, "loss": 0.0111, "step": 97490 }, { "epoch": 0.8232885098477972, "grad_norm": 0.2150970995426178, "learning_rate": 7.321979491308799e-06, "loss": 0.0093, "step": 97500 }, { "epoch": 0.823372949694961, "grad_norm": 0.3138466477394104, "learning_rate": 7.321326863095322e-06, "loss": 0.0234, "step": 97510 }, { "epoch": 0.8234573895421249, "grad_norm": 0.16617390513420105, "learning_rate": 7.3206741844631005e-06, "loss": 0.0089, "step": 97520 }, { "epoch": 0.8235418293892888, "grad_norm": 0.531845211982727, "learning_rate": 7.320021455426311e-06, "loss": 0.0124, "step": 97530 }, { "epoch": 0.8236262692364527, "grad_norm": 0.4745454788208008, "learning_rate": 7.319368675999132e-06, "loss": 0.0137, "step": 97540 }, { "epoch": 0.8237107090836165, "grad_norm": 0.1042683869600296, "learning_rate": 7.318715846195738e-06, "loss": 0.0101, "step": 97550 }, { "epoch": 0.8237951489307804, "grad_norm": 0.07747920602560043, "learning_rate": 7.318062966030314e-06, "loss": 0.0065, "step": 97560 }, { "epoch": 0.8238795887779443, "grad_norm": 0.3406497836112976, "learning_rate": 7.317410035517036e-06, "loss": 0.0089, "step": 97570 }, { "epoch": 0.8239640286251082, "grad_norm": 0.19534429907798767, "learning_rate": 7.316757054670087e-06, "loss": 0.0122, "step": 97580 }, { "epoch": 0.8240484684722721, "grad_norm": 0.19857051968574524, "learning_rate": 7.316104023503651e-06, "loss": 0.0102, "step": 97590 }, { "epoch": 0.824132908319436, "grad_norm": 0.33018773794174194, "learning_rate": 7.315450942031908e-06, "loss": 0.0095, "step": 97600 }, { "epoch": 0.8242173481665999, "grad_norm": 0.4010968804359436, "learning_rate": 7.314797810269046e-06, "loss": 0.0084, "step": 97610 }, { "epoch": 0.8243017880137637, "grad_norm": 0.17550118267536163, "learning_rate": 7.314144628229251e-06, "loss": 0.0071, "step": 97620 }, { "epoch": 0.8243862278609275, "grad_norm": 0.2510714530944824, "learning_rate": 7.313491395926706e-06, "loss": 0.0126, "step": 97630 }, { "epoch": 0.8244706677080914, "grad_norm": 0.4020955562591553, "learning_rate": 7.312838113375604e-06, "loss": 0.0116, "step": 97640 }, { "epoch": 0.8245551075552553, "grad_norm": 0.31103062629699707, "learning_rate": 7.3121847805901316e-06, "loss": 0.0086, "step": 97650 }, { "epoch": 0.8246395474024192, "grad_norm": 0.5167182683944702, "learning_rate": 7.31153139758448e-06, "loss": 0.0116, "step": 97660 }, { "epoch": 0.8247239872495831, "grad_norm": 0.32141047716140747, "learning_rate": 7.31087796437284e-06, "loss": 0.0078, "step": 97670 }, { "epoch": 0.824808427096747, "grad_norm": 0.06232735887169838, "learning_rate": 7.3102244809694036e-06, "loss": 0.0079, "step": 97680 }, { "epoch": 0.8248928669439108, "grad_norm": 1.1174544095993042, "learning_rate": 7.309570947388365e-06, "loss": 0.0123, "step": 97690 }, { "epoch": 0.8249773067910747, "grad_norm": 0.8025833368301392, "learning_rate": 7.30891736364392e-06, "loss": 0.0084, "step": 97700 }, { "epoch": 0.8250617466382386, "grad_norm": 0.8150472640991211, "learning_rate": 7.308263729750262e-06, "loss": 0.0083, "step": 97710 }, { "epoch": 0.8251461864854025, "grad_norm": 0.04850859194993973, "learning_rate": 7.307610045721589e-06, "loss": 0.0108, "step": 97720 }, { "epoch": 0.8252306263325664, "grad_norm": 0.5995544195175171, "learning_rate": 7.306956311572099e-06, "loss": 0.0089, "step": 97730 }, { "epoch": 0.8253150661797302, "grad_norm": 0.3896700441837311, "learning_rate": 7.30630252731599e-06, "loss": 0.0106, "step": 97740 }, { "epoch": 0.825399506026894, "grad_norm": 0.11731772869825363, "learning_rate": 7.305648692967463e-06, "loss": 0.0086, "step": 97750 }, { "epoch": 0.8254839458740579, "grad_norm": 0.7635310292243958, "learning_rate": 7.304994808540719e-06, "loss": 0.0139, "step": 97760 }, { "epoch": 0.8255683857212218, "grad_norm": 0.2681185007095337, "learning_rate": 7.304340874049959e-06, "loss": 0.0112, "step": 97770 }, { "epoch": 0.8256528255683857, "grad_norm": 0.45689812302589417, "learning_rate": 7.303686889509389e-06, "loss": 0.0096, "step": 97780 }, { "epoch": 0.8257372654155496, "grad_norm": 0.5322302579879761, "learning_rate": 7.303032854933211e-06, "loss": 0.0098, "step": 97790 }, { "epoch": 0.8258217052627135, "grad_norm": 0.4117896258831024, "learning_rate": 7.3023787703356305e-06, "loss": 0.0099, "step": 97800 }, { "epoch": 0.8259061451098774, "grad_norm": 0.14913558959960938, "learning_rate": 7.301724635730856e-06, "loss": 0.0078, "step": 97810 }, { "epoch": 0.8259905849570413, "grad_norm": 0.08513712137937546, "learning_rate": 7.301070451133094e-06, "loss": 0.0115, "step": 97820 }, { "epoch": 0.8260750248042051, "grad_norm": 0.2387849986553192, "learning_rate": 7.300416216556554e-06, "loss": 0.0176, "step": 97830 }, { "epoch": 0.826159464651369, "grad_norm": 0.4713267683982849, "learning_rate": 7.299761932015444e-06, "loss": 0.0054, "step": 97840 }, { "epoch": 0.8262439044985329, "grad_norm": 0.3299142122268677, "learning_rate": 7.299107597523977e-06, "loss": 0.0073, "step": 97850 }, { "epoch": 0.8263283443456967, "grad_norm": 0.212269589304924, "learning_rate": 7.298453213096363e-06, "loss": 0.0045, "step": 97860 }, { "epoch": 0.8264127841928606, "grad_norm": 0.2738371193408966, "learning_rate": 7.297798778746817e-06, "loss": 0.0118, "step": 97870 }, { "epoch": 0.8264972240400245, "grad_norm": 0.10433625429868698, "learning_rate": 7.297144294489552e-06, "loss": 0.0087, "step": 97880 }, { "epoch": 0.8265816638871883, "grad_norm": 0.8665652871131897, "learning_rate": 7.296489760338784e-06, "loss": 0.009, "step": 97890 }, { "epoch": 0.8266661037343522, "grad_norm": 0.11456038802862167, "learning_rate": 7.29583517630873e-06, "loss": 0.0043, "step": 97900 }, { "epoch": 0.8267505435815161, "grad_norm": 0.15074291825294495, "learning_rate": 7.2951805424136045e-06, "loss": 0.0076, "step": 97910 }, { "epoch": 0.82683498342868, "grad_norm": 0.3785398602485657, "learning_rate": 7.294525858667629e-06, "loss": 0.008, "step": 97920 }, { "epoch": 0.8269194232758439, "grad_norm": 0.6216462254524231, "learning_rate": 7.293871125085022e-06, "loss": 0.014, "step": 97930 }, { "epoch": 0.8270038631230078, "grad_norm": 0.18059588968753815, "learning_rate": 7.293216341680004e-06, "loss": 0.0122, "step": 97940 }, { "epoch": 0.8270883029701717, "grad_norm": 0.35422036051750183, "learning_rate": 7.292561508466798e-06, "loss": 0.012, "step": 97950 }, { "epoch": 0.8271727428173355, "grad_norm": 0.1895601451396942, "learning_rate": 7.291906625459624e-06, "loss": 0.014, "step": 97960 }, { "epoch": 0.8272571826644993, "grad_norm": 0.0895746499300003, "learning_rate": 7.291251692672708e-06, "loss": 0.0118, "step": 97970 }, { "epoch": 0.8273416225116632, "grad_norm": 0.2604452967643738, "learning_rate": 7.290596710120275e-06, "loss": 0.0107, "step": 97980 }, { "epoch": 0.8274260623588271, "grad_norm": 0.11374633014202118, "learning_rate": 7.289941677816552e-06, "loss": 0.0101, "step": 97990 }, { "epoch": 0.827510502205991, "grad_norm": 0.27202698588371277, "learning_rate": 7.289286595775764e-06, "loss": 0.009, "step": 98000 }, { "epoch": 0.8275949420531549, "grad_norm": 0.6454841494560242, "learning_rate": 7.28863146401214e-06, "loss": 0.0106, "step": 98010 }, { "epoch": 0.8276793819003188, "grad_norm": 0.505750298500061, "learning_rate": 7.28797628253991e-06, "loss": 0.0108, "step": 98020 }, { "epoch": 0.8277638217474826, "grad_norm": 0.2246260643005371, "learning_rate": 7.287321051373304e-06, "loss": 0.0048, "step": 98030 }, { "epoch": 0.8278482615946465, "grad_norm": 0.23113994300365448, "learning_rate": 7.286665770526553e-06, "loss": 0.0067, "step": 98040 }, { "epoch": 0.8279327014418104, "grad_norm": 0.46102941036224365, "learning_rate": 7.286010440013891e-06, "loss": 0.0083, "step": 98050 }, { "epoch": 0.8280171412889743, "grad_norm": 0.13142327964305878, "learning_rate": 7.285355059849551e-06, "loss": 0.0116, "step": 98060 }, { "epoch": 0.8281015811361382, "grad_norm": 0.06256221234798431, "learning_rate": 7.284699630047767e-06, "loss": 0.018, "step": 98070 }, { "epoch": 0.8281860209833021, "grad_norm": 0.8608816266059875, "learning_rate": 7.284044150622775e-06, "loss": 0.0121, "step": 98080 }, { "epoch": 0.8282704608304658, "grad_norm": 0.16078591346740723, "learning_rate": 7.283388621588811e-06, "loss": 0.0107, "step": 98090 }, { "epoch": 0.8283549006776297, "grad_norm": 0.072982057929039, "learning_rate": 7.282733042960115e-06, "loss": 0.0081, "step": 98100 }, { "epoch": 0.8284393405247936, "grad_norm": 0.2646220922470093, "learning_rate": 7.282077414750926e-06, "loss": 0.0123, "step": 98110 }, { "epoch": 0.8285237803719575, "grad_norm": 0.11981672048568726, "learning_rate": 7.2814217369754825e-06, "loss": 0.0119, "step": 98120 }, { "epoch": 0.8286082202191214, "grad_norm": 0.38488760590553284, "learning_rate": 7.280766009648026e-06, "loss": 0.0109, "step": 98130 }, { "epoch": 0.8286926600662853, "grad_norm": 0.2909594178199768, "learning_rate": 7.280110232782799e-06, "loss": 0.0063, "step": 98140 }, { "epoch": 0.8287770999134492, "grad_norm": 0.21383853256702423, "learning_rate": 7.279454406394045e-06, "loss": 0.0107, "step": 98150 }, { "epoch": 0.828861539760613, "grad_norm": 0.006804171483963728, "learning_rate": 7.27879853049601e-06, "loss": 0.0159, "step": 98160 }, { "epoch": 0.8289459796077769, "grad_norm": 0.004003411158919334, "learning_rate": 7.278142605102936e-06, "loss": 0.009, "step": 98170 }, { "epoch": 0.8290304194549408, "grad_norm": 0.1793626844882965, "learning_rate": 7.277486630229072e-06, "loss": 0.0091, "step": 98180 }, { "epoch": 0.8291148593021047, "grad_norm": 0.3467097878456116, "learning_rate": 7.276830605888665e-06, "loss": 0.0128, "step": 98190 }, { "epoch": 0.8291992991492685, "grad_norm": 0.19218474626541138, "learning_rate": 7.276174532095964e-06, "loss": 0.0073, "step": 98200 }, { "epoch": 0.8292837389964324, "grad_norm": 0.3940037786960602, "learning_rate": 7.275518408865219e-06, "loss": 0.0132, "step": 98210 }, { "epoch": 0.8293681788435963, "grad_norm": 0.42878979444503784, "learning_rate": 7.274862236210679e-06, "loss": 0.0063, "step": 98220 }, { "epoch": 0.8294526186907601, "grad_norm": 0.09018032997846603, "learning_rate": 7.2742060141466e-06, "loss": 0.0301, "step": 98230 }, { "epoch": 0.829537058537924, "grad_norm": 0.3370353579521179, "learning_rate": 7.273549742687229e-06, "loss": 0.0104, "step": 98240 }, { "epoch": 0.8296214983850879, "grad_norm": 0.4256303012371063, "learning_rate": 7.272893421846826e-06, "loss": 0.0072, "step": 98250 }, { "epoch": 0.8297059382322518, "grad_norm": 0.33834266662597656, "learning_rate": 7.272237051639642e-06, "loss": 0.0077, "step": 98260 }, { "epoch": 0.8297903780794157, "grad_norm": 0.09415960311889648, "learning_rate": 7.271580632079938e-06, "loss": 0.0104, "step": 98270 }, { "epoch": 0.8298748179265796, "grad_norm": 0.3720003664493561, "learning_rate": 7.270924163181965e-06, "loss": 0.0085, "step": 98280 }, { "epoch": 0.8299592577737435, "grad_norm": 0.4446553587913513, "learning_rate": 7.270267644959985e-06, "loss": 0.0092, "step": 98290 }, { "epoch": 0.8300436976209074, "grad_norm": 0.2176344394683838, "learning_rate": 7.269611077428258e-06, "loss": 0.0105, "step": 98300 }, { "epoch": 0.8301281374680712, "grad_norm": 0.5267664194107056, "learning_rate": 7.268954460601043e-06, "loss": 0.008, "step": 98310 }, { "epoch": 0.830212577315235, "grad_norm": 0.2310788631439209, "learning_rate": 7.268297794492603e-06, "loss": 0.01, "step": 98320 }, { "epoch": 0.8302970171623989, "grad_norm": 0.002625856315717101, "learning_rate": 7.267641079117198e-06, "loss": 0.0097, "step": 98330 }, { "epoch": 0.8303814570095628, "grad_norm": 0.22741009294986725, "learning_rate": 7.266984314489094e-06, "loss": 0.0085, "step": 98340 }, { "epoch": 0.8304658968567267, "grad_norm": 0.3582814335823059, "learning_rate": 7.266327500622555e-06, "loss": 0.0068, "step": 98350 }, { "epoch": 0.8305503367038906, "grad_norm": 0.3834122121334076, "learning_rate": 7.265670637531848e-06, "loss": 0.0163, "step": 98360 }, { "epoch": 0.8306347765510544, "grad_norm": 0.2552168071269989, "learning_rate": 7.265013725231238e-06, "loss": 0.0103, "step": 98370 }, { "epoch": 0.8307192163982183, "grad_norm": 0.2108568698167801, "learning_rate": 7.2643567637349944e-06, "loss": 0.015, "step": 98380 }, { "epoch": 0.8308036562453822, "grad_norm": 0.4329606294631958, "learning_rate": 7.263699753057387e-06, "loss": 0.0149, "step": 98390 }, { "epoch": 0.8308880960925461, "grad_norm": 0.1681220829486847, "learning_rate": 7.263042693212683e-06, "loss": 0.0086, "step": 98400 }, { "epoch": 0.83097253593971, "grad_norm": 0.3743755519390106, "learning_rate": 7.262385584215157e-06, "loss": 0.0104, "step": 98410 }, { "epoch": 0.8310569757868739, "grad_norm": 0.6492622494697571, "learning_rate": 7.261728426079079e-06, "loss": 0.013, "step": 98420 }, { "epoch": 0.8311414156340377, "grad_norm": 0.10399723798036575, "learning_rate": 7.261071218818723e-06, "loss": 0.0076, "step": 98430 }, { "epoch": 0.8312258554812015, "grad_norm": 0.4037829339504242, "learning_rate": 7.260413962448365e-06, "loss": 0.0083, "step": 98440 }, { "epoch": 0.8313102953283654, "grad_norm": 0.5899246335029602, "learning_rate": 7.259756656982276e-06, "loss": 0.0137, "step": 98450 }, { "epoch": 0.8313947351755293, "grad_norm": 0.2850863039493561, "learning_rate": 7.259099302434738e-06, "loss": 0.009, "step": 98460 }, { "epoch": 0.8314791750226932, "grad_norm": 0.39914238452911377, "learning_rate": 7.258441898820025e-06, "loss": 0.0087, "step": 98470 }, { "epoch": 0.8315636148698571, "grad_norm": 0.43015819787979126, "learning_rate": 7.257784446152416e-06, "loss": 0.0097, "step": 98480 }, { "epoch": 0.831648054717021, "grad_norm": 0.24147233366966248, "learning_rate": 7.257126944446193e-06, "loss": 0.0154, "step": 98490 }, { "epoch": 0.8317324945641849, "grad_norm": 0.4696778953075409, "learning_rate": 7.256469393715634e-06, "loss": 0.011, "step": 98500 }, { "epoch": 0.8318169344113487, "grad_norm": 0.3685465455055237, "learning_rate": 7.255811793975024e-06, "loss": 0.0074, "step": 98510 }, { "epoch": 0.8319013742585126, "grad_norm": 0.22873719036579132, "learning_rate": 7.255154145238642e-06, "loss": 0.0098, "step": 98520 }, { "epoch": 0.8319858141056765, "grad_norm": 0.3931303322315216, "learning_rate": 7.2544964475207754e-06, "loss": 0.0096, "step": 98530 }, { "epoch": 0.8320702539528404, "grad_norm": 0.4283108115196228, "learning_rate": 7.253838700835708e-06, "loss": 0.0113, "step": 98540 }, { "epoch": 0.8321546938000042, "grad_norm": 0.33155983686447144, "learning_rate": 7.253180905197725e-06, "loss": 0.0137, "step": 98550 }, { "epoch": 0.8322391336471681, "grad_norm": 0.4346943199634552, "learning_rate": 7.252523060621115e-06, "loss": 0.0163, "step": 98560 }, { "epoch": 0.832323573494332, "grad_norm": 0.1381939798593521, "learning_rate": 7.2518651671201655e-06, "loss": 0.0082, "step": 98570 }, { "epoch": 0.8324080133414958, "grad_norm": 0.22346021234989166, "learning_rate": 7.251207224709166e-06, "loss": 0.0147, "step": 98580 }, { "epoch": 0.8324924531886597, "grad_norm": 0.5102558732032776, "learning_rate": 7.250549233402406e-06, "loss": 0.0113, "step": 98590 }, { "epoch": 0.8325768930358236, "grad_norm": 0.4034309387207031, "learning_rate": 7.24989119321418e-06, "loss": 0.0105, "step": 98600 }, { "epoch": 0.8326613328829875, "grad_norm": 0.754840075969696, "learning_rate": 7.249233104158777e-06, "loss": 0.0114, "step": 98610 }, { "epoch": 0.8327457727301514, "grad_norm": 0.21625050902366638, "learning_rate": 7.248574966250493e-06, "loss": 0.0128, "step": 98620 }, { "epoch": 0.8328302125773153, "grad_norm": 0.406115859746933, "learning_rate": 7.24791677950362e-06, "loss": 0.0071, "step": 98630 }, { "epoch": 0.8329146524244792, "grad_norm": 0.22851352393627167, "learning_rate": 7.247258543932456e-06, "loss": 0.0134, "step": 98640 }, { "epoch": 0.832999092271643, "grad_norm": 0.21053236722946167, "learning_rate": 7.246600259551298e-06, "loss": 0.0146, "step": 98650 }, { "epoch": 0.8330835321188068, "grad_norm": 0.6197178363800049, "learning_rate": 7.2459419263744404e-06, "loss": 0.0172, "step": 98660 }, { "epoch": 0.8331679719659707, "grad_norm": 0.3625965714454651, "learning_rate": 7.245283544416185e-06, "loss": 0.0123, "step": 98670 }, { "epoch": 0.8332524118131346, "grad_norm": 0.36638492345809937, "learning_rate": 7.244625113690832e-06, "loss": 0.0127, "step": 98680 }, { "epoch": 0.8333368516602985, "grad_norm": 0.15885338187217712, "learning_rate": 7.24396663421268e-06, "loss": 0.0079, "step": 98690 }, { "epoch": 0.8334212915074624, "grad_norm": 0.370194673538208, "learning_rate": 7.243308105996034e-06, "loss": 0.0084, "step": 98700 }, { "epoch": 0.8335057313546262, "grad_norm": 0.36284497380256653, "learning_rate": 7.242649529055195e-06, "loss": 0.0117, "step": 98710 }, { "epoch": 0.8335901712017901, "grad_norm": 0.10360197722911835, "learning_rate": 7.241990903404469e-06, "loss": 0.0131, "step": 98720 }, { "epoch": 0.833674611048954, "grad_norm": 0.04987889528274536, "learning_rate": 7.241332229058158e-06, "loss": 0.0079, "step": 98730 }, { "epoch": 0.8337590508961179, "grad_norm": 0.2926906943321228, "learning_rate": 7.240673506030571e-06, "loss": 0.008, "step": 98740 }, { "epoch": 0.8338434907432818, "grad_norm": 0.12862958014011383, "learning_rate": 7.240014734336015e-06, "loss": 0.0109, "step": 98750 }, { "epoch": 0.8339279305904457, "grad_norm": 0.7086201310157776, "learning_rate": 7.2393559139887985e-06, "loss": 0.0078, "step": 98760 }, { "epoch": 0.8340123704376096, "grad_norm": 0.5897554159164429, "learning_rate": 7.23869704500323e-06, "loss": 0.0159, "step": 98770 }, { "epoch": 0.8340968102847733, "grad_norm": 0.21005083620548248, "learning_rate": 7.23803812739362e-06, "loss": 0.0101, "step": 98780 }, { "epoch": 0.8341812501319372, "grad_norm": 0.2943255603313446, "learning_rate": 7.237379161174281e-06, "loss": 0.0096, "step": 98790 }, { "epoch": 0.8342656899791011, "grad_norm": 0.1748390942811966, "learning_rate": 7.236720146359526e-06, "loss": 0.0067, "step": 98800 }, { "epoch": 0.834350129826265, "grad_norm": 0.5216745734214783, "learning_rate": 7.236061082963666e-06, "loss": 0.0052, "step": 98810 }, { "epoch": 0.8344345696734289, "grad_norm": 0.2474871426820755, "learning_rate": 7.235401971001017e-06, "loss": 0.0141, "step": 98820 }, { "epoch": 0.8345190095205928, "grad_norm": 0.5614848732948303, "learning_rate": 7.234742810485897e-06, "loss": 0.0092, "step": 98830 }, { "epoch": 0.8346034493677567, "grad_norm": 0.2489587813615799, "learning_rate": 7.234083601432621e-06, "loss": 0.0075, "step": 98840 }, { "epoch": 0.8346878892149205, "grad_norm": 0.2500274181365967, "learning_rate": 7.233424343855507e-06, "loss": 0.0117, "step": 98850 }, { "epoch": 0.8347723290620844, "grad_norm": 0.21460479497909546, "learning_rate": 7.232765037768872e-06, "loss": 0.0131, "step": 98860 }, { "epoch": 0.8348567689092483, "grad_norm": 0.0976046696305275, "learning_rate": 7.2321056831870386e-06, "loss": 0.0135, "step": 98870 }, { "epoch": 0.8349412087564122, "grad_norm": 0.2483498454093933, "learning_rate": 7.231446280124329e-06, "loss": 0.0096, "step": 98880 }, { "epoch": 0.835025648603576, "grad_norm": 0.5415226817131042, "learning_rate": 7.230786828595063e-06, "loss": 0.0107, "step": 98890 }, { "epoch": 0.8351100884507399, "grad_norm": 0.2719568908214569, "learning_rate": 7.230127328613563e-06, "loss": 0.0079, "step": 98900 }, { "epoch": 0.8351945282979037, "grad_norm": 0.574739933013916, "learning_rate": 7.229467780194155e-06, "loss": 0.017, "step": 98910 }, { "epoch": 0.8352789681450676, "grad_norm": 0.3553348779678345, "learning_rate": 7.228808183351164e-06, "loss": 0.0104, "step": 98920 }, { "epoch": 0.8353634079922315, "grad_norm": 0.3424268066883087, "learning_rate": 7.228148538098916e-06, "loss": 0.011, "step": 98930 }, { "epoch": 0.8354478478393954, "grad_norm": 0.007685559336096048, "learning_rate": 7.2274888444517375e-06, "loss": 0.0067, "step": 98940 }, { "epoch": 0.8355322876865593, "grad_norm": 0.18805472552776337, "learning_rate": 7.226829102423958e-06, "loss": 0.0108, "step": 98950 }, { "epoch": 0.8356167275337232, "grad_norm": 0.4567101299762726, "learning_rate": 7.226169312029907e-06, "loss": 0.0057, "step": 98960 }, { "epoch": 0.8357011673808871, "grad_norm": 0.24289683997631073, "learning_rate": 7.225509473283915e-06, "loss": 0.0132, "step": 98970 }, { "epoch": 0.835785607228051, "grad_norm": 0.22028599679470062, "learning_rate": 7.224849586200312e-06, "loss": 0.013, "step": 98980 }, { "epoch": 0.8358700470752148, "grad_norm": 0.13443320989608765, "learning_rate": 7.2241896507934324e-06, "loss": 0.0115, "step": 98990 }, { "epoch": 0.8359544869223787, "grad_norm": 0.6360293626785278, "learning_rate": 7.223529667077611e-06, "loss": 0.01, "step": 99000 }, { "epoch": 0.8360389267695425, "grad_norm": 0.2708393335342407, "learning_rate": 7.22286963506718e-06, "loss": 0.0094, "step": 99010 }, { "epoch": 0.8361233666167064, "grad_norm": 0.11556339263916016, "learning_rate": 7.222209554776476e-06, "loss": 0.0109, "step": 99020 }, { "epoch": 0.8362078064638703, "grad_norm": 0.2671622931957245, "learning_rate": 7.221549426219835e-06, "loss": 0.0123, "step": 99030 }, { "epoch": 0.8362922463110342, "grad_norm": 0.7287681102752686, "learning_rate": 7.220889249411595e-06, "loss": 0.0092, "step": 99040 }, { "epoch": 0.836376686158198, "grad_norm": 0.4092808663845062, "learning_rate": 7.220229024366099e-06, "loss": 0.0128, "step": 99050 }, { "epoch": 0.8364611260053619, "grad_norm": 0.32684823870658875, "learning_rate": 7.219568751097681e-06, "loss": 0.0095, "step": 99060 }, { "epoch": 0.8365455658525258, "grad_norm": 0.2671264111995697, "learning_rate": 7.218908429620685e-06, "loss": 0.0134, "step": 99070 }, { "epoch": 0.8366300056996897, "grad_norm": 0.20087289810180664, "learning_rate": 7.218248059949453e-06, "loss": 0.0101, "step": 99080 }, { "epoch": 0.8367144455468536, "grad_norm": 0.3822290003299713, "learning_rate": 7.217587642098327e-06, "loss": 0.0056, "step": 99090 }, { "epoch": 0.8367988853940175, "grad_norm": 0.295960009098053, "learning_rate": 7.216927176081652e-06, "loss": 0.0061, "step": 99100 }, { "epoch": 0.8368833252411814, "grad_norm": 0.21420319378376007, "learning_rate": 7.216266661913772e-06, "loss": 0.0085, "step": 99110 }, { "epoch": 0.8369677650883451, "grad_norm": 0.3769579827785492, "learning_rate": 7.215606099609036e-06, "loss": 0.0197, "step": 99120 }, { "epoch": 0.837052204935509, "grad_norm": 0.25335896015167236, "learning_rate": 7.2149454891817905e-06, "loss": 0.0079, "step": 99130 }, { "epoch": 0.8371366447826729, "grad_norm": 0.16779769957065582, "learning_rate": 7.214284830646381e-06, "loss": 0.0128, "step": 99140 }, { "epoch": 0.8372210846298368, "grad_norm": 0.42155516147613525, "learning_rate": 7.213624124017158e-06, "loss": 0.0097, "step": 99150 }, { "epoch": 0.8373055244770007, "grad_norm": 0.2681193947792053, "learning_rate": 7.212963369308475e-06, "loss": 0.0134, "step": 99160 }, { "epoch": 0.8373899643241646, "grad_norm": 0.29992565512657166, "learning_rate": 7.212302566534679e-06, "loss": 0.0146, "step": 99170 }, { "epoch": 0.8374744041713285, "grad_norm": 0.3149823844432831, "learning_rate": 7.211641715710125e-06, "loss": 0.0095, "step": 99180 }, { "epoch": 0.8375588440184923, "grad_norm": 0.20071427524089813, "learning_rate": 7.2109808168491666e-06, "loss": 0.0092, "step": 99190 }, { "epoch": 0.8376432838656562, "grad_norm": 0.25952181220054626, "learning_rate": 7.210319869966158e-06, "loss": 0.0094, "step": 99200 }, { "epoch": 0.8377277237128201, "grad_norm": 0.32363834977149963, "learning_rate": 7.209658875075455e-06, "loss": 0.0103, "step": 99210 }, { "epoch": 0.837812163559984, "grad_norm": 0.3029402494430542, "learning_rate": 7.208997832191413e-06, "loss": 0.0117, "step": 99220 }, { "epoch": 0.8378966034071479, "grad_norm": 0.21269062161445618, "learning_rate": 7.208336741328391e-06, "loss": 0.0099, "step": 99230 }, { "epoch": 0.8379810432543117, "grad_norm": 0.12846645712852478, "learning_rate": 7.207675602500749e-06, "loss": 0.0139, "step": 99240 }, { "epoch": 0.8380654831014756, "grad_norm": 0.19183050096035004, "learning_rate": 7.207014415722845e-06, "loss": 0.009, "step": 99250 }, { "epoch": 0.8381499229486394, "grad_norm": 0.37628889083862305, "learning_rate": 7.206353181009039e-06, "loss": 0.0134, "step": 99260 }, { "epoch": 0.8382343627958033, "grad_norm": 0.7061870694160461, "learning_rate": 7.205691898373694e-06, "loss": 0.0078, "step": 99270 }, { "epoch": 0.8383188026429672, "grad_norm": 0.23070457577705383, "learning_rate": 7.205030567831172e-06, "loss": 0.0089, "step": 99280 }, { "epoch": 0.8384032424901311, "grad_norm": 0.24035418033599854, "learning_rate": 7.204369189395841e-06, "loss": 0.0149, "step": 99290 }, { "epoch": 0.838487682337295, "grad_norm": 0.07565970718860626, "learning_rate": 7.203707763082062e-06, "loss": 0.0102, "step": 99300 }, { "epoch": 0.8385721221844589, "grad_norm": 0.2614142894744873, "learning_rate": 7.2030462889042e-06, "loss": 0.0093, "step": 99310 }, { "epoch": 0.8386565620316228, "grad_norm": 0.3471250534057617, "learning_rate": 7.202384766876625e-06, "loss": 0.0095, "step": 99320 }, { "epoch": 0.8387410018787866, "grad_norm": 0.34914541244506836, "learning_rate": 7.201723197013704e-06, "loss": 0.0136, "step": 99330 }, { "epoch": 0.8388254417259505, "grad_norm": 0.2653847634792328, "learning_rate": 7.2010615793298076e-06, "loss": 0.0139, "step": 99340 }, { "epoch": 0.8389098815731143, "grad_norm": 0.8185871243476868, "learning_rate": 7.2003999138393035e-06, "loss": 0.0107, "step": 99350 }, { "epoch": 0.8389943214202782, "grad_norm": 0.25430142879486084, "learning_rate": 7.199738200556565e-06, "loss": 0.0049, "step": 99360 }, { "epoch": 0.8390787612674421, "grad_norm": 0.04461457580327988, "learning_rate": 7.199076439495964e-06, "loss": 0.0076, "step": 99370 }, { "epoch": 0.839163201114606, "grad_norm": 0.28877946734428406, "learning_rate": 7.198414630671873e-06, "loss": 0.0215, "step": 99380 }, { "epoch": 0.8392476409617698, "grad_norm": 0.6796011328697205, "learning_rate": 7.197752774098665e-06, "loss": 0.0126, "step": 99390 }, { "epoch": 0.8393320808089337, "grad_norm": 0.43917176127433777, "learning_rate": 7.197090869790718e-06, "loss": 0.0094, "step": 99400 }, { "epoch": 0.8394165206560976, "grad_norm": 0.105918750166893, "learning_rate": 7.1964289177624084e-06, "loss": 0.0106, "step": 99410 }, { "epoch": 0.8395009605032615, "grad_norm": 0.08646237850189209, "learning_rate": 7.195766918028112e-06, "loss": 0.0089, "step": 99420 }, { "epoch": 0.8395854003504254, "grad_norm": 0.19466397166252136, "learning_rate": 7.195104870602208e-06, "loss": 0.0167, "step": 99430 }, { "epoch": 0.8396698401975893, "grad_norm": 1.0859122276306152, "learning_rate": 7.194442775499077e-06, "loss": 0.0105, "step": 99440 }, { "epoch": 0.8397542800447532, "grad_norm": 0.46283435821533203, "learning_rate": 7.193780632733098e-06, "loss": 0.0071, "step": 99450 }, { "epoch": 0.8398387198919169, "grad_norm": 0.6811185479164124, "learning_rate": 7.1931184423186525e-06, "loss": 0.0095, "step": 99460 }, { "epoch": 0.8399231597390808, "grad_norm": 0.6483776569366455, "learning_rate": 7.192456204270124e-06, "loss": 0.0084, "step": 99470 }, { "epoch": 0.8400075995862447, "grad_norm": 0.32827216386795044, "learning_rate": 7.191793918601898e-06, "loss": 0.0147, "step": 99480 }, { "epoch": 0.8400920394334086, "grad_norm": 0.1939389854669571, "learning_rate": 7.191131585328355e-06, "loss": 0.0112, "step": 99490 }, { "epoch": 0.8401764792805725, "grad_norm": 1.0476899147033691, "learning_rate": 7.190469204463885e-06, "loss": 0.0146, "step": 99500 }, { "epoch": 0.8402609191277364, "grad_norm": 0.293567419052124, "learning_rate": 7.189806776022871e-06, "loss": 0.0162, "step": 99510 }, { "epoch": 0.8403453589749003, "grad_norm": 0.9084519743919373, "learning_rate": 7.1891443000197025e-06, "loss": 0.0111, "step": 99520 }, { "epoch": 0.8404297988220641, "grad_norm": 0.1685532182455063, "learning_rate": 7.18848177646877e-06, "loss": 0.0051, "step": 99530 }, { "epoch": 0.840514238669228, "grad_norm": 0.5052360892295837, "learning_rate": 7.187819205384461e-06, "loss": 0.0217, "step": 99540 }, { "epoch": 0.8405986785163919, "grad_norm": 0.2937142550945282, "learning_rate": 7.187156586781165e-06, "loss": 0.0142, "step": 99550 }, { "epoch": 0.8406831183635558, "grad_norm": 0.28463485836982727, "learning_rate": 7.186493920673279e-06, "loss": 0.0108, "step": 99560 }, { "epoch": 0.8407675582107197, "grad_norm": 0.17719793319702148, "learning_rate": 7.185831207075192e-06, "loss": 0.0049, "step": 99570 }, { "epoch": 0.8408519980578835, "grad_norm": 0.5222830772399902, "learning_rate": 7.1851684460013e-06, "loss": 0.0106, "step": 99580 }, { "epoch": 0.8409364379050474, "grad_norm": 0.19129106402397156, "learning_rate": 7.184505637465995e-06, "loss": 0.0065, "step": 99590 }, { "epoch": 0.8410208777522112, "grad_norm": 0.5028334259986877, "learning_rate": 7.183842781483678e-06, "loss": 0.0205, "step": 99600 }, { "epoch": 0.8411053175993751, "grad_norm": 0.23174616694450378, "learning_rate": 7.183179878068741e-06, "loss": 0.0097, "step": 99610 }, { "epoch": 0.841189757446539, "grad_norm": 0.484767884016037, "learning_rate": 7.1825169272355865e-06, "loss": 0.0158, "step": 99620 }, { "epoch": 0.8412741972937029, "grad_norm": 1.3766721487045288, "learning_rate": 7.18185392899861e-06, "loss": 0.0149, "step": 99630 }, { "epoch": 0.8413586371408668, "grad_norm": 0.4032990038394928, "learning_rate": 7.181190883372213e-06, "loss": 0.0118, "step": 99640 }, { "epoch": 0.8414430769880307, "grad_norm": 0.5365789532661438, "learning_rate": 7.1805277903707975e-06, "loss": 0.0132, "step": 99650 }, { "epoch": 0.8415275168351946, "grad_norm": 0.0724346786737442, "learning_rate": 7.179864650008767e-06, "loss": 0.0171, "step": 99660 }, { "epoch": 0.8416119566823584, "grad_norm": 0.031197557225823402, "learning_rate": 7.17920146230052e-06, "loss": 0.0082, "step": 99670 }, { "epoch": 0.8416963965295223, "grad_norm": 0.29389727115631104, "learning_rate": 7.178538227260465e-06, "loss": 0.0146, "step": 99680 }, { "epoch": 0.8417808363766861, "grad_norm": 0.35239377617836, "learning_rate": 7.1778749449030075e-06, "loss": 0.0101, "step": 99690 }, { "epoch": 0.84186527622385, "grad_norm": 2.52107572555542, "learning_rate": 7.177211615242551e-06, "loss": 0.0139, "step": 99700 }, { "epoch": 0.8419497160710139, "grad_norm": 0.16032841801643372, "learning_rate": 7.176548238293503e-06, "loss": 0.0044, "step": 99710 }, { "epoch": 0.8420341559181778, "grad_norm": 0.27042949199676514, "learning_rate": 7.175884814070276e-06, "loss": 0.0157, "step": 99720 }, { "epoch": 0.8421185957653416, "grad_norm": 0.1939067840576172, "learning_rate": 7.175221342587276e-06, "loss": 0.006, "step": 99730 }, { "epoch": 0.8422030356125055, "grad_norm": 0.35053110122680664, "learning_rate": 7.174557823858914e-06, "loss": 0.0117, "step": 99740 }, { "epoch": 0.8422874754596694, "grad_norm": 0.2757541239261627, "learning_rate": 7.173894257899602e-06, "loss": 0.0146, "step": 99750 }, { "epoch": 0.8423719153068333, "grad_norm": 0.5353226661682129, "learning_rate": 7.173230644723751e-06, "loss": 0.0122, "step": 99760 }, { "epoch": 0.8424563551539972, "grad_norm": 0.7170132994651794, "learning_rate": 7.172566984345777e-06, "loss": 0.0105, "step": 99770 }, { "epoch": 0.8425407950011611, "grad_norm": 0.3771297335624695, "learning_rate": 7.171903276780092e-06, "loss": 0.0102, "step": 99780 }, { "epoch": 0.842625234848325, "grad_norm": 0.12475938349962234, "learning_rate": 7.171239522041113e-06, "loss": 0.0086, "step": 99790 }, { "epoch": 0.8427096746954889, "grad_norm": 0.3254238963127136, "learning_rate": 7.170575720143258e-06, "loss": 0.0178, "step": 99800 }, { "epoch": 0.8427941145426526, "grad_norm": 0.3408792018890381, "learning_rate": 7.169911871100942e-06, "loss": 0.0112, "step": 99810 }, { "epoch": 0.8428785543898165, "grad_norm": 0.5810567736625671, "learning_rate": 7.169247974928586e-06, "loss": 0.0124, "step": 99820 }, { "epoch": 0.8429629942369804, "grad_norm": 0.1754150688648224, "learning_rate": 7.168584031640607e-06, "loss": 0.0199, "step": 99830 }, { "epoch": 0.8430474340841443, "grad_norm": 0.25545430183410645, "learning_rate": 7.167920041251428e-06, "loss": 0.0131, "step": 99840 }, { "epoch": 0.8431318739313082, "grad_norm": 0.1461503952741623, "learning_rate": 7.16725600377547e-06, "loss": 0.008, "step": 99850 }, { "epoch": 0.8432163137784721, "grad_norm": 0.48765748739242554, "learning_rate": 7.166591919227156e-06, "loss": 0.016, "step": 99860 }, { "epoch": 0.843300753625636, "grad_norm": 0.11199339479207993, "learning_rate": 7.165927787620909e-06, "loss": 0.0116, "step": 99870 }, { "epoch": 0.8433851934727998, "grad_norm": 0.3258137106895447, "learning_rate": 7.165263608971154e-06, "loss": 0.0104, "step": 99880 }, { "epoch": 0.8434696333199637, "grad_norm": 0.2928480803966522, "learning_rate": 7.164599383292317e-06, "loss": 0.0083, "step": 99890 }, { "epoch": 0.8435540731671276, "grad_norm": 0.09315995872020721, "learning_rate": 7.163935110598825e-06, "loss": 0.0064, "step": 99900 }, { "epoch": 0.8436385130142915, "grad_norm": 0.2196534425020218, "learning_rate": 7.163270790905107e-06, "loss": 0.0094, "step": 99910 }, { "epoch": 0.8437229528614553, "grad_norm": 0.37583473324775696, "learning_rate": 7.162606424225589e-06, "loss": 0.0074, "step": 99920 }, { "epoch": 0.8438073927086192, "grad_norm": 0.17694103717803955, "learning_rate": 7.161942010574703e-06, "loss": 0.0059, "step": 99930 }, { "epoch": 0.843891832555783, "grad_norm": 0.18092747032642365, "learning_rate": 7.161277549966881e-06, "loss": 0.0061, "step": 99940 }, { "epoch": 0.8439762724029469, "grad_norm": 0.6316665410995483, "learning_rate": 7.160613042416553e-06, "loss": 0.0077, "step": 99950 }, { "epoch": 0.8440607122501108, "grad_norm": 0.6789823174476624, "learning_rate": 7.159948487938152e-06, "loss": 0.0113, "step": 99960 }, { "epoch": 0.8441451520972747, "grad_norm": 0.40551960468292236, "learning_rate": 7.159283886546112e-06, "loss": 0.0112, "step": 99970 }, { "epoch": 0.8442295919444386, "grad_norm": 0.42844998836517334, "learning_rate": 7.158619238254871e-06, "loss": 0.0063, "step": 99980 }, { "epoch": 0.8443140317916025, "grad_norm": 0.4408285319805145, "learning_rate": 7.15795454307886e-06, "loss": 0.0127, "step": 99990 }, { "epoch": 0.8443984716387664, "grad_norm": 0.24868325889110565, "learning_rate": 7.157289801032519e-06, "loss": 0.0062, "step": 100000 }, { "epoch": 0.8444829114859302, "grad_norm": 0.24803663790225983, "learning_rate": 7.156625012130285e-06, "loss": 0.0128, "step": 100010 }, { "epoch": 0.8445673513330941, "grad_norm": 0.5035736560821533, "learning_rate": 7.155960176386598e-06, "loss": 0.0121, "step": 100020 }, { "epoch": 0.844651791180258, "grad_norm": 0.037863779813051224, "learning_rate": 7.155295293815898e-06, "loss": 0.0072, "step": 100030 }, { "epoch": 0.8447362310274218, "grad_norm": 0.18029144406318665, "learning_rate": 7.154630364432625e-06, "loss": 0.0117, "step": 100040 }, { "epoch": 0.8448206708745857, "grad_norm": 0.2646721303462982, "learning_rate": 7.153965388251223e-06, "loss": 0.0113, "step": 100050 }, { "epoch": 0.8449051107217496, "grad_norm": 0.34958600997924805, "learning_rate": 7.1533003652861355e-06, "loss": 0.0062, "step": 100060 }, { "epoch": 0.8449895505689135, "grad_norm": 0.014294667169451714, "learning_rate": 7.1526352955518044e-06, "loss": 0.0121, "step": 100070 }, { "epoch": 0.8450739904160773, "grad_norm": 0.30959850549697876, "learning_rate": 7.151970179062675e-06, "loss": 0.0077, "step": 100080 }, { "epoch": 0.8451584302632412, "grad_norm": 0.44595983624458313, "learning_rate": 7.151305015833195e-06, "loss": 0.0124, "step": 100090 }, { "epoch": 0.8452428701104051, "grad_norm": 0.10136277228593826, "learning_rate": 7.150639805877809e-06, "loss": 0.0068, "step": 100100 }, { "epoch": 0.845327309957569, "grad_norm": 0.2258506417274475, "learning_rate": 7.14997454921097e-06, "loss": 0.0163, "step": 100110 }, { "epoch": 0.8454117498047329, "grad_norm": 0.48463159799575806, "learning_rate": 7.149309245847123e-06, "loss": 0.0133, "step": 100120 }, { "epoch": 0.8454961896518968, "grad_norm": 0.22440488636493683, "learning_rate": 7.148643895800719e-06, "loss": 0.0074, "step": 100130 }, { "epoch": 0.8455806294990607, "grad_norm": 0.19744446873664856, "learning_rate": 7.14797849908621e-06, "loss": 0.0092, "step": 100140 }, { "epoch": 0.8456650693462244, "grad_norm": 0.4671105146408081, "learning_rate": 7.147313055718049e-06, "loss": 0.0188, "step": 100150 }, { "epoch": 0.8457495091933883, "grad_norm": 0.5012025833129883, "learning_rate": 7.146647565710687e-06, "loss": 0.007, "step": 100160 }, { "epoch": 0.8458339490405522, "grad_norm": 0.31299328804016113, "learning_rate": 7.145982029078581e-06, "loss": 0.0102, "step": 100170 }, { "epoch": 0.8459183888877161, "grad_norm": 0.366100013256073, "learning_rate": 7.145316445836185e-06, "loss": 0.0122, "step": 100180 }, { "epoch": 0.84600282873488, "grad_norm": 0.4411545693874359, "learning_rate": 7.144650815997957e-06, "loss": 0.021, "step": 100190 }, { "epoch": 0.8460872685820439, "grad_norm": 0.28266018629074097, "learning_rate": 7.143985139578351e-06, "loss": 0.0081, "step": 100200 }, { "epoch": 0.8461717084292077, "grad_norm": 0.4930814504623413, "learning_rate": 7.143319416591826e-06, "loss": 0.0085, "step": 100210 }, { "epoch": 0.8462561482763716, "grad_norm": 0.47055917978286743, "learning_rate": 7.142653647052844e-06, "loss": 0.0088, "step": 100220 }, { "epoch": 0.8463405881235355, "grad_norm": 0.4062653183937073, "learning_rate": 7.141987830975864e-06, "loss": 0.0089, "step": 100230 }, { "epoch": 0.8464250279706994, "grad_norm": 0.21276533603668213, "learning_rate": 7.141321968375346e-06, "loss": 0.0105, "step": 100240 }, { "epoch": 0.8465094678178633, "grad_norm": 0.21933192014694214, "learning_rate": 7.140656059265755e-06, "loss": 0.006, "step": 100250 }, { "epoch": 0.8465939076650272, "grad_norm": 0.1753549873828888, "learning_rate": 7.139990103661552e-06, "loss": 0.0081, "step": 100260 }, { "epoch": 0.846678347512191, "grad_norm": 0.840157151222229, "learning_rate": 7.139324101577204e-06, "loss": 0.0056, "step": 100270 }, { "epoch": 0.8467627873593548, "grad_norm": 0.33778318762779236, "learning_rate": 7.138658053027173e-06, "loss": 0.0104, "step": 100280 }, { "epoch": 0.8468472272065187, "grad_norm": 0.3883293569087982, "learning_rate": 7.1379919580259285e-06, "loss": 0.0158, "step": 100290 }, { "epoch": 0.8469316670536826, "grad_norm": 0.1561940610408783, "learning_rate": 7.1373258165879365e-06, "loss": 0.0097, "step": 100300 }, { "epoch": 0.8470161069008465, "grad_norm": 0.11887510865926743, "learning_rate": 7.136659628727667e-06, "loss": 0.0082, "step": 100310 }, { "epoch": 0.8471005467480104, "grad_norm": 0.9881830215454102, "learning_rate": 7.135993394459586e-06, "loss": 0.0224, "step": 100320 }, { "epoch": 0.8471849865951743, "grad_norm": 0.32925331592559814, "learning_rate": 7.135327113798167e-06, "loss": 0.0143, "step": 100330 }, { "epoch": 0.8472694264423382, "grad_norm": 0.12452006340026855, "learning_rate": 7.134660786757881e-06, "loss": 0.0111, "step": 100340 }, { "epoch": 0.847353866289502, "grad_norm": 0.1111137792468071, "learning_rate": 7.1339944133532e-06, "loss": 0.0206, "step": 100350 }, { "epoch": 0.8474383061366659, "grad_norm": 0.17728807032108307, "learning_rate": 7.1333279935985975e-06, "loss": 0.009, "step": 100360 }, { "epoch": 0.8475227459838298, "grad_norm": 0.3419250547885895, "learning_rate": 7.132661527508549e-06, "loss": 0.0164, "step": 100370 }, { "epoch": 0.8476071858309936, "grad_norm": 0.41455990076065063, "learning_rate": 7.131995015097527e-06, "loss": 0.0113, "step": 100380 }, { "epoch": 0.8476916256781575, "grad_norm": 0.29121682047843933, "learning_rate": 7.1313284563800126e-06, "loss": 0.0093, "step": 100390 }, { "epoch": 0.8477760655253214, "grad_norm": 0.4263349175453186, "learning_rate": 7.1306618513704795e-06, "loss": 0.0096, "step": 100400 }, { "epoch": 0.8478605053724853, "grad_norm": 0.028584159910678864, "learning_rate": 7.129995200083407e-06, "loss": 0.0093, "step": 100410 }, { "epoch": 0.8479449452196491, "grad_norm": 0.17749258875846863, "learning_rate": 7.129328502533276e-06, "loss": 0.0072, "step": 100420 }, { "epoch": 0.848029385066813, "grad_norm": 0.5334484577178955, "learning_rate": 7.128661758734569e-06, "loss": 0.0106, "step": 100430 }, { "epoch": 0.8481138249139769, "grad_norm": 0.26478341221809387, "learning_rate": 7.1279949687017614e-06, "loss": 0.0136, "step": 100440 }, { "epoch": 0.8481982647611408, "grad_norm": 0.30641239881515503, "learning_rate": 7.1273281324493395e-06, "loss": 0.0042, "step": 100450 }, { "epoch": 0.8482827046083047, "grad_norm": 0.4658963680267334, "learning_rate": 7.126661249991786e-06, "loss": 0.0128, "step": 100460 }, { "epoch": 0.8483671444554686, "grad_norm": 0.17021353542804718, "learning_rate": 7.125994321343588e-06, "loss": 0.0118, "step": 100470 }, { "epoch": 0.8484515843026325, "grad_norm": 0.18313753604888916, "learning_rate": 7.125327346519229e-06, "loss": 0.0091, "step": 100480 }, { "epoch": 0.8485360241497963, "grad_norm": 0.5794776082038879, "learning_rate": 7.1246603255331925e-06, "loss": 0.011, "step": 100490 }, { "epoch": 0.8486204639969601, "grad_norm": 0.36613017320632935, "learning_rate": 7.123993258399971e-06, "loss": 0.011, "step": 100500 }, { "epoch": 0.848704903844124, "grad_norm": 0.6157810091972351, "learning_rate": 7.123326145134052e-06, "loss": 0.0143, "step": 100510 }, { "epoch": 0.8487893436912879, "grad_norm": 0.16160860657691956, "learning_rate": 7.122658985749924e-06, "loss": 0.0048, "step": 100520 }, { "epoch": 0.8488737835384518, "grad_norm": 0.46145692467689514, "learning_rate": 7.121991780262076e-06, "loss": 0.0129, "step": 100530 }, { "epoch": 0.8489582233856157, "grad_norm": 0.21356818079948425, "learning_rate": 7.121324528685003e-06, "loss": 0.0061, "step": 100540 }, { "epoch": 0.8490426632327795, "grad_norm": 0.16285046935081482, "learning_rate": 7.120657231033196e-06, "loss": 0.0082, "step": 100550 }, { "epoch": 0.8491271030799434, "grad_norm": 0.2533700466156006, "learning_rate": 7.119989887321148e-06, "loss": 0.0085, "step": 100560 }, { "epoch": 0.8492115429271073, "grad_norm": 0.3388354778289795, "learning_rate": 7.119322497563354e-06, "loss": 0.0069, "step": 100570 }, { "epoch": 0.8492959827742712, "grad_norm": 0.20663441717624664, "learning_rate": 7.11865506177431e-06, "loss": 0.0045, "step": 100580 }, { "epoch": 0.8493804226214351, "grad_norm": 0.08427207171916962, "learning_rate": 7.117987579968513e-06, "loss": 0.0067, "step": 100590 }, { "epoch": 0.849464862468599, "grad_norm": 0.0036467942409217358, "learning_rate": 7.117320052160458e-06, "loss": 0.0084, "step": 100600 }, { "epoch": 0.8495493023157628, "grad_norm": 0.14151234924793243, "learning_rate": 7.116652478364647e-06, "loss": 0.0127, "step": 100610 }, { "epoch": 0.8496337421629266, "grad_norm": 0.9535735249519348, "learning_rate": 7.115984858595576e-06, "loss": 0.0192, "step": 100620 }, { "epoch": 0.8497181820100905, "grad_norm": 0.3413408696651459, "learning_rate": 7.115317192867749e-06, "loss": 0.0092, "step": 100630 }, { "epoch": 0.8498026218572544, "grad_norm": 0.06182561069726944, "learning_rate": 7.114649481195665e-06, "loss": 0.0094, "step": 100640 }, { "epoch": 0.8498870617044183, "grad_norm": 0.1358458250761032, "learning_rate": 7.113981723593828e-06, "loss": 0.0069, "step": 100650 }, { "epoch": 0.8499715015515822, "grad_norm": 0.4201578199863434, "learning_rate": 7.113313920076742e-06, "loss": 0.0144, "step": 100660 }, { "epoch": 0.8500559413987461, "grad_norm": 0.5797721147537231, "learning_rate": 7.11264607065891e-06, "loss": 0.0122, "step": 100670 }, { "epoch": 0.85014038124591, "grad_norm": 0.12341813743114471, "learning_rate": 7.111978175354839e-06, "loss": 0.0086, "step": 100680 }, { "epoch": 0.8502248210930738, "grad_norm": 0.30694201588630676, "learning_rate": 7.1113102341790345e-06, "loss": 0.015, "step": 100690 }, { "epoch": 0.8503092609402377, "grad_norm": 0.5761157274246216, "learning_rate": 7.110642247146004e-06, "loss": 0.0123, "step": 100700 }, { "epoch": 0.8503937007874016, "grad_norm": 0.3828522861003876, "learning_rate": 7.1099742142702556e-06, "loss": 0.0073, "step": 100710 }, { "epoch": 0.8504781406345655, "grad_norm": 0.2480269968509674, "learning_rate": 7.109306135566301e-06, "loss": 0.0086, "step": 100720 }, { "epoch": 0.8505625804817293, "grad_norm": 0.4190830886363983, "learning_rate": 7.108638011048648e-06, "loss": 0.0102, "step": 100730 }, { "epoch": 0.8506470203288932, "grad_norm": 0.3848688304424286, "learning_rate": 7.10796984073181e-06, "loss": 0.0091, "step": 100740 }, { "epoch": 0.850731460176057, "grad_norm": 0.42599692940711975, "learning_rate": 7.107301624630301e-06, "loss": 0.0076, "step": 100750 }, { "epoch": 0.8508159000232209, "grad_norm": 0.4551580250263214, "learning_rate": 7.10663336275863e-06, "loss": 0.0096, "step": 100760 }, { "epoch": 0.8509003398703848, "grad_norm": 0.3138625919818878, "learning_rate": 7.105965055131314e-06, "loss": 0.0114, "step": 100770 }, { "epoch": 0.8509847797175487, "grad_norm": 0.40874433517456055, "learning_rate": 7.10529670176287e-06, "loss": 0.01, "step": 100780 }, { "epoch": 0.8510692195647126, "grad_norm": 0.2725130617618561, "learning_rate": 7.104628302667813e-06, "loss": 0.0089, "step": 100790 }, { "epoch": 0.8511536594118765, "grad_norm": 0.18215689063072205, "learning_rate": 7.1039598578606605e-06, "loss": 0.0092, "step": 100800 }, { "epoch": 0.8512380992590404, "grad_norm": 0.5249031186103821, "learning_rate": 7.103291367355931e-06, "loss": 0.0087, "step": 100810 }, { "epoch": 0.8513225391062043, "grad_norm": 0.37232470512390137, "learning_rate": 7.102622831168142e-06, "loss": 0.0116, "step": 100820 }, { "epoch": 0.8514069789533681, "grad_norm": 0.21028843522071838, "learning_rate": 7.101954249311818e-06, "loss": 0.0083, "step": 100830 }, { "epoch": 0.8514914188005319, "grad_norm": 0.3257249891757965, "learning_rate": 7.101285621801479e-06, "loss": 0.0068, "step": 100840 }, { "epoch": 0.8515758586476958, "grad_norm": 0.6703994274139404, "learning_rate": 7.100616948651645e-06, "loss": 0.013, "step": 100850 }, { "epoch": 0.8516602984948597, "grad_norm": 0.23919284343719482, "learning_rate": 7.099948229876843e-06, "loss": 0.0063, "step": 100860 }, { "epoch": 0.8517447383420236, "grad_norm": 0.038221534341573715, "learning_rate": 7.0992794654915945e-06, "loss": 0.0133, "step": 100870 }, { "epoch": 0.8518291781891875, "grad_norm": 0.3235226571559906, "learning_rate": 7.0986106555104275e-06, "loss": 0.0098, "step": 100880 }, { "epoch": 0.8519136180363513, "grad_norm": 0.5375468134880066, "learning_rate": 7.097941799947865e-06, "loss": 0.017, "step": 100890 }, { "epoch": 0.8519980578835152, "grad_norm": 0.26464518904685974, "learning_rate": 7.097272898818439e-06, "loss": 0.0086, "step": 100900 }, { "epoch": 0.8520824977306791, "grad_norm": 0.5436113476753235, "learning_rate": 7.096603952136674e-06, "loss": 0.007, "step": 100910 }, { "epoch": 0.852166937577843, "grad_norm": 0.3211553692817688, "learning_rate": 7.095934959917102e-06, "loss": 0.0065, "step": 100920 }, { "epoch": 0.8522513774250069, "grad_norm": 0.08108919858932495, "learning_rate": 7.09526592217425e-06, "loss": 0.0069, "step": 100930 }, { "epoch": 0.8523358172721708, "grad_norm": 0.11307235807180405, "learning_rate": 7.094596838922654e-06, "loss": 0.0123, "step": 100940 }, { "epoch": 0.8524202571193347, "grad_norm": 0.771849513053894, "learning_rate": 7.093927710176841e-06, "loss": 0.0199, "step": 100950 }, { "epoch": 0.8525046969664984, "grad_norm": 0.2994544506072998, "learning_rate": 7.0932585359513505e-06, "loss": 0.0126, "step": 100960 }, { "epoch": 0.8525891368136623, "grad_norm": 0.29438528418540955, "learning_rate": 7.092589316260712e-06, "loss": 0.0122, "step": 100970 }, { "epoch": 0.8526735766608262, "grad_norm": 0.06995081156492233, "learning_rate": 7.091920051119461e-06, "loss": 0.0089, "step": 100980 }, { "epoch": 0.8527580165079901, "grad_norm": 0.28171682357788086, "learning_rate": 7.091250740542136e-06, "loss": 0.0068, "step": 100990 }, { "epoch": 0.852842456355154, "grad_norm": 0.14799253642559052, "learning_rate": 7.090581384543275e-06, "loss": 0.0078, "step": 101000 }, { "epoch": 0.8529268962023179, "grad_norm": 0.32804861664772034, "learning_rate": 7.089911983137413e-06, "loss": 0.0071, "step": 101010 }, { "epoch": 0.8530113360494818, "grad_norm": 0.43676498532295227, "learning_rate": 7.089242536339093e-06, "loss": 0.0092, "step": 101020 }, { "epoch": 0.8530957758966456, "grad_norm": 0.4286590814590454, "learning_rate": 7.088573044162851e-06, "loss": 0.0127, "step": 101030 }, { "epoch": 0.8531802157438095, "grad_norm": 0.22786825895309448, "learning_rate": 7.087903506623232e-06, "loss": 0.0127, "step": 101040 }, { "epoch": 0.8532646555909734, "grad_norm": 0.1458866447210312, "learning_rate": 7.087233923734776e-06, "loss": 0.0089, "step": 101050 }, { "epoch": 0.8533490954381373, "grad_norm": 0.9789260625839233, "learning_rate": 7.086564295512026e-06, "loss": 0.0128, "step": 101060 }, { "epoch": 0.8534335352853011, "grad_norm": 0.32432398200035095, "learning_rate": 7.0858946219695275e-06, "loss": 0.0165, "step": 101070 }, { "epoch": 0.853517975132465, "grad_norm": 0.5863024592399597, "learning_rate": 7.085224903121827e-06, "loss": 0.017, "step": 101080 }, { "epoch": 0.8536024149796289, "grad_norm": 0.32839077711105347, "learning_rate": 7.084555138983467e-06, "loss": 0.0085, "step": 101090 }, { "epoch": 0.8536868548267927, "grad_norm": 0.2495327740907669, "learning_rate": 7.083885329568997e-06, "loss": 0.0106, "step": 101100 }, { "epoch": 0.8537712946739566, "grad_norm": 0.15361367166042328, "learning_rate": 7.0832154748929635e-06, "loss": 0.0128, "step": 101110 }, { "epoch": 0.8538557345211205, "grad_norm": 0.32511165738105774, "learning_rate": 7.08254557496992e-06, "loss": 0.01, "step": 101120 }, { "epoch": 0.8539401743682844, "grad_norm": 0.20680706202983856, "learning_rate": 7.081875629814412e-06, "loss": 0.0101, "step": 101130 }, { "epoch": 0.8540246142154483, "grad_norm": 0.9847528338432312, "learning_rate": 7.0812056394409915e-06, "loss": 0.0216, "step": 101140 }, { "epoch": 0.8541090540626122, "grad_norm": 0.18282757699489594, "learning_rate": 7.080535603864211e-06, "loss": 0.0122, "step": 101150 }, { "epoch": 0.8541934939097761, "grad_norm": 0.24521403014659882, "learning_rate": 7.079865523098623e-06, "loss": 0.016, "step": 101160 }, { "epoch": 0.8542779337569399, "grad_norm": 0.18475040793418884, "learning_rate": 7.079195397158782e-06, "loss": 0.0119, "step": 101170 }, { "epoch": 0.8543623736041038, "grad_norm": 0.4782388210296631, "learning_rate": 7.078525226059244e-06, "loss": 0.0144, "step": 101180 }, { "epoch": 0.8544468134512676, "grad_norm": 0.5945777893066406, "learning_rate": 7.077855009814563e-06, "loss": 0.0075, "step": 101190 }, { "epoch": 0.8545312532984315, "grad_norm": 0.08694978058338165, "learning_rate": 7.077184748439298e-06, "loss": 0.0202, "step": 101200 }, { "epoch": 0.8546156931455954, "grad_norm": 0.38560548424720764, "learning_rate": 7.076514441948006e-06, "loss": 0.0138, "step": 101210 }, { "epoch": 0.8547001329927593, "grad_norm": 0.45623674988746643, "learning_rate": 7.075844090355244e-06, "loss": 0.0112, "step": 101220 }, { "epoch": 0.8547845728399232, "grad_norm": 0.17957139015197754, "learning_rate": 7.075173693675575e-06, "loss": 0.0112, "step": 101230 }, { "epoch": 0.854869012687087, "grad_norm": 0.329269140958786, "learning_rate": 7.074503251923559e-06, "loss": 0.0105, "step": 101240 }, { "epoch": 0.8549534525342509, "grad_norm": 0.1363704949617386, "learning_rate": 7.073832765113758e-06, "loss": 0.0101, "step": 101250 }, { "epoch": 0.8550378923814148, "grad_norm": 0.07771438360214233, "learning_rate": 7.073162233260733e-06, "loss": 0.0136, "step": 101260 }, { "epoch": 0.8551223322285787, "grad_norm": 0.2274869978427887, "learning_rate": 7.072491656379049e-06, "loss": 0.0087, "step": 101270 }, { "epoch": 0.8552067720757426, "grad_norm": 0.1132017970085144, "learning_rate": 7.071821034483272e-06, "loss": 0.0062, "step": 101280 }, { "epoch": 0.8552912119229065, "grad_norm": 0.18344546854496002, "learning_rate": 7.0711503675879664e-06, "loss": 0.0087, "step": 101290 }, { "epoch": 0.8553756517700702, "grad_norm": 0.2740356922149658, "learning_rate": 7.0704796557076985e-06, "loss": 0.0069, "step": 101300 }, { "epoch": 0.8554600916172341, "grad_norm": 0.20169714093208313, "learning_rate": 7.069808898857037e-06, "loss": 0.0106, "step": 101310 }, { "epoch": 0.855544531464398, "grad_norm": 0.40324267745018005, "learning_rate": 7.069138097050551e-06, "loss": 0.0132, "step": 101320 }, { "epoch": 0.8556289713115619, "grad_norm": 0.4160560667514801, "learning_rate": 7.06846725030281e-06, "loss": 0.0128, "step": 101330 }, { "epoch": 0.8557134111587258, "grad_norm": 0.1435122936964035, "learning_rate": 7.0677963586283824e-06, "loss": 0.0046, "step": 101340 }, { "epoch": 0.8557978510058897, "grad_norm": 0.4133776128292084, "learning_rate": 7.067125422041843e-06, "loss": 0.0103, "step": 101350 }, { "epoch": 0.8558822908530536, "grad_norm": 0.7573254108428955, "learning_rate": 7.066454440557763e-06, "loss": 0.0166, "step": 101360 }, { "epoch": 0.8559667307002174, "grad_norm": 0.22076773643493652, "learning_rate": 7.065783414190716e-06, "loss": 0.0178, "step": 101370 }, { "epoch": 0.8560511705473813, "grad_norm": 0.4713674783706665, "learning_rate": 7.065112342955276e-06, "loss": 0.022, "step": 101380 }, { "epoch": 0.8561356103945452, "grad_norm": 0.2011871188879013, "learning_rate": 7.064441226866019e-06, "loss": 0.0097, "step": 101390 }, { "epoch": 0.8562200502417091, "grad_norm": 0.5603745579719543, "learning_rate": 7.063770065937521e-06, "loss": 0.0118, "step": 101400 }, { "epoch": 0.856304490088873, "grad_norm": 0.23252913355827332, "learning_rate": 7.063098860184362e-06, "loss": 0.0149, "step": 101410 }, { "epoch": 0.8563889299360368, "grad_norm": 0.6807208061218262, "learning_rate": 7.0624276096211166e-06, "loss": 0.0112, "step": 101420 }, { "epoch": 0.8564733697832007, "grad_norm": 0.2439694106578827, "learning_rate": 7.061756314262367e-06, "loss": 0.0104, "step": 101430 }, { "epoch": 0.8565578096303645, "grad_norm": 0.14239171147346497, "learning_rate": 7.061084974122693e-06, "loss": 0.0091, "step": 101440 }, { "epoch": 0.8566422494775284, "grad_norm": 0.3297477066516876, "learning_rate": 7.0604135892166755e-06, "loss": 0.0091, "step": 101450 }, { "epoch": 0.8567266893246923, "grad_norm": 0.7925897240638733, "learning_rate": 7.0597421595588966e-06, "loss": 0.0073, "step": 101460 }, { "epoch": 0.8568111291718562, "grad_norm": 0.4182889759540558, "learning_rate": 7.05907068516394e-06, "loss": 0.0223, "step": 101470 }, { "epoch": 0.8568955690190201, "grad_norm": 0.10440466552972794, "learning_rate": 7.05839916604639e-06, "loss": 0.0142, "step": 101480 }, { "epoch": 0.856980008866184, "grad_norm": 0.3709484338760376, "learning_rate": 7.057727602220835e-06, "loss": 0.0079, "step": 101490 }, { "epoch": 0.8570644487133479, "grad_norm": 0.17881526052951813, "learning_rate": 7.057055993701855e-06, "loss": 0.0107, "step": 101500 }, { "epoch": 0.8571488885605117, "grad_norm": 0.40057721734046936, "learning_rate": 7.056384340504041e-06, "loss": 0.01, "step": 101510 }, { "epoch": 0.8572333284076756, "grad_norm": 0.26200324296951294, "learning_rate": 7.05571264264198e-06, "loss": 0.0139, "step": 101520 }, { "epoch": 0.8573177682548394, "grad_norm": 0.2265867292881012, "learning_rate": 7.0550409001302634e-06, "loss": 0.0044, "step": 101530 }, { "epoch": 0.8574022081020033, "grad_norm": 0.21235515177249908, "learning_rate": 7.054369112983477e-06, "loss": 0.0081, "step": 101540 }, { "epoch": 0.8574866479491672, "grad_norm": 0.24731676280498505, "learning_rate": 7.053697281216217e-06, "loss": 0.0083, "step": 101550 }, { "epoch": 0.8575710877963311, "grad_norm": 0.25776582956314087, "learning_rate": 7.05302540484307e-06, "loss": 0.0325, "step": 101560 }, { "epoch": 0.857655527643495, "grad_norm": 0.2706637680530548, "learning_rate": 7.052353483878634e-06, "loss": 0.0062, "step": 101570 }, { "epoch": 0.8577399674906588, "grad_norm": 0.542564868927002, "learning_rate": 7.051681518337501e-06, "loss": 0.0147, "step": 101580 }, { "epoch": 0.8578244073378227, "grad_norm": 0.4440741539001465, "learning_rate": 7.051009508234265e-06, "loss": 0.0111, "step": 101590 }, { "epoch": 0.8579088471849866, "grad_norm": 0.2741488516330719, "learning_rate": 7.050337453583522e-06, "loss": 0.0124, "step": 101600 }, { "epoch": 0.8579932870321505, "grad_norm": 0.3463991582393646, "learning_rate": 7.0496653543998715e-06, "loss": 0.0123, "step": 101610 }, { "epoch": 0.8580777268793144, "grad_norm": 0.31736624240875244, "learning_rate": 7.04899321069791e-06, "loss": 0.0128, "step": 101620 }, { "epoch": 0.8581621667264783, "grad_norm": 0.40133655071258545, "learning_rate": 7.048321022492234e-06, "loss": 0.0052, "step": 101630 }, { "epoch": 0.8582466065736422, "grad_norm": 1.135462760925293, "learning_rate": 7.047648789797446e-06, "loss": 0.0147, "step": 101640 }, { "epoch": 0.8583310464208059, "grad_norm": 0.4625755250453949, "learning_rate": 7.046976512628145e-06, "loss": 0.0104, "step": 101650 }, { "epoch": 0.8584154862679698, "grad_norm": 0.16241993010044098, "learning_rate": 7.046304190998935e-06, "loss": 0.0103, "step": 101660 }, { "epoch": 0.8584999261151337, "grad_norm": 0.3974888324737549, "learning_rate": 7.045631824924416e-06, "loss": 0.0071, "step": 101670 }, { "epoch": 0.8585843659622976, "grad_norm": 0.7321981191635132, "learning_rate": 7.044959414419194e-06, "loss": 0.017, "step": 101680 }, { "epoch": 0.8586688058094615, "grad_norm": 0.17794294655323029, "learning_rate": 7.044286959497873e-06, "loss": 0.0173, "step": 101690 }, { "epoch": 0.8587532456566254, "grad_norm": 0.19445465505123138, "learning_rate": 7.0436144601750564e-06, "loss": 0.0061, "step": 101700 }, { "epoch": 0.8588376855037892, "grad_norm": 0.1640053689479828, "learning_rate": 7.042941916465352e-06, "loss": 0.0069, "step": 101710 }, { "epoch": 0.8589221253509531, "grad_norm": 0.08281823247671127, "learning_rate": 7.04226932838337e-06, "loss": 0.0039, "step": 101720 }, { "epoch": 0.859006565198117, "grad_norm": 0.15940646827220917, "learning_rate": 7.041596695943715e-06, "loss": 0.0156, "step": 101730 }, { "epoch": 0.8590910050452809, "grad_norm": 0.2836642861366272, "learning_rate": 7.040924019161e-06, "loss": 0.0087, "step": 101740 }, { "epoch": 0.8591754448924448, "grad_norm": 0.07091309130191803, "learning_rate": 7.040251298049833e-06, "loss": 0.0095, "step": 101750 }, { "epoch": 0.8592598847396086, "grad_norm": 0.21215017139911652, "learning_rate": 7.039578532624825e-06, "loss": 0.0085, "step": 101760 }, { "epoch": 0.8593443245867725, "grad_norm": 0.8643789291381836, "learning_rate": 7.038905722900589e-06, "loss": 0.0183, "step": 101770 }, { "epoch": 0.8594287644339363, "grad_norm": 0.3001382350921631, "learning_rate": 7.03823286889174e-06, "loss": 0.0107, "step": 101780 }, { "epoch": 0.8595132042811002, "grad_norm": 0.5234253406524658, "learning_rate": 7.03755997061289e-06, "loss": 0.0122, "step": 101790 }, { "epoch": 0.8595976441282641, "grad_norm": 0.19758015871047974, "learning_rate": 7.036887028078654e-06, "loss": 0.0153, "step": 101800 }, { "epoch": 0.859682083975428, "grad_norm": 0.06467626243829727, "learning_rate": 7.03621404130365e-06, "loss": 0.0164, "step": 101810 }, { "epoch": 0.8597665238225919, "grad_norm": 0.41648393869400024, "learning_rate": 7.035541010302494e-06, "loss": 0.0126, "step": 101820 }, { "epoch": 0.8598509636697558, "grad_norm": 0.19499824941158295, "learning_rate": 7.034867935089805e-06, "loss": 0.012, "step": 101830 }, { "epoch": 0.8599354035169197, "grad_norm": 0.43143144249916077, "learning_rate": 7.034194815680202e-06, "loss": 0.0129, "step": 101840 }, { "epoch": 0.8600198433640835, "grad_norm": 0.3823971450328827, "learning_rate": 7.033521652088303e-06, "loss": 0.0137, "step": 101850 }, { "epoch": 0.8601042832112474, "grad_norm": 0.4092733860015869, "learning_rate": 7.032848444328732e-06, "loss": 0.0126, "step": 101860 }, { "epoch": 0.8601887230584113, "grad_norm": 0.35328349471092224, "learning_rate": 7.0321751924161075e-06, "loss": 0.0106, "step": 101870 }, { "epoch": 0.8602731629055751, "grad_norm": 0.19503465294837952, "learning_rate": 7.031501896365055e-06, "loss": 0.0171, "step": 101880 }, { "epoch": 0.860357602752739, "grad_norm": 0.48618263006210327, "learning_rate": 7.0308285561901965e-06, "loss": 0.0081, "step": 101890 }, { "epoch": 0.8604420425999029, "grad_norm": 0.15835370123386383, "learning_rate": 7.030155171906161e-06, "loss": 0.0075, "step": 101900 }, { "epoch": 0.8605264824470668, "grad_norm": 0.2014019936323166, "learning_rate": 7.029481743527568e-06, "loss": 0.0084, "step": 101910 }, { "epoch": 0.8606109222942306, "grad_norm": 0.12347875535488129, "learning_rate": 7.02880827106905e-06, "loss": 0.0067, "step": 101920 }, { "epoch": 0.8606953621413945, "grad_norm": 0.03255774453282356, "learning_rate": 7.028134754545229e-06, "loss": 0.0115, "step": 101930 }, { "epoch": 0.8607798019885584, "grad_norm": 0.5608644485473633, "learning_rate": 7.027461193970739e-06, "loss": 0.0077, "step": 101940 }, { "epoch": 0.8608642418357223, "grad_norm": 0.3589652180671692, "learning_rate": 7.026787589360207e-06, "loss": 0.0113, "step": 101950 }, { "epoch": 0.8609486816828862, "grad_norm": 0.6124129891395569, "learning_rate": 7.026113940728265e-06, "loss": 0.0101, "step": 101960 }, { "epoch": 0.8610331215300501, "grad_norm": 0.2648157477378845, "learning_rate": 7.0254402480895415e-06, "loss": 0.0089, "step": 101970 }, { "epoch": 0.861117561377214, "grad_norm": 0.39678671956062317, "learning_rate": 7.024766511458673e-06, "loss": 0.0094, "step": 101980 }, { "epoch": 0.8612020012243777, "grad_norm": 0.30033090710639954, "learning_rate": 7.024092730850288e-06, "loss": 0.0067, "step": 101990 }, { "epoch": 0.8612864410715416, "grad_norm": 0.30267006158828735, "learning_rate": 7.023418906279025e-06, "loss": 0.0137, "step": 102000 }, { "epoch": 0.8613708809187055, "grad_norm": 0.05974527448415756, "learning_rate": 7.022745037759518e-06, "loss": 0.0108, "step": 102010 }, { "epoch": 0.8614553207658694, "grad_norm": 0.17975933849811554, "learning_rate": 7.022071125306403e-06, "loss": 0.0152, "step": 102020 }, { "epoch": 0.8615397606130333, "grad_norm": 0.3912115693092346, "learning_rate": 7.021397168934319e-06, "loss": 0.0126, "step": 102030 }, { "epoch": 0.8616242004601972, "grad_norm": 0.5584045648574829, "learning_rate": 7.0207231686579e-06, "loss": 0.0127, "step": 102040 }, { "epoch": 0.861708640307361, "grad_norm": 0.16778920590877533, "learning_rate": 7.020049124491789e-06, "loss": 0.0047, "step": 102050 }, { "epoch": 0.8617930801545249, "grad_norm": 0.07291482388973236, "learning_rate": 7.019375036450626e-06, "loss": 0.0195, "step": 102060 }, { "epoch": 0.8618775200016888, "grad_norm": 0.5725477337837219, "learning_rate": 7.0187009045490494e-06, "loss": 0.0109, "step": 102070 }, { "epoch": 0.8619619598488527, "grad_norm": 0.36359232664108276, "learning_rate": 7.018026728801706e-06, "loss": 0.0103, "step": 102080 }, { "epoch": 0.8620463996960166, "grad_norm": 0.17467285692691803, "learning_rate": 7.017352509223232e-06, "loss": 0.0065, "step": 102090 }, { "epoch": 0.8621308395431804, "grad_norm": 0.24213476479053497, "learning_rate": 7.016678245828277e-06, "loss": 0.0098, "step": 102100 }, { "epoch": 0.8622152793903443, "grad_norm": 0.44068965315818787, "learning_rate": 7.0160039386314825e-06, "loss": 0.0147, "step": 102110 }, { "epoch": 0.8622997192375081, "grad_norm": 0.4309629499912262, "learning_rate": 7.015329587647496e-06, "loss": 0.0063, "step": 102120 }, { "epoch": 0.862384159084672, "grad_norm": 0.44668006896972656, "learning_rate": 7.0146551928909635e-06, "loss": 0.0044, "step": 102130 }, { "epoch": 0.8624685989318359, "grad_norm": 1.0135186910629272, "learning_rate": 7.013980754376534e-06, "loss": 0.0118, "step": 102140 }, { "epoch": 0.8625530387789998, "grad_norm": 0.09176613390445709, "learning_rate": 7.013306272118854e-06, "loss": 0.008, "step": 102150 }, { "epoch": 0.8626374786261637, "grad_norm": 0.06686332076787949, "learning_rate": 7.012631746132574e-06, "loss": 0.0101, "step": 102160 }, { "epoch": 0.8627219184733276, "grad_norm": 0.20264706015586853, "learning_rate": 7.011957176432347e-06, "loss": 0.007, "step": 102170 }, { "epoch": 0.8628063583204915, "grad_norm": 0.21396717429161072, "learning_rate": 7.011282563032822e-06, "loss": 0.0108, "step": 102180 }, { "epoch": 0.8628907981676553, "grad_norm": 0.15115047991275787, "learning_rate": 7.010607905948649e-06, "loss": 0.0089, "step": 102190 }, { "epoch": 0.8629752380148192, "grad_norm": 0.0975688099861145, "learning_rate": 7.009933205194488e-06, "loss": 0.0077, "step": 102200 }, { "epoch": 0.8630596778619831, "grad_norm": 0.5058521628379822, "learning_rate": 7.009258460784989e-06, "loss": 0.0151, "step": 102210 }, { "epoch": 0.8631441177091469, "grad_norm": 0.2071404606103897, "learning_rate": 7.0085836727348055e-06, "loss": 0.0105, "step": 102220 }, { "epoch": 0.8632285575563108, "grad_norm": 0.19533102214336395, "learning_rate": 7.007908841058599e-06, "loss": 0.0094, "step": 102230 }, { "epoch": 0.8633129974034747, "grad_norm": 0.2207462340593338, "learning_rate": 7.0072339657710235e-06, "loss": 0.0134, "step": 102240 }, { "epoch": 0.8633974372506386, "grad_norm": 0.37454360723495483, "learning_rate": 7.006559046886736e-06, "loss": 0.0168, "step": 102250 }, { "epoch": 0.8634818770978024, "grad_norm": 0.1267966479063034, "learning_rate": 7.005884084420398e-06, "loss": 0.0101, "step": 102260 }, { "epoch": 0.8635663169449663, "grad_norm": 0.2907615602016449, "learning_rate": 7.005209078386669e-06, "loss": 0.0051, "step": 102270 }, { "epoch": 0.8636507567921302, "grad_norm": 0.07729795575141907, "learning_rate": 7.00453402880021e-06, "loss": 0.0068, "step": 102280 }, { "epoch": 0.8637351966392941, "grad_norm": 0.30920952558517456, "learning_rate": 7.0038589356756805e-06, "loss": 0.009, "step": 102290 }, { "epoch": 0.863819636486458, "grad_norm": 0.372959703207016, "learning_rate": 7.003183799027748e-06, "loss": 0.0054, "step": 102300 }, { "epoch": 0.8639040763336219, "grad_norm": 0.010831520892679691, "learning_rate": 7.0025086188710734e-06, "loss": 0.0091, "step": 102310 }, { "epoch": 0.8639885161807858, "grad_norm": 0.20237965881824493, "learning_rate": 7.001833395220322e-06, "loss": 0.0082, "step": 102320 }, { "epoch": 0.8640729560279495, "grad_norm": 0.20518311858177185, "learning_rate": 7.001158128090159e-06, "loss": 0.0078, "step": 102330 }, { "epoch": 0.8641573958751134, "grad_norm": 0.3563382029533386, "learning_rate": 7.000482817495251e-06, "loss": 0.0073, "step": 102340 }, { "epoch": 0.8642418357222773, "grad_norm": 0.42691102623939514, "learning_rate": 6.999807463450267e-06, "loss": 0.0087, "step": 102350 }, { "epoch": 0.8643262755694412, "grad_norm": 0.4708329141139984, "learning_rate": 6.999132065969875e-06, "loss": 0.0137, "step": 102360 }, { "epoch": 0.8644107154166051, "grad_norm": 0.40407225489616394, "learning_rate": 6.998456625068743e-06, "loss": 0.0098, "step": 102370 }, { "epoch": 0.864495155263769, "grad_norm": 0.13699562847614288, "learning_rate": 6.997781140761543e-06, "loss": 0.0116, "step": 102380 }, { "epoch": 0.8645795951109329, "grad_norm": 0.18904969096183777, "learning_rate": 6.997105613062947e-06, "loss": 0.0094, "step": 102390 }, { "epoch": 0.8646640349580967, "grad_norm": 0.4480643570423126, "learning_rate": 6.996430041987625e-06, "loss": 0.0084, "step": 102400 }, { "epoch": 0.8647484748052606, "grad_norm": 0.5507737398147583, "learning_rate": 6.995754427550252e-06, "loss": 0.0128, "step": 102410 }, { "epoch": 0.8648329146524245, "grad_norm": 0.39720433950424194, "learning_rate": 6.995078769765503e-06, "loss": 0.0099, "step": 102420 }, { "epoch": 0.8649173544995884, "grad_norm": 0.7336653470993042, "learning_rate": 6.9944030686480505e-06, "loss": 0.0139, "step": 102430 }, { "epoch": 0.8650017943467523, "grad_norm": 0.26305171847343445, "learning_rate": 6.993727324212574e-06, "loss": 0.011, "step": 102440 }, { "epoch": 0.8650862341939161, "grad_norm": 0.21439415216445923, "learning_rate": 6.993051536473746e-06, "loss": 0.0108, "step": 102450 }, { "epoch": 0.86517067404108, "grad_norm": 0.3936856985092163, "learning_rate": 6.992375705446248e-06, "loss": 0.0065, "step": 102460 }, { "epoch": 0.8652551138882438, "grad_norm": 0.24791203439235687, "learning_rate": 6.991699831144758e-06, "loss": 0.0143, "step": 102470 }, { "epoch": 0.8653395537354077, "grad_norm": 0.5943911671638489, "learning_rate": 6.991023913583957e-06, "loss": 0.013, "step": 102480 }, { "epoch": 0.8654239935825716, "grad_norm": 0.4368131458759308, "learning_rate": 6.9903479527785225e-06, "loss": 0.0097, "step": 102490 }, { "epoch": 0.8655084334297355, "grad_norm": 0.14375784993171692, "learning_rate": 6.989671948743139e-06, "loss": 0.01, "step": 102500 }, { "epoch": 0.8655928732768994, "grad_norm": 0.055886734277009964, "learning_rate": 6.988995901492489e-06, "loss": 0.0076, "step": 102510 }, { "epoch": 0.8656773131240633, "grad_norm": 0.8865389823913574, "learning_rate": 6.988319811041255e-06, "loss": 0.011, "step": 102520 }, { "epoch": 0.8657617529712271, "grad_norm": 0.4324047863483429, "learning_rate": 6.9876436774041226e-06, "loss": 0.0088, "step": 102530 }, { "epoch": 0.865846192818391, "grad_norm": 0.612857460975647, "learning_rate": 6.986967500595776e-06, "loss": 0.0063, "step": 102540 }, { "epoch": 0.8659306326655549, "grad_norm": 0.22882847487926483, "learning_rate": 6.986291280630906e-06, "loss": 0.0039, "step": 102550 }, { "epoch": 0.8660150725127187, "grad_norm": 0.19347026944160461, "learning_rate": 6.9856150175241925e-06, "loss": 0.0097, "step": 102560 }, { "epoch": 0.8660995123598826, "grad_norm": 0.3569931387901306, "learning_rate": 6.984938711290329e-06, "loss": 0.014, "step": 102570 }, { "epoch": 0.8661839522070465, "grad_norm": 0.2794501781463623, "learning_rate": 6.984262361944003e-06, "loss": 0.0077, "step": 102580 }, { "epoch": 0.8662683920542104, "grad_norm": 0.13314127922058105, "learning_rate": 6.983585969499907e-06, "loss": 0.0153, "step": 102590 }, { "epoch": 0.8663528319013742, "grad_norm": 0.0635145828127861, "learning_rate": 6.982909533972727e-06, "loss": 0.0113, "step": 102600 }, { "epoch": 0.8664372717485381, "grad_norm": 0.8818572759628296, "learning_rate": 6.98223305537716e-06, "loss": 0.0103, "step": 102610 }, { "epoch": 0.866521711595702, "grad_norm": 0.2132914960384369, "learning_rate": 6.981556533727897e-06, "loss": 0.0119, "step": 102620 }, { "epoch": 0.8666061514428659, "grad_norm": 0.5204238295555115, "learning_rate": 6.980879969039633e-06, "loss": 0.0075, "step": 102630 }, { "epoch": 0.8666905912900298, "grad_norm": 0.17065922915935516, "learning_rate": 6.98020336132706e-06, "loss": 0.0067, "step": 102640 }, { "epoch": 0.8667750311371937, "grad_norm": 0.14500552415847778, "learning_rate": 6.979526710604877e-06, "loss": 0.0118, "step": 102650 }, { "epoch": 0.8668594709843576, "grad_norm": 0.1942899227142334, "learning_rate": 6.97885001688778e-06, "loss": 0.0068, "step": 102660 }, { "epoch": 0.8669439108315214, "grad_norm": 0.3733811378479004, "learning_rate": 6.9781732801904655e-06, "loss": 0.0118, "step": 102670 }, { "epoch": 0.8670283506786852, "grad_norm": 0.3777638375759125, "learning_rate": 6.977496500527634e-06, "loss": 0.0103, "step": 102680 }, { "epoch": 0.8671127905258491, "grad_norm": 0.48062580823898315, "learning_rate": 6.9768196779139816e-06, "loss": 0.0052, "step": 102690 }, { "epoch": 0.867197230373013, "grad_norm": 0.1040402501821518, "learning_rate": 6.976142812364213e-06, "loss": 0.0097, "step": 102700 }, { "epoch": 0.8672816702201769, "grad_norm": 0.2757188379764557, "learning_rate": 6.975465903893026e-06, "loss": 0.0113, "step": 102710 }, { "epoch": 0.8673661100673408, "grad_norm": 0.6048265099525452, "learning_rate": 6.974788952515125e-06, "loss": 0.0082, "step": 102720 }, { "epoch": 0.8674505499145047, "grad_norm": 0.37125498056411743, "learning_rate": 6.974111958245211e-06, "loss": 0.0099, "step": 102730 }, { "epoch": 0.8675349897616685, "grad_norm": 0.38417840003967285, "learning_rate": 6.9734349210979915e-06, "loss": 0.0119, "step": 102740 }, { "epoch": 0.8676194296088324, "grad_norm": 0.5595654249191284, "learning_rate": 6.9727578410881704e-06, "loss": 0.0089, "step": 102750 }, { "epoch": 0.8677038694559963, "grad_norm": 0.2675558924674988, "learning_rate": 6.972080718230452e-06, "loss": 0.006, "step": 102760 }, { "epoch": 0.8677883093031602, "grad_norm": 0.000486089353216812, "learning_rate": 6.971403552539544e-06, "loss": 0.0167, "step": 102770 }, { "epoch": 0.8678727491503241, "grad_norm": 0.3631397783756256, "learning_rate": 6.970726344030157e-06, "loss": 0.0079, "step": 102780 }, { "epoch": 0.8679571889974879, "grad_norm": 0.3505231738090515, "learning_rate": 6.970049092716996e-06, "loss": 0.0105, "step": 102790 }, { "epoch": 0.8680416288446517, "grad_norm": 0.19826175272464752, "learning_rate": 6.969371798614773e-06, "loss": 0.0128, "step": 102800 }, { "epoch": 0.8681260686918156, "grad_norm": 0.2969130277633667, "learning_rate": 6.968694461738199e-06, "loss": 0.0108, "step": 102810 }, { "epoch": 0.8682105085389795, "grad_norm": 0.14235751330852509, "learning_rate": 6.9680170821019836e-06, "loss": 0.0092, "step": 102820 }, { "epoch": 0.8682949483861434, "grad_norm": 0.35030072927474976, "learning_rate": 6.96733965972084e-06, "loss": 0.0073, "step": 102830 }, { "epoch": 0.8683793882333073, "grad_norm": 0.0696219801902771, "learning_rate": 6.966662194609482e-06, "loss": 0.0115, "step": 102840 }, { "epoch": 0.8684638280804712, "grad_norm": 0.3151077330112457, "learning_rate": 6.965984686782625e-06, "loss": 0.0083, "step": 102850 }, { "epoch": 0.8685482679276351, "grad_norm": 0.4652830958366394, "learning_rate": 6.965307136254983e-06, "loss": 0.0076, "step": 102860 }, { "epoch": 0.868632707774799, "grad_norm": 0.06509248912334442, "learning_rate": 6.964629543041274e-06, "loss": 0.007, "step": 102870 }, { "epoch": 0.8687171476219628, "grad_norm": 0.34973853826522827, "learning_rate": 6.963951907156212e-06, "loss": 0.0147, "step": 102880 }, { "epoch": 0.8688015874691267, "grad_norm": 0.24872314929962158, "learning_rate": 6.963274228614518e-06, "loss": 0.0063, "step": 102890 }, { "epoch": 0.8688860273162906, "grad_norm": 0.2074062079191208, "learning_rate": 6.9625965074309096e-06, "loss": 0.0086, "step": 102900 }, { "epoch": 0.8689704671634544, "grad_norm": 0.27085044980049133, "learning_rate": 6.961918743620109e-06, "loss": 0.0101, "step": 102910 }, { "epoch": 0.8690549070106183, "grad_norm": 0.2449083775281906, "learning_rate": 6.961240937196834e-06, "loss": 0.0124, "step": 102920 }, { "epoch": 0.8691393468577822, "grad_norm": 0.8629308938980103, "learning_rate": 6.960563088175807e-06, "loss": 0.0077, "step": 102930 }, { "epoch": 0.869223786704946, "grad_norm": 0.4510800242424011, "learning_rate": 6.959885196571752e-06, "loss": 0.0088, "step": 102940 }, { "epoch": 0.8693082265521099, "grad_norm": 0.5435125231742859, "learning_rate": 6.959207262399393e-06, "loss": 0.0088, "step": 102950 }, { "epoch": 0.8693926663992738, "grad_norm": 0.5481974482536316, "learning_rate": 6.958529285673453e-06, "loss": 0.0132, "step": 102960 }, { "epoch": 0.8694771062464377, "grad_norm": 0.18666790425777435, "learning_rate": 6.957851266408659e-06, "loss": 0.0087, "step": 102970 }, { "epoch": 0.8695615460936016, "grad_norm": 0.5948922038078308, "learning_rate": 6.957173204619735e-06, "loss": 0.0193, "step": 102980 }, { "epoch": 0.8696459859407655, "grad_norm": 0.38184258341789246, "learning_rate": 6.95649510032141e-06, "loss": 0.0126, "step": 102990 }, { "epoch": 0.8697304257879294, "grad_norm": 0.36723488569259644, "learning_rate": 6.955816953528415e-06, "loss": 0.0105, "step": 103000 }, { "epoch": 0.8698148656350932, "grad_norm": 0.23809146881103516, "learning_rate": 6.955138764255475e-06, "loss": 0.0089, "step": 103010 }, { "epoch": 0.869899305482257, "grad_norm": 0.5676121115684509, "learning_rate": 6.954460532517321e-06, "loss": 0.0133, "step": 103020 }, { "epoch": 0.8699837453294209, "grad_norm": 0.08703415840864182, "learning_rate": 6.953782258328685e-06, "loss": 0.016, "step": 103030 }, { "epoch": 0.8700681851765848, "grad_norm": 0.387893408536911, "learning_rate": 6.9531039417043e-06, "loss": 0.0137, "step": 103040 }, { "epoch": 0.8701526250237487, "grad_norm": 0.16619107127189636, "learning_rate": 6.952425582658897e-06, "loss": 0.0108, "step": 103050 }, { "epoch": 0.8702370648709126, "grad_norm": 0.5625027418136597, "learning_rate": 6.95174718120721e-06, "loss": 0.0071, "step": 103060 }, { "epoch": 0.8703215047180765, "grad_norm": 0.3535764515399933, "learning_rate": 6.951068737363974e-06, "loss": 0.0056, "step": 103070 }, { "epoch": 0.8704059445652403, "grad_norm": 0.180062934756279, "learning_rate": 6.950390251143926e-06, "loss": 0.0066, "step": 103080 }, { "epoch": 0.8704903844124042, "grad_norm": 0.6984698176383972, "learning_rate": 6.9497117225618e-06, "loss": 0.0098, "step": 103090 }, { "epoch": 0.8705748242595681, "grad_norm": 0.4846661388874054, "learning_rate": 6.949033151632335e-06, "loss": 0.0094, "step": 103100 }, { "epoch": 0.870659264106732, "grad_norm": 0.09086799621582031, "learning_rate": 6.94835453837027e-06, "loss": 0.006, "step": 103110 }, { "epoch": 0.8707437039538959, "grad_norm": 0.18343202769756317, "learning_rate": 6.9476758827903435e-06, "loss": 0.0085, "step": 103120 }, { "epoch": 0.8708281438010598, "grad_norm": 0.2614997625350952, "learning_rate": 6.946997184907296e-06, "loss": 0.0126, "step": 103130 }, { "epoch": 0.8709125836482235, "grad_norm": 0.5169923305511475, "learning_rate": 6.946318444735869e-06, "loss": 0.0118, "step": 103140 }, { "epoch": 0.8709970234953874, "grad_norm": 1.135657787322998, "learning_rate": 6.945639662290804e-06, "loss": 0.0152, "step": 103150 }, { "epoch": 0.8710814633425513, "grad_norm": 0.23699641227722168, "learning_rate": 6.944960837586845e-06, "loss": 0.0132, "step": 103160 }, { "epoch": 0.8711659031897152, "grad_norm": 0.8335643410682678, "learning_rate": 6.944281970638733e-06, "loss": 0.0168, "step": 103170 }, { "epoch": 0.8712503430368791, "grad_norm": 0.12860889732837677, "learning_rate": 6.943603061461217e-06, "loss": 0.0103, "step": 103180 }, { "epoch": 0.871334782884043, "grad_norm": 0.2639472186565399, "learning_rate": 6.94292411006904e-06, "loss": 0.0201, "step": 103190 }, { "epoch": 0.8714192227312069, "grad_norm": 0.35774314403533936, "learning_rate": 6.942245116476949e-06, "loss": 0.0114, "step": 103200 }, { "epoch": 0.8715036625783708, "grad_norm": 0.6584494709968567, "learning_rate": 6.941566080699693e-06, "loss": 0.0105, "step": 103210 }, { "epoch": 0.8715881024255346, "grad_norm": 0.16846176981925964, "learning_rate": 6.9408870027520195e-06, "loss": 0.0082, "step": 103220 }, { "epoch": 0.8716725422726985, "grad_norm": 0.571915328502655, "learning_rate": 6.940207882648676e-06, "loss": 0.0082, "step": 103230 }, { "epoch": 0.8717569821198624, "grad_norm": 0.2604306638240814, "learning_rate": 6.939528720404418e-06, "loss": 0.0132, "step": 103240 }, { "epoch": 0.8718414219670262, "grad_norm": 0.47681480646133423, "learning_rate": 6.938849516033992e-06, "loss": 0.0143, "step": 103250 }, { "epoch": 0.8719258618141901, "grad_norm": 0.33021560311317444, "learning_rate": 6.938170269552152e-06, "loss": 0.01, "step": 103260 }, { "epoch": 0.872010301661354, "grad_norm": 0.4549560248851776, "learning_rate": 6.937490980973652e-06, "loss": 0.012, "step": 103270 }, { "epoch": 0.8720947415085178, "grad_norm": 0.26480039954185486, "learning_rate": 6.936811650313242e-06, "loss": 0.0076, "step": 103280 }, { "epoch": 0.8721791813556817, "grad_norm": 0.20718662440776825, "learning_rate": 6.936132277585683e-06, "loss": 0.0073, "step": 103290 }, { "epoch": 0.8722636212028456, "grad_norm": 0.1652778834104538, "learning_rate": 6.935452862805726e-06, "loss": 0.01, "step": 103300 }, { "epoch": 0.8723480610500095, "grad_norm": 0.42285439372062683, "learning_rate": 6.934773405988129e-06, "loss": 0.0089, "step": 103310 }, { "epoch": 0.8724325008971734, "grad_norm": 0.1529565453529358, "learning_rate": 6.934093907147652e-06, "loss": 0.0112, "step": 103320 }, { "epoch": 0.8725169407443373, "grad_norm": 0.2945581376552582, "learning_rate": 6.93341436629905e-06, "loss": 0.0092, "step": 103330 }, { "epoch": 0.8726013805915012, "grad_norm": 0.21476629376411438, "learning_rate": 6.932734783457085e-06, "loss": 0.0044, "step": 103340 }, { "epoch": 0.872685820438665, "grad_norm": 0.7226921916007996, "learning_rate": 6.932055158636516e-06, "loss": 0.0184, "step": 103350 }, { "epoch": 0.8727702602858289, "grad_norm": 0.40699321031570435, "learning_rate": 6.931375491852106e-06, "loss": 0.0107, "step": 103360 }, { "epoch": 0.8728547001329927, "grad_norm": 0.05542469397187233, "learning_rate": 6.930695783118615e-06, "loss": 0.0117, "step": 103370 }, { "epoch": 0.8729391399801566, "grad_norm": 0.48631152510643005, "learning_rate": 6.930016032450808e-06, "loss": 0.0104, "step": 103380 }, { "epoch": 0.8730235798273205, "grad_norm": 0.24558129906654358, "learning_rate": 6.9293362398634465e-06, "loss": 0.0115, "step": 103390 }, { "epoch": 0.8731080196744844, "grad_norm": 0.031955331563949585, "learning_rate": 6.928656405371299e-06, "loss": 0.0086, "step": 103400 }, { "epoch": 0.8731924595216483, "grad_norm": 0.5653812885284424, "learning_rate": 6.9279765289891295e-06, "loss": 0.02, "step": 103410 }, { "epoch": 0.8732768993688121, "grad_norm": 0.5850641131401062, "learning_rate": 6.927296610731704e-06, "loss": 0.0157, "step": 103420 }, { "epoch": 0.873361339215976, "grad_norm": 0.28339794278144836, "learning_rate": 6.926616650613791e-06, "loss": 0.0063, "step": 103430 }, { "epoch": 0.8734457790631399, "grad_norm": 0.16818758845329285, "learning_rate": 6.92593664865016e-06, "loss": 0.0104, "step": 103440 }, { "epoch": 0.8735302189103038, "grad_norm": 0.14603467285633087, "learning_rate": 6.9252566048555805e-06, "loss": 0.0114, "step": 103450 }, { "epoch": 0.8736146587574677, "grad_norm": 0.31938251852989197, "learning_rate": 6.92457651924482e-06, "loss": 0.0092, "step": 103460 }, { "epoch": 0.8736990986046316, "grad_norm": 0.34475648403167725, "learning_rate": 6.923896391832652e-06, "loss": 0.0161, "step": 103470 }, { "epoch": 0.8737835384517953, "grad_norm": 0.27489110827445984, "learning_rate": 6.923216222633851e-06, "loss": 0.0073, "step": 103480 }, { "epoch": 0.8738679782989592, "grad_norm": 0.347377747297287, "learning_rate": 6.922536011663186e-06, "loss": 0.0144, "step": 103490 }, { "epoch": 0.8739524181461231, "grad_norm": 0.41065189242362976, "learning_rate": 6.921855758935433e-06, "loss": 0.0087, "step": 103500 }, { "epoch": 0.874036857993287, "grad_norm": 0.3601699471473694, "learning_rate": 6.9211754644653665e-06, "loss": 0.0078, "step": 103510 }, { "epoch": 0.8741212978404509, "grad_norm": 0.1693556308746338, "learning_rate": 6.9204951282677625e-06, "loss": 0.0111, "step": 103520 }, { "epoch": 0.8742057376876148, "grad_norm": 0.4349040687084198, "learning_rate": 6.9198147503573985e-06, "loss": 0.01, "step": 103530 }, { "epoch": 0.8742901775347787, "grad_norm": 0.2812679708003998, "learning_rate": 6.919134330749051e-06, "loss": 0.0045, "step": 103540 }, { "epoch": 0.8743746173819426, "grad_norm": 0.33134734630584717, "learning_rate": 6.918453869457499e-06, "loss": 0.0127, "step": 103550 }, { "epoch": 0.8744590572291064, "grad_norm": 0.04249606654047966, "learning_rate": 6.917773366497522e-06, "loss": 0.0181, "step": 103560 }, { "epoch": 0.8745434970762703, "grad_norm": 0.3074350357055664, "learning_rate": 6.917092821883902e-06, "loss": 0.0081, "step": 103570 }, { "epoch": 0.8746279369234342, "grad_norm": 0.0003676204360090196, "learning_rate": 6.9164122356314165e-06, "loss": 0.015, "step": 103580 }, { "epoch": 0.8747123767705981, "grad_norm": 0.07431615144014359, "learning_rate": 6.915731607754851e-06, "loss": 0.0061, "step": 103590 }, { "epoch": 0.8747968166177619, "grad_norm": 0.27587854862213135, "learning_rate": 6.915050938268988e-06, "loss": 0.0132, "step": 103600 }, { "epoch": 0.8748812564649258, "grad_norm": 0.07684232294559479, "learning_rate": 6.9143702271886104e-06, "loss": 0.007, "step": 103610 }, { "epoch": 0.8749656963120896, "grad_norm": 1.1134072542190552, "learning_rate": 6.913689474528505e-06, "loss": 0.0078, "step": 103620 }, { "epoch": 0.8750501361592535, "grad_norm": 0.18635539710521698, "learning_rate": 6.9130086803034545e-06, "loss": 0.0117, "step": 103630 }, { "epoch": 0.8751345760064174, "grad_norm": 0.39931029081344604, "learning_rate": 6.912327844528248e-06, "loss": 0.0138, "step": 103640 }, { "epoch": 0.8752190158535813, "grad_norm": 0.26080965995788574, "learning_rate": 6.911646967217674e-06, "loss": 0.0063, "step": 103650 }, { "epoch": 0.8753034557007452, "grad_norm": 0.28663504123687744, "learning_rate": 6.910966048386518e-06, "loss": 0.0086, "step": 103660 }, { "epoch": 0.8753878955479091, "grad_norm": 0.08033133298158646, "learning_rate": 6.910285088049571e-06, "loss": 0.0158, "step": 103670 }, { "epoch": 0.875472335395073, "grad_norm": 0.43808576464653015, "learning_rate": 6.909604086221624e-06, "loss": 0.0069, "step": 103680 }, { "epoch": 0.8755567752422369, "grad_norm": 0.5856683254241943, "learning_rate": 6.908923042917468e-06, "loss": 0.0083, "step": 103690 }, { "epoch": 0.8756412150894007, "grad_norm": 0.14246022701263428, "learning_rate": 6.9082419581518935e-06, "loss": 0.0112, "step": 103700 }, { "epoch": 0.8757256549365645, "grad_norm": 0.48532235622406006, "learning_rate": 6.9075608319396954e-06, "loss": 0.0119, "step": 103710 }, { "epoch": 0.8758100947837284, "grad_norm": 0.30562490224838257, "learning_rate": 6.906879664295667e-06, "loss": 0.0162, "step": 103720 }, { "epoch": 0.8758945346308923, "grad_norm": 0.12708865106105804, "learning_rate": 6.906198455234603e-06, "loss": 0.0095, "step": 103730 }, { "epoch": 0.8759789744780562, "grad_norm": 0.39537185430526733, "learning_rate": 6.9055172047713e-06, "loss": 0.0087, "step": 103740 }, { "epoch": 0.8760634143252201, "grad_norm": 0.25972673296928406, "learning_rate": 6.904835912920553e-06, "loss": 0.0069, "step": 103750 }, { "epoch": 0.8761478541723839, "grad_norm": 0.4766220152378082, "learning_rate": 6.90415457969716e-06, "loss": 0.0111, "step": 103760 }, { "epoch": 0.8762322940195478, "grad_norm": 0.40447768568992615, "learning_rate": 6.90347320511592e-06, "loss": 0.0082, "step": 103770 }, { "epoch": 0.8763167338667117, "grad_norm": 0.5643447637557983, "learning_rate": 6.902791789191632e-06, "loss": 0.0111, "step": 103780 }, { "epoch": 0.8764011737138756, "grad_norm": 0.31047940254211426, "learning_rate": 6.902110331939096e-06, "loss": 0.0103, "step": 103790 }, { "epoch": 0.8764856135610395, "grad_norm": 0.5016066431999207, "learning_rate": 6.9014288333731125e-06, "loss": 0.0079, "step": 103800 }, { "epoch": 0.8765700534082034, "grad_norm": 0.4067474901676178, "learning_rate": 6.900747293508486e-06, "loss": 0.0051, "step": 103810 }, { "epoch": 0.8766544932553673, "grad_norm": 0.2258472889661789, "learning_rate": 6.900065712360018e-06, "loss": 0.0076, "step": 103820 }, { "epoch": 0.876738933102531, "grad_norm": 0.41582804918289185, "learning_rate": 6.89938408994251e-06, "loss": 0.0135, "step": 103830 }, { "epoch": 0.8768233729496949, "grad_norm": 0.3290965259075165, "learning_rate": 6.89870242627077e-06, "loss": 0.0187, "step": 103840 }, { "epoch": 0.8769078127968588, "grad_norm": 0.30799052119255066, "learning_rate": 6.898020721359604e-06, "loss": 0.0111, "step": 103850 }, { "epoch": 0.8769922526440227, "grad_norm": 0.6069515347480774, "learning_rate": 6.897338975223815e-06, "loss": 0.0125, "step": 103860 }, { "epoch": 0.8770766924911866, "grad_norm": 0.23894508183002472, "learning_rate": 6.896657187878213e-06, "loss": 0.0095, "step": 103870 }, { "epoch": 0.8771611323383505, "grad_norm": 0.26054254174232483, "learning_rate": 6.895975359337604e-06, "loss": 0.0085, "step": 103880 }, { "epoch": 0.8772455721855144, "grad_norm": 0.28274092078208923, "learning_rate": 6.8952934896168e-06, "loss": 0.0059, "step": 103890 }, { "epoch": 0.8773300120326782, "grad_norm": 0.5575507283210754, "learning_rate": 6.894611578730611e-06, "loss": 0.0234, "step": 103900 }, { "epoch": 0.8774144518798421, "grad_norm": 0.1755971908569336, "learning_rate": 6.893929626693845e-06, "loss": 0.0087, "step": 103910 }, { "epoch": 0.877498891727006, "grad_norm": 0.24330824613571167, "learning_rate": 6.893247633521316e-06, "loss": 0.0133, "step": 103920 }, { "epoch": 0.8775833315741699, "grad_norm": 0.2930185794830322, "learning_rate": 6.892565599227836e-06, "loss": 0.0117, "step": 103930 }, { "epoch": 0.8776677714213337, "grad_norm": 0.1738772988319397, "learning_rate": 6.89188352382822e-06, "loss": 0.0096, "step": 103940 }, { "epoch": 0.8777522112684976, "grad_norm": 0.11770608276128769, "learning_rate": 6.89120140733728e-06, "loss": 0.0141, "step": 103950 }, { "epoch": 0.8778366511156614, "grad_norm": 0.518944501876831, "learning_rate": 6.890519249769834e-06, "loss": 0.0108, "step": 103960 }, { "epoch": 0.8779210909628253, "grad_norm": 0.37008360028266907, "learning_rate": 6.889837051140698e-06, "loss": 0.0107, "step": 103970 }, { "epoch": 0.8780055308099892, "grad_norm": 0.35441577434539795, "learning_rate": 6.889154811464689e-06, "loss": 0.0205, "step": 103980 }, { "epoch": 0.8780899706571531, "grad_norm": 0.3870915472507477, "learning_rate": 6.888472530756623e-06, "loss": 0.0097, "step": 103990 }, { "epoch": 0.878174410504317, "grad_norm": 0.1493985801935196, "learning_rate": 6.88779020903132e-06, "loss": 0.0061, "step": 104000 }, { "epoch": 0.8782588503514809, "grad_norm": 0.45620235800743103, "learning_rate": 6.8871078463036016e-06, "loss": 0.0088, "step": 104010 }, { "epoch": 0.8783432901986448, "grad_norm": 0.18691416084766388, "learning_rate": 6.8864254425882885e-06, "loss": 0.0173, "step": 104020 }, { "epoch": 0.8784277300458087, "grad_norm": 1.758941411972046, "learning_rate": 6.8857429979002e-06, "loss": 0.0169, "step": 104030 }, { "epoch": 0.8785121698929725, "grad_norm": 0.19444400072097778, "learning_rate": 6.885060512254159e-06, "loss": 0.0125, "step": 104040 }, { "epoch": 0.8785966097401364, "grad_norm": 0.09443315118551254, "learning_rate": 6.884377985664992e-06, "loss": 0.0134, "step": 104050 }, { "epoch": 0.8786810495873002, "grad_norm": 0.24077488481998444, "learning_rate": 6.88369541814752e-06, "loss": 0.011, "step": 104060 }, { "epoch": 0.8787654894344641, "grad_norm": 0.4166821539402008, "learning_rate": 6.88301280971657e-06, "loss": 0.0078, "step": 104070 }, { "epoch": 0.878849929281628, "grad_norm": 0.637093722820282, "learning_rate": 6.882330160386967e-06, "loss": 0.0179, "step": 104080 }, { "epoch": 0.8789343691287919, "grad_norm": 0.17776843905448914, "learning_rate": 6.88164747017354e-06, "loss": 0.0123, "step": 104090 }, { "epoch": 0.8790188089759557, "grad_norm": 0.5774242281913757, "learning_rate": 6.880964739091116e-06, "loss": 0.0192, "step": 104100 }, { "epoch": 0.8791032488231196, "grad_norm": 0.0811065286397934, "learning_rate": 6.880281967154522e-06, "loss": 0.0106, "step": 104110 }, { "epoch": 0.8791876886702835, "grad_norm": 0.37238457798957825, "learning_rate": 6.879599154378589e-06, "loss": 0.0076, "step": 104120 }, { "epoch": 0.8792721285174474, "grad_norm": 0.27582335472106934, "learning_rate": 6.878916300778147e-06, "loss": 0.0126, "step": 104130 }, { "epoch": 0.8793565683646113, "grad_norm": 0.21503713726997375, "learning_rate": 6.878233406368029e-06, "loss": 0.0164, "step": 104140 }, { "epoch": 0.8794410082117752, "grad_norm": 0.34859469532966614, "learning_rate": 6.877550471163066e-06, "loss": 0.0089, "step": 104150 }, { "epoch": 0.8795254480589391, "grad_norm": 0.619922399520874, "learning_rate": 6.876867495178091e-06, "loss": 0.0156, "step": 104160 }, { "epoch": 0.8796098879061028, "grad_norm": 0.2128763645887375, "learning_rate": 6.876184478427939e-06, "loss": 0.0074, "step": 104170 }, { "epoch": 0.8796943277532667, "grad_norm": 0.7410041093826294, "learning_rate": 6.875501420927445e-06, "loss": 0.0192, "step": 104180 }, { "epoch": 0.8797787676004306, "grad_norm": 0.5558664798736572, "learning_rate": 6.874818322691443e-06, "loss": 0.0192, "step": 104190 }, { "epoch": 0.8798632074475945, "grad_norm": 0.16604083776474, "learning_rate": 6.8741351837347725e-06, "loss": 0.0108, "step": 104200 }, { "epoch": 0.8799476472947584, "grad_norm": 0.07587899267673492, "learning_rate": 6.873452004072271e-06, "loss": 0.0075, "step": 104210 }, { "epoch": 0.8800320871419223, "grad_norm": 0.19960425794124603, "learning_rate": 6.872768783718775e-06, "loss": 0.0072, "step": 104220 }, { "epoch": 0.8801165269890862, "grad_norm": 0.24511153995990753, "learning_rate": 6.8720855226891244e-06, "loss": 0.0123, "step": 104230 }, { "epoch": 0.88020096683625, "grad_norm": 0.04221920296549797, "learning_rate": 6.871402220998159e-06, "loss": 0.0083, "step": 104240 }, { "epoch": 0.8802854066834139, "grad_norm": 0.26006749272346497, "learning_rate": 6.8707188786607215e-06, "loss": 0.0195, "step": 104250 }, { "epoch": 0.8803698465305778, "grad_norm": 0.32102057337760925, "learning_rate": 6.870035495691654e-06, "loss": 0.0128, "step": 104260 }, { "epoch": 0.8804542863777417, "grad_norm": 0.4367019534111023, "learning_rate": 6.869352072105798e-06, "loss": 0.0102, "step": 104270 }, { "epoch": 0.8805387262249056, "grad_norm": 0.5398213863372803, "learning_rate": 6.868668607917998e-06, "loss": 0.0098, "step": 104280 }, { "epoch": 0.8806231660720694, "grad_norm": 0.08652906119823456, "learning_rate": 6.867985103143099e-06, "loss": 0.0089, "step": 104290 }, { "epoch": 0.8807076059192332, "grad_norm": 0.45475345849990845, "learning_rate": 6.867301557795947e-06, "loss": 0.012, "step": 104300 }, { "epoch": 0.8807920457663971, "grad_norm": 0.47501519322395325, "learning_rate": 6.8666179718913875e-06, "loss": 0.012, "step": 104310 }, { "epoch": 0.880876485613561, "grad_norm": 0.13206660747528076, "learning_rate": 6.865934345444267e-06, "loss": 0.007, "step": 104320 }, { "epoch": 0.8809609254607249, "grad_norm": 0.20883969962596893, "learning_rate": 6.865250678469436e-06, "loss": 0.008, "step": 104330 }, { "epoch": 0.8810453653078888, "grad_norm": 0.09219775348901749, "learning_rate": 6.864566970981742e-06, "loss": 0.0076, "step": 104340 }, { "epoch": 0.8811298051550527, "grad_norm": 0.23133961856365204, "learning_rate": 6.863883222996037e-06, "loss": 0.0071, "step": 104350 }, { "epoch": 0.8812142450022166, "grad_norm": 0.15416398644447327, "learning_rate": 6.863199434527168e-06, "loss": 0.0051, "step": 104360 }, { "epoch": 0.8812986848493805, "grad_norm": 0.15443433821201324, "learning_rate": 6.86251560558999e-06, "loss": 0.0087, "step": 104370 }, { "epoch": 0.8813831246965443, "grad_norm": 0.36660999059677124, "learning_rate": 6.861831736199356e-06, "loss": 0.0149, "step": 104380 }, { "epoch": 0.8814675645437082, "grad_norm": 0.32092931866645813, "learning_rate": 6.861147826370116e-06, "loss": 0.0082, "step": 104390 }, { "epoch": 0.881552004390872, "grad_norm": 0.205210343003273, "learning_rate": 6.860463876117128e-06, "loss": 0.0211, "step": 104400 }, { "epoch": 0.8816364442380359, "grad_norm": 0.5356023907661438, "learning_rate": 6.859779885455244e-06, "loss": 0.0186, "step": 104410 }, { "epoch": 0.8817208840851998, "grad_norm": 0.380154013633728, "learning_rate": 6.859095854399325e-06, "loss": 0.0138, "step": 104420 }, { "epoch": 0.8818053239323637, "grad_norm": 0.32245293259620667, "learning_rate": 6.858411782964224e-06, "loss": 0.0143, "step": 104430 }, { "epoch": 0.8818897637795275, "grad_norm": 0.4775404930114746, "learning_rate": 6.8577276711648e-06, "loss": 0.013, "step": 104440 }, { "epoch": 0.8819742036266914, "grad_norm": 0.41997724771499634, "learning_rate": 6.85704351901591e-06, "loss": 0.0074, "step": 104450 }, { "epoch": 0.8820586434738553, "grad_norm": 0.0380631685256958, "learning_rate": 6.856359326532417e-06, "loss": 0.0078, "step": 104460 }, { "epoch": 0.8821430833210192, "grad_norm": 0.6776657104492188, "learning_rate": 6.85567509372918e-06, "loss": 0.0112, "step": 104470 }, { "epoch": 0.8822275231681831, "grad_norm": 0.3146539628505707, "learning_rate": 6.8549908206210596e-06, "loss": 0.0067, "step": 104480 }, { "epoch": 0.882311963015347, "grad_norm": 0.2586866319179535, "learning_rate": 6.854306507222918e-06, "loss": 0.0125, "step": 104490 }, { "epoch": 0.8823964028625109, "grad_norm": 0.6410300731658936, "learning_rate": 6.8536221535496195e-06, "loss": 0.0071, "step": 104500 }, { "epoch": 0.8824808427096748, "grad_norm": 0.2732219696044922, "learning_rate": 6.852937759616027e-06, "loss": 0.0113, "step": 104510 }, { "epoch": 0.8825652825568385, "grad_norm": 0.5541496872901917, "learning_rate": 6.852253325437007e-06, "loss": 0.0135, "step": 104520 }, { "epoch": 0.8826497224040024, "grad_norm": 0.24706102907657623, "learning_rate": 6.851568851027424e-06, "loss": 0.0068, "step": 104530 }, { "epoch": 0.8827341622511663, "grad_norm": 0.2560611069202423, "learning_rate": 6.850884336402146e-06, "loss": 0.0059, "step": 104540 }, { "epoch": 0.8828186020983302, "grad_norm": 0.7488501667976379, "learning_rate": 6.850199781576038e-06, "loss": 0.0143, "step": 104550 }, { "epoch": 0.8829030419454941, "grad_norm": 0.27667519450187683, "learning_rate": 6.849515186563971e-06, "loss": 0.008, "step": 104560 }, { "epoch": 0.882987481792658, "grad_norm": 0.3443450927734375, "learning_rate": 6.848830551380813e-06, "loss": 0.0094, "step": 104570 }, { "epoch": 0.8830719216398218, "grad_norm": 0.2657186985015869, "learning_rate": 6.848145876041433e-06, "loss": 0.0095, "step": 104580 }, { "epoch": 0.8831563614869857, "grad_norm": 0.33791109919548035, "learning_rate": 6.847461160560705e-06, "loss": 0.01, "step": 104590 }, { "epoch": 0.8832408013341496, "grad_norm": 0.6504707336425781, "learning_rate": 6.846776404953498e-06, "loss": 0.0146, "step": 104600 }, { "epoch": 0.8833252411813135, "grad_norm": 0.10863087326288223, "learning_rate": 6.8460916092346865e-06, "loss": 0.0108, "step": 104610 }, { "epoch": 0.8834096810284774, "grad_norm": 0.09432513266801834, "learning_rate": 6.845406773419143e-06, "loss": 0.0085, "step": 104620 }, { "epoch": 0.8834941208756412, "grad_norm": 0.35230427980422974, "learning_rate": 6.8447218975217435e-06, "loss": 0.0152, "step": 104630 }, { "epoch": 0.883578560722805, "grad_norm": 0.49296480417251587, "learning_rate": 6.8440369815573615e-06, "loss": 0.0157, "step": 104640 }, { "epoch": 0.8836630005699689, "grad_norm": 0.3147108554840088, "learning_rate": 6.843352025540875e-06, "loss": 0.0076, "step": 104650 }, { "epoch": 0.8837474404171328, "grad_norm": 0.10174733400344849, "learning_rate": 6.84266702948716e-06, "loss": 0.0103, "step": 104660 }, { "epoch": 0.8838318802642967, "grad_norm": 0.39087361097335815, "learning_rate": 6.841981993411093e-06, "loss": 0.0056, "step": 104670 }, { "epoch": 0.8839163201114606, "grad_norm": 1.0355411767959595, "learning_rate": 6.8412969173275576e-06, "loss": 0.0081, "step": 104680 }, { "epoch": 0.8840007599586245, "grad_norm": 0.846696674823761, "learning_rate": 6.840611801251429e-06, "loss": 0.009, "step": 104690 }, { "epoch": 0.8840851998057884, "grad_norm": 0.6753175854682922, "learning_rate": 6.8399266451975886e-06, "loss": 0.0131, "step": 104700 }, { "epoch": 0.8841696396529523, "grad_norm": 0.2915000021457672, "learning_rate": 6.83924144918092e-06, "loss": 0.0119, "step": 104710 }, { "epoch": 0.8842540795001161, "grad_norm": 0.2980114221572876, "learning_rate": 6.838556213216302e-06, "loss": 0.0084, "step": 104720 }, { "epoch": 0.88433851934728, "grad_norm": 0.16499459743499756, "learning_rate": 6.83787093731862e-06, "loss": 0.0148, "step": 104730 }, { "epoch": 0.8844229591944438, "grad_norm": 0.25231799483299255, "learning_rate": 6.837185621502759e-06, "loss": 0.0134, "step": 104740 }, { "epoch": 0.8845073990416077, "grad_norm": 0.05163130536675453, "learning_rate": 6.836500265783603e-06, "loss": 0.007, "step": 104750 }, { "epoch": 0.8845918388887716, "grad_norm": 0.11202037334442139, "learning_rate": 6.835814870176038e-06, "loss": 0.0231, "step": 104760 }, { "epoch": 0.8846762787359355, "grad_norm": 0.3954600691795349, "learning_rate": 6.835129434694948e-06, "loss": 0.0096, "step": 104770 }, { "epoch": 0.8847607185830993, "grad_norm": 0.06824646890163422, "learning_rate": 6.834443959355224e-06, "loss": 0.0044, "step": 104780 }, { "epoch": 0.8848451584302632, "grad_norm": 0.19963006675243378, "learning_rate": 6.833758444171754e-06, "loss": 0.0096, "step": 104790 }, { "epoch": 0.8849295982774271, "grad_norm": 0.5941175818443298, "learning_rate": 6.833072889159425e-06, "loss": 0.0176, "step": 104800 }, { "epoch": 0.885014038124591, "grad_norm": 0.3344625234603882, "learning_rate": 6.8323872943331295e-06, "loss": 0.0185, "step": 104810 }, { "epoch": 0.8850984779717549, "grad_norm": 0.10354939103126526, "learning_rate": 6.8317016597077565e-06, "loss": 0.0053, "step": 104820 }, { "epoch": 0.8851829178189188, "grad_norm": 0.4343968629837036, "learning_rate": 6.8310159852982e-06, "loss": 0.0086, "step": 104830 }, { "epoch": 0.8852673576660827, "grad_norm": 0.19172349572181702, "learning_rate": 6.830330271119349e-06, "loss": 0.011, "step": 104840 }, { "epoch": 0.8853517975132466, "grad_norm": 0.45634007453918457, "learning_rate": 6.829644517186103e-06, "loss": 0.0124, "step": 104850 }, { "epoch": 0.8854362373604103, "grad_norm": 0.1233663484454155, "learning_rate": 6.828958723513351e-06, "loss": 0.0068, "step": 104860 }, { "epoch": 0.8855206772075742, "grad_norm": 0.21990835666656494, "learning_rate": 6.828272890115991e-06, "loss": 0.0162, "step": 104870 }, { "epoch": 0.8856051170547381, "grad_norm": 0.1354585886001587, "learning_rate": 6.827587017008918e-06, "loss": 0.0082, "step": 104880 }, { "epoch": 0.885689556901902, "grad_norm": 0.4765644669532776, "learning_rate": 6.826901104207029e-06, "loss": 0.0228, "step": 104890 }, { "epoch": 0.8857739967490659, "grad_norm": 0.5924478769302368, "learning_rate": 6.826215151725223e-06, "loss": 0.0111, "step": 104900 }, { "epoch": 0.8858584365962298, "grad_norm": 0.3624287545681, "learning_rate": 6.825529159578399e-06, "loss": 0.0054, "step": 104910 }, { "epoch": 0.8859428764433936, "grad_norm": 0.1379382312297821, "learning_rate": 6.824843127781455e-06, "loss": 0.0037, "step": 104920 }, { "epoch": 0.8860273162905575, "grad_norm": 0.33907243609428406, "learning_rate": 6.824157056349292e-06, "loss": 0.0135, "step": 104930 }, { "epoch": 0.8861117561377214, "grad_norm": 0.5096900463104248, "learning_rate": 6.8234709452968104e-06, "loss": 0.0093, "step": 104940 }, { "epoch": 0.8861961959848853, "grad_norm": 0.37453681230545044, "learning_rate": 6.822784794638916e-06, "loss": 0.009, "step": 104950 }, { "epoch": 0.8862806358320492, "grad_norm": 0.5260475873947144, "learning_rate": 6.822098604390508e-06, "loss": 0.0096, "step": 104960 }, { "epoch": 0.886365075679213, "grad_norm": 0.6798831224441528, "learning_rate": 6.821412374566492e-06, "loss": 0.0129, "step": 104970 }, { "epoch": 0.8864495155263769, "grad_norm": 0.5362716913223267, "learning_rate": 6.820726105181772e-06, "loss": 0.007, "step": 104980 }, { "epoch": 0.8865339553735407, "grad_norm": 0.09696201980113983, "learning_rate": 6.820039796251254e-06, "loss": 0.0102, "step": 104990 }, { "epoch": 0.8866183952207046, "grad_norm": 1.3530110120773315, "learning_rate": 6.819353447789845e-06, "loss": 0.0213, "step": 105000 }, { "epoch": 0.8867028350678685, "grad_norm": 0.18425095081329346, "learning_rate": 6.818667059812452e-06, "loss": 0.0272, "step": 105010 }, { "epoch": 0.8867872749150324, "grad_norm": 0.1347292810678482, "learning_rate": 6.817980632333982e-06, "loss": 0.0149, "step": 105020 }, { "epoch": 0.8868717147621963, "grad_norm": 0.3008230924606323, "learning_rate": 6.817294165369348e-06, "loss": 0.0117, "step": 105030 }, { "epoch": 0.8869561546093602, "grad_norm": 0.5839051604270935, "learning_rate": 6.816607658933456e-06, "loss": 0.0207, "step": 105040 }, { "epoch": 0.8870405944565241, "grad_norm": 0.20641683042049408, "learning_rate": 6.8159211130412175e-06, "loss": 0.017, "step": 105050 }, { "epoch": 0.8871250343036879, "grad_norm": 0.7148640751838684, "learning_rate": 6.8152345277075435e-06, "loss": 0.0076, "step": 105060 }, { "epoch": 0.8872094741508518, "grad_norm": 0.07250922173261642, "learning_rate": 6.814547902947348e-06, "loss": 0.0096, "step": 105070 }, { "epoch": 0.8872939139980157, "grad_norm": 0.26467928290367126, "learning_rate": 6.813861238775545e-06, "loss": 0.0122, "step": 105080 }, { "epoch": 0.8873783538451795, "grad_norm": 0.22255821526050568, "learning_rate": 6.813174535207047e-06, "loss": 0.0084, "step": 105090 }, { "epoch": 0.8874627936923434, "grad_norm": 0.5489144325256348, "learning_rate": 6.812487792256769e-06, "loss": 0.0144, "step": 105100 }, { "epoch": 0.8875472335395073, "grad_norm": 0.10090185701847076, "learning_rate": 6.811801009939628e-06, "loss": 0.0131, "step": 105110 }, { "epoch": 0.8876316733866711, "grad_norm": 0.629481315612793, "learning_rate": 6.81111418827054e-06, "loss": 0.0128, "step": 105120 }, { "epoch": 0.887716113233835, "grad_norm": 0.19991172850131989, "learning_rate": 6.810427327264424e-06, "loss": 0.0037, "step": 105130 }, { "epoch": 0.8878005530809989, "grad_norm": 0.18636561930179596, "learning_rate": 6.809740426936195e-06, "loss": 0.0123, "step": 105140 }, { "epoch": 0.8878849929281628, "grad_norm": 0.22667722404003143, "learning_rate": 6.809053487300778e-06, "loss": 0.011, "step": 105150 }, { "epoch": 0.8879694327753267, "grad_norm": 0.2493622750043869, "learning_rate": 6.80836650837309e-06, "loss": 0.0035, "step": 105160 }, { "epoch": 0.8880538726224906, "grad_norm": 0.05613076686859131, "learning_rate": 6.80767949016805e-06, "loss": 0.0058, "step": 105170 }, { "epoch": 0.8881383124696545, "grad_norm": 0.5794581770896912, "learning_rate": 6.806992432700582e-06, "loss": 0.0085, "step": 105180 }, { "epoch": 0.8882227523168184, "grad_norm": 0.032463058829307556, "learning_rate": 6.806305335985609e-06, "loss": 0.0144, "step": 105190 }, { "epoch": 0.8883071921639821, "grad_norm": 0.2853797972202301, "learning_rate": 6.805618200038055e-06, "loss": 0.0117, "step": 105200 }, { "epoch": 0.888391632011146, "grad_norm": 0.42246168851852417, "learning_rate": 6.804931024872843e-06, "loss": 0.0084, "step": 105210 }, { "epoch": 0.8884760718583099, "grad_norm": 0.14658208191394806, "learning_rate": 6.804243810504898e-06, "loss": 0.0114, "step": 105220 }, { "epoch": 0.8885605117054738, "grad_norm": 0.23597542941570282, "learning_rate": 6.8035565569491495e-06, "loss": 0.0056, "step": 105230 }, { "epoch": 0.8886449515526377, "grad_norm": 0.4498926103115082, "learning_rate": 6.802869264220521e-06, "loss": 0.0113, "step": 105240 }, { "epoch": 0.8887293913998016, "grad_norm": 0.43469205498695374, "learning_rate": 6.802181932333941e-06, "loss": 0.0079, "step": 105250 }, { "epoch": 0.8888138312469654, "grad_norm": 0.37833428382873535, "learning_rate": 6.801494561304338e-06, "loss": 0.0148, "step": 105260 }, { "epoch": 0.8888982710941293, "grad_norm": 0.33690404891967773, "learning_rate": 6.800807151146645e-06, "loss": 0.0089, "step": 105270 }, { "epoch": 0.8889827109412932, "grad_norm": 0.10689788311719894, "learning_rate": 6.800119701875789e-06, "loss": 0.0064, "step": 105280 }, { "epoch": 0.8890671507884571, "grad_norm": 0.4189966917037964, "learning_rate": 6.7994322135067005e-06, "loss": 0.0101, "step": 105290 }, { "epoch": 0.889151590635621, "grad_norm": 0.05096859112381935, "learning_rate": 6.798744686054313e-06, "loss": 0.0113, "step": 105300 }, { "epoch": 0.8892360304827849, "grad_norm": 0.5393560528755188, "learning_rate": 6.79805711953356e-06, "loss": 0.0071, "step": 105310 }, { "epoch": 0.8893204703299487, "grad_norm": 0.07369806617498398, "learning_rate": 6.797369513959375e-06, "loss": 0.0193, "step": 105320 }, { "epoch": 0.8894049101771125, "grad_norm": 0.11716673523187637, "learning_rate": 6.796681869346693e-06, "loss": 0.0117, "step": 105330 }, { "epoch": 0.8894893500242764, "grad_norm": 0.22793491184711456, "learning_rate": 6.795994185710448e-06, "loss": 0.0061, "step": 105340 }, { "epoch": 0.8895737898714403, "grad_norm": 0.1175098866224289, "learning_rate": 6.7953064630655775e-06, "loss": 0.0084, "step": 105350 }, { "epoch": 0.8896582297186042, "grad_norm": 0.294420450925827, "learning_rate": 6.794618701427019e-06, "loss": 0.0093, "step": 105360 }, { "epoch": 0.8897426695657681, "grad_norm": 0.5865687131881714, "learning_rate": 6.7939309008097094e-06, "loss": 0.0158, "step": 105370 }, { "epoch": 0.889827109412932, "grad_norm": 0.25651878118515015, "learning_rate": 6.7932430612285895e-06, "loss": 0.0079, "step": 105380 }, { "epoch": 0.8899115492600959, "grad_norm": 0.8480436205863953, "learning_rate": 6.792555182698597e-06, "loss": 0.0086, "step": 105390 }, { "epoch": 0.8899959891072597, "grad_norm": 0.16908007860183716, "learning_rate": 6.791867265234674e-06, "loss": 0.0194, "step": 105400 }, { "epoch": 0.8900804289544236, "grad_norm": 0.49753883481025696, "learning_rate": 6.7911793088517604e-06, "loss": 0.0096, "step": 105410 }, { "epoch": 0.8901648688015875, "grad_norm": 0.036043018102645874, "learning_rate": 6.790491313564799e-06, "loss": 0.0102, "step": 105420 }, { "epoch": 0.8902493086487513, "grad_norm": 0.34268736839294434, "learning_rate": 6.789803279388733e-06, "loss": 0.0129, "step": 105430 }, { "epoch": 0.8903337484959152, "grad_norm": 0.1150398701429367, "learning_rate": 6.789115206338508e-06, "loss": 0.0066, "step": 105440 }, { "epoch": 0.8904181883430791, "grad_norm": 0.05895943194627762, "learning_rate": 6.7884270944290666e-06, "loss": 0.0053, "step": 105450 }, { "epoch": 0.890502628190243, "grad_norm": 0.29675066471099854, "learning_rate": 6.7877389436753535e-06, "loss": 0.0059, "step": 105460 }, { "epoch": 0.8905870680374068, "grad_norm": 0.37605878710746765, "learning_rate": 6.787050754092318e-06, "loss": 0.0177, "step": 105470 }, { "epoch": 0.8906715078845707, "grad_norm": 0.3197978138923645, "learning_rate": 6.786362525694907e-06, "loss": 0.0075, "step": 105480 }, { "epoch": 0.8907559477317346, "grad_norm": 0.1833847612142563, "learning_rate": 6.785674258498069e-06, "loss": 0.0118, "step": 105490 }, { "epoch": 0.8908403875788985, "grad_norm": 0.45378756523132324, "learning_rate": 6.7849859525167495e-06, "loss": 0.0099, "step": 105500 }, { "epoch": 0.8909248274260624, "grad_norm": 1.0101226568222046, "learning_rate": 6.784297607765903e-06, "loss": 0.0117, "step": 105510 }, { "epoch": 0.8910092672732263, "grad_norm": 0.4127304255962372, "learning_rate": 6.783609224260476e-06, "loss": 0.0165, "step": 105520 }, { "epoch": 0.8910937071203902, "grad_norm": 0.40009117126464844, "learning_rate": 6.782920802015425e-06, "loss": 0.0078, "step": 105530 }, { "epoch": 0.891178146967554, "grad_norm": 0.007213061209768057, "learning_rate": 6.782232341045697e-06, "loss": 0.0086, "step": 105540 }, { "epoch": 0.8912625868147178, "grad_norm": 0.1675809770822525, "learning_rate": 6.781543841366248e-06, "loss": 0.0068, "step": 105550 }, { "epoch": 0.8913470266618817, "grad_norm": 0.33921030163764954, "learning_rate": 6.7808553029920336e-06, "loss": 0.0092, "step": 105560 }, { "epoch": 0.8914314665090456, "grad_norm": 0.06365330517292023, "learning_rate": 6.780166725938005e-06, "loss": 0.0089, "step": 105570 }, { "epoch": 0.8915159063562095, "grad_norm": 0.5277128219604492, "learning_rate": 6.779478110219121e-06, "loss": 0.015, "step": 105580 }, { "epoch": 0.8916003462033734, "grad_norm": 0.4036887586116791, "learning_rate": 6.778789455850336e-06, "loss": 0.0103, "step": 105590 }, { "epoch": 0.8916847860505372, "grad_norm": 0.3311508595943451, "learning_rate": 6.77810076284661e-06, "loss": 0.012, "step": 105600 }, { "epoch": 0.8917692258977011, "grad_norm": 0.33509624004364014, "learning_rate": 6.777412031222898e-06, "loss": 0.0121, "step": 105610 }, { "epoch": 0.891853665744865, "grad_norm": 0.44742900133132935, "learning_rate": 6.7767232609941625e-06, "loss": 0.0085, "step": 105620 }, { "epoch": 0.8919381055920289, "grad_norm": 0.12364418804645538, "learning_rate": 6.776034452175362e-06, "loss": 0.0083, "step": 105630 }, { "epoch": 0.8920225454391928, "grad_norm": 0.7648327946662903, "learning_rate": 6.775345604781456e-06, "loss": 0.0123, "step": 105640 }, { "epoch": 0.8921069852863567, "grad_norm": 0.3782874345779419, "learning_rate": 6.774656718827409e-06, "loss": 0.0108, "step": 105650 }, { "epoch": 0.8921914251335205, "grad_norm": 0.6144691109657288, "learning_rate": 6.77396779432818e-06, "loss": 0.0237, "step": 105660 }, { "epoch": 0.8922758649806843, "grad_norm": 0.35098299384117126, "learning_rate": 6.773278831298734e-06, "loss": 0.0178, "step": 105670 }, { "epoch": 0.8923603048278482, "grad_norm": 0.14569291472434998, "learning_rate": 6.772589829754036e-06, "loss": 0.0099, "step": 105680 }, { "epoch": 0.8924447446750121, "grad_norm": 0.20582343637943268, "learning_rate": 6.77190078970905e-06, "loss": 0.0116, "step": 105690 }, { "epoch": 0.892529184522176, "grad_norm": 0.16675513982772827, "learning_rate": 6.771211711178742e-06, "loss": 0.0049, "step": 105700 }, { "epoch": 0.8926136243693399, "grad_norm": 0.7616739869117737, "learning_rate": 6.770522594178079e-06, "loss": 0.009, "step": 105710 }, { "epoch": 0.8926980642165038, "grad_norm": 0.6031322479248047, "learning_rate": 6.7698334387220265e-06, "loss": 0.0117, "step": 105720 }, { "epoch": 0.8927825040636677, "grad_norm": 0.23102790117263794, "learning_rate": 6.7691442448255565e-06, "loss": 0.008, "step": 105730 }, { "epoch": 0.8928669439108315, "grad_norm": 1.0080320835113525, "learning_rate": 6.768455012503634e-06, "loss": 0.0109, "step": 105740 }, { "epoch": 0.8929513837579954, "grad_norm": 0.4421808123588562, "learning_rate": 6.767765741771233e-06, "loss": 0.0059, "step": 105750 }, { "epoch": 0.8930358236051593, "grad_norm": 0.33001479506492615, "learning_rate": 6.76707643264332e-06, "loss": 0.0095, "step": 105760 }, { "epoch": 0.8931202634523232, "grad_norm": 0.2708105742931366, "learning_rate": 6.766387085134871e-06, "loss": 0.0167, "step": 105770 }, { "epoch": 0.893204703299487, "grad_norm": 0.19154846668243408, "learning_rate": 6.765697699260856e-06, "loss": 0.0086, "step": 105780 }, { "epoch": 0.8932891431466509, "grad_norm": 0.19344347715377808, "learning_rate": 6.765008275036248e-06, "loss": 0.0044, "step": 105790 }, { "epoch": 0.8933735829938148, "grad_norm": 0.12487155944108963, "learning_rate": 6.764318812476021e-06, "loss": 0.0078, "step": 105800 }, { "epoch": 0.8934580228409786, "grad_norm": 0.505517840385437, "learning_rate": 6.763629311595153e-06, "loss": 0.0148, "step": 105810 }, { "epoch": 0.8935424626881425, "grad_norm": 0.3865390419960022, "learning_rate": 6.762939772408618e-06, "loss": 0.015, "step": 105820 }, { "epoch": 0.8936269025353064, "grad_norm": 0.3298071324825287, "learning_rate": 6.7622501949313915e-06, "loss": 0.0175, "step": 105830 }, { "epoch": 0.8937113423824703, "grad_norm": 0.3396511673927307, "learning_rate": 6.761560579178451e-06, "loss": 0.0114, "step": 105840 }, { "epoch": 0.8937957822296342, "grad_norm": 0.2855994701385498, "learning_rate": 6.760870925164778e-06, "loss": 0.0124, "step": 105850 }, { "epoch": 0.8938802220767981, "grad_norm": 0.5583894848823547, "learning_rate": 6.7601812329053475e-06, "loss": 0.0142, "step": 105860 }, { "epoch": 0.893964661923962, "grad_norm": 0.38887977600097656, "learning_rate": 6.759491502415142e-06, "loss": 0.0113, "step": 105870 }, { "epoch": 0.8940491017711258, "grad_norm": 0.13657288253307343, "learning_rate": 6.758801733709142e-06, "loss": 0.01, "step": 105880 }, { "epoch": 0.8941335416182896, "grad_norm": 0.25111106038093567, "learning_rate": 6.758111926802327e-06, "loss": 0.0126, "step": 105890 }, { "epoch": 0.8942179814654535, "grad_norm": 0.2596627473831177, "learning_rate": 6.757422081709684e-06, "loss": 0.0104, "step": 105900 }, { "epoch": 0.8943024213126174, "grad_norm": 0.27265670895576477, "learning_rate": 6.756732198446191e-06, "loss": 0.0109, "step": 105910 }, { "epoch": 0.8943868611597813, "grad_norm": 0.39702433347702026, "learning_rate": 6.756042277026835e-06, "loss": 0.0149, "step": 105920 }, { "epoch": 0.8944713010069452, "grad_norm": 0.25457438826560974, "learning_rate": 6.755352317466601e-06, "loss": 0.0064, "step": 105930 }, { "epoch": 0.894555740854109, "grad_norm": 0.6095024943351746, "learning_rate": 6.754662319780475e-06, "loss": 0.0057, "step": 105940 }, { "epoch": 0.8946401807012729, "grad_norm": 0.1450764536857605, "learning_rate": 6.753972283983442e-06, "loss": 0.0217, "step": 105950 }, { "epoch": 0.8947246205484368, "grad_norm": 0.10617950558662415, "learning_rate": 6.75328221009049e-06, "loss": 0.0068, "step": 105960 }, { "epoch": 0.8948090603956007, "grad_norm": 0.519195020198822, "learning_rate": 6.7525920981166105e-06, "loss": 0.0155, "step": 105970 }, { "epoch": 0.8948935002427646, "grad_norm": 0.4628375172615051, "learning_rate": 6.751901948076788e-06, "loss": 0.0105, "step": 105980 }, { "epoch": 0.8949779400899285, "grad_norm": 0.2448052614927292, "learning_rate": 6.751211759986014e-06, "loss": 0.0101, "step": 105990 }, { "epoch": 0.8950623799370924, "grad_norm": 0.21363158524036407, "learning_rate": 6.750521533859279e-06, "loss": 0.011, "step": 106000 }, { "epoch": 0.8951468197842561, "grad_norm": 0.37889012694358826, "learning_rate": 6.749831269711575e-06, "loss": 0.01, "step": 106010 }, { "epoch": 0.89523125963142, "grad_norm": 0.19956620037555695, "learning_rate": 6.749140967557895e-06, "loss": 0.0145, "step": 106020 }, { "epoch": 0.8953156994785839, "grad_norm": 0.369949609041214, "learning_rate": 6.748450627413231e-06, "loss": 0.0093, "step": 106030 }, { "epoch": 0.8954001393257478, "grad_norm": 0.1410391628742218, "learning_rate": 6.747760249292579e-06, "loss": 0.0079, "step": 106040 }, { "epoch": 0.8954845791729117, "grad_norm": 0.06247009336948395, "learning_rate": 6.747069833210932e-06, "loss": 0.0049, "step": 106050 }, { "epoch": 0.8955690190200756, "grad_norm": 0.2817184031009674, "learning_rate": 6.746379379183285e-06, "loss": 0.0082, "step": 106060 }, { "epoch": 0.8956534588672395, "grad_norm": 0.43080055713653564, "learning_rate": 6.745688887224636e-06, "loss": 0.0143, "step": 106070 }, { "epoch": 0.8957378987144033, "grad_norm": 0.24070894718170166, "learning_rate": 6.7449983573499835e-06, "loss": 0.0083, "step": 106080 }, { "epoch": 0.8958223385615672, "grad_norm": 0.20280076563358307, "learning_rate": 6.744307789574323e-06, "loss": 0.0128, "step": 106090 }, { "epoch": 0.8959067784087311, "grad_norm": 0.07393552362918854, "learning_rate": 6.743617183912656e-06, "loss": 0.0131, "step": 106100 }, { "epoch": 0.895991218255895, "grad_norm": 0.33901217579841614, "learning_rate": 6.74292654037998e-06, "loss": 0.0114, "step": 106110 }, { "epoch": 0.8960756581030588, "grad_norm": 0.2956322133541107, "learning_rate": 6.742235858991297e-06, "loss": 0.0073, "step": 106120 }, { "epoch": 0.8961600979502227, "grad_norm": 0.30778518319129944, "learning_rate": 6.741545139761608e-06, "loss": 0.01, "step": 106130 }, { "epoch": 0.8962445377973866, "grad_norm": 0.7633430361747742, "learning_rate": 6.740854382705917e-06, "loss": 0.0105, "step": 106140 }, { "epoch": 0.8963289776445504, "grad_norm": 0.5753083229064941, "learning_rate": 6.740163587839224e-06, "loss": 0.0139, "step": 106150 }, { "epoch": 0.8964134174917143, "grad_norm": 0.14375713467597961, "learning_rate": 6.7394727551765335e-06, "loss": 0.0082, "step": 106160 }, { "epoch": 0.8964978573388782, "grad_norm": 0.40051648020744324, "learning_rate": 6.7387818847328534e-06, "loss": 0.0107, "step": 106170 }, { "epoch": 0.8965822971860421, "grad_norm": 0.1700560599565506, "learning_rate": 6.738090976523186e-06, "loss": 0.0129, "step": 106180 }, { "epoch": 0.896666737033206, "grad_norm": 0.3279467523097992, "learning_rate": 6.737400030562538e-06, "loss": 0.0082, "step": 106190 }, { "epoch": 0.8967511768803699, "grad_norm": 0.09511954337358475, "learning_rate": 6.736709046865918e-06, "loss": 0.0094, "step": 106200 }, { "epoch": 0.8968356167275338, "grad_norm": 0.5952180027961731, "learning_rate": 6.7360180254483346e-06, "loss": 0.0125, "step": 106210 }, { "epoch": 0.8969200565746976, "grad_norm": 0.7756249904632568, "learning_rate": 6.735326966324794e-06, "loss": 0.016, "step": 106220 }, { "epoch": 0.8970044964218615, "grad_norm": 0.03563651069998741, "learning_rate": 6.734635869510309e-06, "loss": 0.0052, "step": 106230 }, { "epoch": 0.8970889362690253, "grad_norm": 0.4714546501636505, "learning_rate": 6.733944735019886e-06, "loss": 0.0079, "step": 106240 }, { "epoch": 0.8971733761161892, "grad_norm": 0.13594721257686615, "learning_rate": 6.733253562868539e-06, "loss": 0.0122, "step": 106250 }, { "epoch": 0.8972578159633531, "grad_norm": 0.9045950770378113, "learning_rate": 6.732562353071281e-06, "loss": 0.0124, "step": 106260 }, { "epoch": 0.897342255810517, "grad_norm": 0.15452337265014648, "learning_rate": 6.731871105643123e-06, "loss": 0.0098, "step": 106270 }, { "epoch": 0.8974266956576809, "grad_norm": 0.5440170764923096, "learning_rate": 6.731179820599079e-06, "loss": 0.0125, "step": 106280 }, { "epoch": 0.8975111355048447, "grad_norm": 0.2064763903617859, "learning_rate": 6.730488497954165e-06, "loss": 0.008, "step": 106290 }, { "epoch": 0.8975955753520086, "grad_norm": 0.4175027310848236, "learning_rate": 6.729797137723396e-06, "loss": 0.0086, "step": 106300 }, { "epoch": 0.8976800151991725, "grad_norm": 0.25649958848953247, "learning_rate": 6.729105739921786e-06, "loss": 0.0052, "step": 106310 }, { "epoch": 0.8977644550463364, "grad_norm": 0.2906092405319214, "learning_rate": 6.728414304564354e-06, "loss": 0.0098, "step": 106320 }, { "epoch": 0.8978488948935003, "grad_norm": 0.3829152584075928, "learning_rate": 6.727722831666119e-06, "loss": 0.0067, "step": 106330 }, { "epoch": 0.8979333347406642, "grad_norm": 0.033907800912857056, "learning_rate": 6.727031321242098e-06, "loss": 0.0064, "step": 106340 }, { "epoch": 0.8980177745878279, "grad_norm": 0.7402023077011108, "learning_rate": 6.72633977330731e-06, "loss": 0.0141, "step": 106350 }, { "epoch": 0.8981022144349918, "grad_norm": 0.46572160720825195, "learning_rate": 6.725648187876774e-06, "loss": 0.0117, "step": 106360 }, { "epoch": 0.8981866542821557, "grad_norm": 0.5633271336555481, "learning_rate": 6.724956564965515e-06, "loss": 0.01, "step": 106370 }, { "epoch": 0.8982710941293196, "grad_norm": 0.2178260236978531, "learning_rate": 6.724264904588553e-06, "loss": 0.0109, "step": 106380 }, { "epoch": 0.8983555339764835, "grad_norm": 0.31362295150756836, "learning_rate": 6.723573206760909e-06, "loss": 0.0073, "step": 106390 }, { "epoch": 0.8984399738236474, "grad_norm": 0.27408304810523987, "learning_rate": 6.72288147149761e-06, "loss": 0.0132, "step": 106400 }, { "epoch": 0.8985244136708113, "grad_norm": 0.4041386842727661, "learning_rate": 6.722189698813677e-06, "loss": 0.0094, "step": 106410 }, { "epoch": 0.8986088535179751, "grad_norm": 0.6625308990478516, "learning_rate": 6.721497888724138e-06, "loss": 0.0103, "step": 106420 }, { "epoch": 0.898693293365139, "grad_norm": 0.4674430787563324, "learning_rate": 6.720806041244017e-06, "loss": 0.0123, "step": 106430 }, { "epoch": 0.8987777332123029, "grad_norm": 0.09097564220428467, "learning_rate": 6.720114156388341e-06, "loss": 0.0161, "step": 106440 }, { "epoch": 0.8988621730594668, "grad_norm": 0.27777576446533203, "learning_rate": 6.7194222341721395e-06, "loss": 0.0127, "step": 106450 }, { "epoch": 0.8989466129066307, "grad_norm": 0.5069656372070312, "learning_rate": 6.718730274610439e-06, "loss": 0.0089, "step": 106460 }, { "epoch": 0.8990310527537945, "grad_norm": 0.5244996547698975, "learning_rate": 6.718038277718268e-06, "loss": 0.0114, "step": 106470 }, { "epoch": 0.8991154926009584, "grad_norm": 0.28111517429351807, "learning_rate": 6.717346243510658e-06, "loss": 0.0108, "step": 106480 }, { "epoch": 0.8991999324481222, "grad_norm": 0.43258216977119446, "learning_rate": 6.716654172002639e-06, "loss": 0.0072, "step": 106490 }, { "epoch": 0.8992843722952861, "grad_norm": 0.18322227895259857, "learning_rate": 6.715962063209245e-06, "loss": 0.0156, "step": 106500 }, { "epoch": 0.89936881214245, "grad_norm": 0.2014179527759552, "learning_rate": 6.715269917145505e-06, "loss": 0.0102, "step": 106510 }, { "epoch": 0.8994532519896139, "grad_norm": 0.5461642146110535, "learning_rate": 6.714577733826454e-06, "loss": 0.0091, "step": 106520 }, { "epoch": 0.8995376918367778, "grad_norm": 0.7042436003684998, "learning_rate": 6.713885513267125e-06, "loss": 0.0095, "step": 106530 }, { "epoch": 0.8996221316839417, "grad_norm": 0.4980703890323639, "learning_rate": 6.7131932554825565e-06, "loss": 0.0103, "step": 106540 }, { "epoch": 0.8997065715311056, "grad_norm": 0.49288880825042725, "learning_rate": 6.71250096048778e-06, "loss": 0.0069, "step": 106550 }, { "epoch": 0.8997910113782694, "grad_norm": 0.2792717516422272, "learning_rate": 6.711808628297833e-06, "loss": 0.0108, "step": 106560 }, { "epoch": 0.8998754512254333, "grad_norm": 0.3372366726398468, "learning_rate": 6.711116258927755e-06, "loss": 0.0086, "step": 106570 }, { "epoch": 0.8999598910725971, "grad_norm": 0.2607925534248352, "learning_rate": 6.710423852392581e-06, "loss": 0.0058, "step": 106580 }, { "epoch": 0.900044330919761, "grad_norm": 0.25002461671829224, "learning_rate": 6.709731408707353e-06, "loss": 0.0121, "step": 106590 }, { "epoch": 0.9001287707669249, "grad_norm": 0.3354300558567047, "learning_rate": 6.7090389278871085e-06, "loss": 0.0083, "step": 106600 }, { "epoch": 0.9002132106140888, "grad_norm": 0.1165240928530693, "learning_rate": 6.708346409946888e-06, "loss": 0.0067, "step": 106610 }, { "epoch": 0.9002976504612527, "grad_norm": 0.0509352907538414, "learning_rate": 6.707653854901734e-06, "loss": 0.0083, "step": 106620 }, { "epoch": 0.9003820903084165, "grad_norm": 0.7510139346122742, "learning_rate": 6.706961262766688e-06, "loss": 0.0211, "step": 106630 }, { "epoch": 0.9004665301555804, "grad_norm": 0.02418746054172516, "learning_rate": 6.706268633556794e-06, "loss": 0.0088, "step": 106640 }, { "epoch": 0.9005509700027443, "grad_norm": 0.3131538927555084, "learning_rate": 6.705575967287094e-06, "loss": 0.0149, "step": 106650 }, { "epoch": 0.9006354098499082, "grad_norm": 0.20606262981891632, "learning_rate": 6.7048832639726334e-06, "loss": 0.013, "step": 106660 }, { "epoch": 0.9007198496970721, "grad_norm": 0.11546414345502853, "learning_rate": 6.7041905236284575e-06, "loss": 0.0175, "step": 106670 }, { "epoch": 0.900804289544236, "grad_norm": 0.36365318298339844, "learning_rate": 6.7034977462696135e-06, "loss": 0.0051, "step": 106680 }, { "epoch": 0.9008887293913999, "grad_norm": 0.2932097911834717, "learning_rate": 6.702804931911148e-06, "loss": 0.0156, "step": 106690 }, { "epoch": 0.9009731692385636, "grad_norm": 0.2972955107688904, "learning_rate": 6.702112080568107e-06, "loss": 0.0066, "step": 106700 }, { "epoch": 0.9010576090857275, "grad_norm": 0.008524893783032894, "learning_rate": 6.7014191922555414e-06, "loss": 0.0048, "step": 106710 }, { "epoch": 0.9011420489328914, "grad_norm": 0.174923837184906, "learning_rate": 6.700726266988499e-06, "loss": 0.0097, "step": 106720 }, { "epoch": 0.9012264887800553, "grad_norm": 0.6116546988487244, "learning_rate": 6.70003330478203e-06, "loss": 0.0077, "step": 106730 }, { "epoch": 0.9013109286272192, "grad_norm": 0.006708784960210323, "learning_rate": 6.6993403056511874e-06, "loss": 0.0047, "step": 106740 }, { "epoch": 0.9013953684743831, "grad_norm": 0.07495374232530594, "learning_rate": 6.698647269611021e-06, "loss": 0.0083, "step": 106750 }, { "epoch": 0.901479808321547, "grad_norm": 0.13933634757995605, "learning_rate": 6.697954196676584e-06, "loss": 0.0071, "step": 106760 }, { "epoch": 0.9015642481687108, "grad_norm": 0.884205162525177, "learning_rate": 6.697261086862929e-06, "loss": 0.0205, "step": 106770 }, { "epoch": 0.9016486880158747, "grad_norm": 0.2816008925437927, "learning_rate": 6.696567940185113e-06, "loss": 0.009, "step": 106780 }, { "epoch": 0.9017331278630386, "grad_norm": 0.24880743026733398, "learning_rate": 6.695874756658187e-06, "loss": 0.0057, "step": 106790 }, { "epoch": 0.9018175677102025, "grad_norm": 0.1853625774383545, "learning_rate": 6.695181536297208e-06, "loss": 0.0154, "step": 106800 }, { "epoch": 0.9019020075573663, "grad_norm": 0.43757447600364685, "learning_rate": 6.694488279117236e-06, "loss": 0.0075, "step": 106810 }, { "epoch": 0.9019864474045302, "grad_norm": 0.1581740826368332, "learning_rate": 6.693794985133325e-06, "loss": 0.0097, "step": 106820 }, { "epoch": 0.902070887251694, "grad_norm": 0.29659876227378845, "learning_rate": 6.693101654360534e-06, "loss": 0.013, "step": 106830 }, { "epoch": 0.9021553270988579, "grad_norm": 0.43068358302116394, "learning_rate": 6.692408286813922e-06, "loss": 0.0153, "step": 106840 }, { "epoch": 0.9022397669460218, "grad_norm": 0.6126315593719482, "learning_rate": 6.691714882508548e-06, "loss": 0.0119, "step": 106850 }, { "epoch": 0.9023242067931857, "grad_norm": 0.18511448800563812, "learning_rate": 6.691021441459473e-06, "loss": 0.0115, "step": 106860 }, { "epoch": 0.9024086466403496, "grad_norm": 0.21357323229312897, "learning_rate": 6.690327963681761e-06, "loss": 0.0055, "step": 106870 }, { "epoch": 0.9024930864875135, "grad_norm": 0.23890265822410583, "learning_rate": 6.689634449190471e-06, "loss": 0.0081, "step": 106880 }, { "epoch": 0.9025775263346774, "grad_norm": 0.1307365745306015, "learning_rate": 6.688940898000666e-06, "loss": 0.0116, "step": 106890 }, { "epoch": 0.9026619661818412, "grad_norm": 0.060156550258398056, "learning_rate": 6.688247310127411e-06, "loss": 0.012, "step": 106900 }, { "epoch": 0.9027464060290051, "grad_norm": 0.4338096082210541, "learning_rate": 6.687553685585772e-06, "loss": 0.0146, "step": 106910 }, { "epoch": 0.902830845876169, "grad_norm": 0.24963052570819855, "learning_rate": 6.686860024390811e-06, "loss": 0.0061, "step": 106920 }, { "epoch": 0.9029152857233328, "grad_norm": 0.7696675062179565, "learning_rate": 6.686166326557597e-06, "loss": 0.0109, "step": 106930 }, { "epoch": 0.9029997255704967, "grad_norm": 0.29409077763557434, "learning_rate": 6.685472592101196e-06, "loss": 0.0156, "step": 106940 }, { "epoch": 0.9030841654176606, "grad_norm": 0.44963204860687256, "learning_rate": 6.684778821036676e-06, "loss": 0.014, "step": 106950 }, { "epoch": 0.9031686052648245, "grad_norm": 0.11852479726076126, "learning_rate": 6.684085013379105e-06, "loss": 0.0104, "step": 106960 }, { "epoch": 0.9032530451119883, "grad_norm": 0.3569158613681793, "learning_rate": 6.683391169143552e-06, "loss": 0.0084, "step": 106970 }, { "epoch": 0.9033374849591522, "grad_norm": 0.14207999408245087, "learning_rate": 6.682697288345086e-06, "loss": 0.0123, "step": 106980 }, { "epoch": 0.9034219248063161, "grad_norm": 1.0024322271347046, "learning_rate": 6.682003370998783e-06, "loss": 0.0158, "step": 106990 }, { "epoch": 0.90350636465348, "grad_norm": 1.0520765781402588, "learning_rate": 6.68130941711971e-06, "loss": 0.018, "step": 107000 }, { "epoch": 0.9035908045006439, "grad_norm": 0.20376351475715637, "learning_rate": 6.680615426722941e-06, "loss": 0.0149, "step": 107010 }, { "epoch": 0.9036752443478078, "grad_norm": 0.6497356295585632, "learning_rate": 6.679921399823548e-06, "loss": 0.0097, "step": 107020 }, { "epoch": 0.9037596841949717, "grad_norm": 0.41720154881477356, "learning_rate": 6.679227336436609e-06, "loss": 0.0129, "step": 107030 }, { "epoch": 0.9038441240421354, "grad_norm": 1.0411757230758667, "learning_rate": 6.678533236577195e-06, "loss": 0.0099, "step": 107040 }, { "epoch": 0.9039285638892993, "grad_norm": 0.2561766803264618, "learning_rate": 6.677839100260384e-06, "loss": 0.011, "step": 107050 }, { "epoch": 0.9040130037364632, "grad_norm": 0.5473900437355042, "learning_rate": 6.67714492750125e-06, "loss": 0.0116, "step": 107060 }, { "epoch": 0.9040974435836271, "grad_norm": 0.5250412821769714, "learning_rate": 6.676450718314873e-06, "loss": 0.0099, "step": 107070 }, { "epoch": 0.904181883430791, "grad_norm": 0.27301597595214844, "learning_rate": 6.67575647271633e-06, "loss": 0.0114, "step": 107080 }, { "epoch": 0.9042663232779549, "grad_norm": 0.16578607261180878, "learning_rate": 6.675062190720699e-06, "loss": 0.0141, "step": 107090 }, { "epoch": 0.9043507631251188, "grad_norm": 0.1666601449251175, "learning_rate": 6.6743678723430605e-06, "loss": 0.0075, "step": 107100 }, { "epoch": 0.9044352029722826, "grad_norm": 0.12011919915676117, "learning_rate": 6.673673517598495e-06, "loss": 0.0069, "step": 107110 }, { "epoch": 0.9045196428194465, "grad_norm": 0.35761559009552, "learning_rate": 6.672979126502085e-06, "loss": 0.0074, "step": 107120 }, { "epoch": 0.9046040826666104, "grad_norm": 0.06534220278263092, "learning_rate": 6.67228469906891e-06, "loss": 0.0093, "step": 107130 }, { "epoch": 0.9046885225137743, "grad_norm": 0.6468214988708496, "learning_rate": 6.671590235314054e-06, "loss": 0.0077, "step": 107140 }, { "epoch": 0.9047729623609382, "grad_norm": 0.3866775333881378, "learning_rate": 6.670895735252601e-06, "loss": 0.0059, "step": 107150 }, { "epoch": 0.904857402208102, "grad_norm": 0.3215009272098541, "learning_rate": 6.670201198899635e-06, "loss": 0.0074, "step": 107160 }, { "epoch": 0.9049418420552658, "grad_norm": 0.7873644828796387, "learning_rate": 6.669506626270241e-06, "loss": 0.0057, "step": 107170 }, { "epoch": 0.9050262819024297, "grad_norm": 0.16817207634449005, "learning_rate": 6.668812017379505e-06, "loss": 0.0084, "step": 107180 }, { "epoch": 0.9051107217495936, "grad_norm": 0.33071643114089966, "learning_rate": 6.6681173722425145e-06, "loss": 0.0103, "step": 107190 }, { "epoch": 0.9051951615967575, "grad_norm": 0.1921623796224594, "learning_rate": 6.667422690874356e-06, "loss": 0.0084, "step": 107200 }, { "epoch": 0.9052796014439214, "grad_norm": 0.1533905565738678, "learning_rate": 6.666727973290119e-06, "loss": 0.0155, "step": 107210 }, { "epoch": 0.9053640412910853, "grad_norm": 0.060043320059776306, "learning_rate": 6.666033219504891e-06, "loss": 0.0124, "step": 107220 }, { "epoch": 0.9054484811382492, "grad_norm": 0.19945786893367767, "learning_rate": 6.6653384295337645e-06, "loss": 0.0154, "step": 107230 }, { "epoch": 0.905532920985413, "grad_norm": 0.19194401800632477, "learning_rate": 6.664643603391827e-06, "loss": 0.0131, "step": 107240 }, { "epoch": 0.9056173608325769, "grad_norm": 0.15004362165927887, "learning_rate": 6.663948741094172e-06, "loss": 0.011, "step": 107250 }, { "epoch": 0.9057018006797408, "grad_norm": 0.5073580145835876, "learning_rate": 6.66325384265589e-06, "loss": 0.007, "step": 107260 }, { "epoch": 0.9057862405269046, "grad_norm": 0.2371106594800949, "learning_rate": 6.662558908092078e-06, "loss": 0.0157, "step": 107270 }, { "epoch": 0.9058706803740685, "grad_norm": 0.19694842398166656, "learning_rate": 6.661863937417827e-06, "loss": 0.0102, "step": 107280 }, { "epoch": 0.9059551202212324, "grad_norm": 0.6291719675064087, "learning_rate": 6.66116893064823e-06, "loss": 0.0107, "step": 107290 }, { "epoch": 0.9060395600683963, "grad_norm": 0.12082496285438538, "learning_rate": 6.660473887798384e-06, "loss": 0.009, "step": 107300 }, { "epoch": 0.9061239999155601, "grad_norm": 0.2268567979335785, "learning_rate": 6.659778808883387e-06, "loss": 0.0062, "step": 107310 }, { "epoch": 0.906208439762724, "grad_norm": 0.18937093019485474, "learning_rate": 6.659083693918335e-06, "loss": 0.0084, "step": 107320 }, { "epoch": 0.9062928796098879, "grad_norm": 0.18482668697834015, "learning_rate": 6.658388542918322e-06, "loss": 0.0075, "step": 107330 }, { "epoch": 0.9063773194570518, "grad_norm": 0.17403364181518555, "learning_rate": 6.6576933558984524e-06, "loss": 0.0196, "step": 107340 }, { "epoch": 0.9064617593042157, "grad_norm": 0.38269704580307007, "learning_rate": 6.656998132873822e-06, "loss": 0.0184, "step": 107350 }, { "epoch": 0.9065461991513796, "grad_norm": 0.09655166417360306, "learning_rate": 6.6563028738595325e-06, "loss": 0.0111, "step": 107360 }, { "epoch": 0.9066306389985435, "grad_norm": 0.5041170120239258, "learning_rate": 6.655607578870682e-06, "loss": 0.0067, "step": 107370 }, { "epoch": 0.9067150788457072, "grad_norm": 0.016145819798111916, "learning_rate": 6.654912247922376e-06, "loss": 0.0135, "step": 107380 }, { "epoch": 0.9067995186928711, "grad_norm": 0.42287835478782654, "learning_rate": 6.654216881029715e-06, "loss": 0.0104, "step": 107390 }, { "epoch": 0.906883958540035, "grad_norm": 0.8222741484642029, "learning_rate": 6.653521478207802e-06, "loss": 0.0124, "step": 107400 }, { "epoch": 0.9069683983871989, "grad_norm": 0.21774211525917053, "learning_rate": 6.65282603947174e-06, "loss": 0.0055, "step": 107410 }, { "epoch": 0.9070528382343628, "grad_norm": 0.9794042110443115, "learning_rate": 6.652130564836636e-06, "loss": 0.0113, "step": 107420 }, { "epoch": 0.9071372780815267, "grad_norm": 0.47958776354789734, "learning_rate": 6.651435054317596e-06, "loss": 0.0099, "step": 107430 }, { "epoch": 0.9072217179286906, "grad_norm": 0.2512509524822235, "learning_rate": 6.6507395079297245e-06, "loss": 0.0055, "step": 107440 }, { "epoch": 0.9073061577758544, "grad_norm": 0.2934769093990326, "learning_rate": 6.650043925688128e-06, "loss": 0.0105, "step": 107450 }, { "epoch": 0.9073905976230183, "grad_norm": 0.022867189720273018, "learning_rate": 6.6493483076079156e-06, "loss": 0.0098, "step": 107460 }, { "epoch": 0.9074750374701822, "grad_norm": 0.14771729707717896, "learning_rate": 6.6486526537041966e-06, "loss": 0.0161, "step": 107470 }, { "epoch": 0.9075594773173461, "grad_norm": 0.5329362750053406, "learning_rate": 6.64795696399208e-06, "loss": 0.0072, "step": 107480 }, { "epoch": 0.90764391716451, "grad_norm": 0.036077722907066345, "learning_rate": 6.6472612384866756e-06, "loss": 0.0083, "step": 107490 }, { "epoch": 0.9077283570116738, "grad_norm": 0.3785172402858734, "learning_rate": 6.646565477203094e-06, "loss": 0.0196, "step": 107500 }, { "epoch": 0.9078127968588376, "grad_norm": 0.18849410116672516, "learning_rate": 6.645869680156448e-06, "loss": 0.0109, "step": 107510 }, { "epoch": 0.9078972367060015, "grad_norm": 0.014936070889234543, "learning_rate": 6.6451738473618525e-06, "loss": 0.0122, "step": 107520 }, { "epoch": 0.9079816765531654, "grad_norm": 0.21926933526992798, "learning_rate": 6.644477978834415e-06, "loss": 0.0059, "step": 107530 }, { "epoch": 0.9080661164003293, "grad_norm": 0.8315039277076721, "learning_rate": 6.6437820745892535e-06, "loss": 0.0093, "step": 107540 }, { "epoch": 0.9081505562474932, "grad_norm": 0.10423451662063599, "learning_rate": 6.643086134641481e-06, "loss": 0.0153, "step": 107550 }, { "epoch": 0.9082349960946571, "grad_norm": 0.1229056566953659, "learning_rate": 6.642390159006217e-06, "loss": 0.0066, "step": 107560 }, { "epoch": 0.908319435941821, "grad_norm": 0.1475905179977417, "learning_rate": 6.641694147698573e-06, "loss": 0.0066, "step": 107570 }, { "epoch": 0.9084038757889848, "grad_norm": 0.0608840174973011, "learning_rate": 6.6409981007336685e-06, "loss": 0.0067, "step": 107580 }, { "epoch": 0.9084883156361487, "grad_norm": 0.5629990696907043, "learning_rate": 6.640302018126623e-06, "loss": 0.0092, "step": 107590 }, { "epoch": 0.9085727554833126, "grad_norm": 0.25848138332366943, "learning_rate": 6.639605899892554e-06, "loss": 0.0067, "step": 107600 }, { "epoch": 0.9086571953304764, "grad_norm": 0.15269221365451813, "learning_rate": 6.6389097460465805e-06, "loss": 0.0065, "step": 107610 }, { "epoch": 0.9087416351776403, "grad_norm": 0.07983062416315079, "learning_rate": 6.638213556603822e-06, "loss": 0.0122, "step": 107620 }, { "epoch": 0.9088260750248042, "grad_norm": 0.1507534682750702, "learning_rate": 6.637517331579401e-06, "loss": 0.0117, "step": 107630 }, { "epoch": 0.9089105148719681, "grad_norm": 0.08855199813842773, "learning_rate": 6.636821070988439e-06, "loss": 0.0124, "step": 107640 }, { "epoch": 0.9089949547191319, "grad_norm": 0.25671887397766113, "learning_rate": 6.636124774846061e-06, "loss": 0.0089, "step": 107650 }, { "epoch": 0.9090793945662958, "grad_norm": 0.2799736559391022, "learning_rate": 6.635428443167386e-06, "loss": 0.0072, "step": 107660 }, { "epoch": 0.9091638344134597, "grad_norm": 0.7392707467079163, "learning_rate": 6.63473207596754e-06, "loss": 0.0071, "step": 107670 }, { "epoch": 0.9092482742606236, "grad_norm": 0.38377487659454346, "learning_rate": 6.634035673261651e-06, "loss": 0.009, "step": 107680 }, { "epoch": 0.9093327141077875, "grad_norm": 0.2430112659931183, "learning_rate": 6.633339235064839e-06, "loss": 0.0048, "step": 107690 }, { "epoch": 0.9094171539549514, "grad_norm": 0.49905991554260254, "learning_rate": 6.632642761392235e-06, "loss": 0.0067, "step": 107700 }, { "epoch": 0.9095015938021153, "grad_norm": 0.010943218134343624, "learning_rate": 6.631946252258964e-06, "loss": 0.0075, "step": 107710 }, { "epoch": 0.9095860336492791, "grad_norm": 0.22639146447181702, "learning_rate": 6.631249707680157e-06, "loss": 0.012, "step": 107720 }, { "epoch": 0.9096704734964429, "grad_norm": 0.059387579560279846, "learning_rate": 6.630553127670938e-06, "loss": 0.009, "step": 107730 }, { "epoch": 0.9097549133436068, "grad_norm": 0.06512592732906342, "learning_rate": 6.62985651224644e-06, "loss": 0.012, "step": 107740 }, { "epoch": 0.9098393531907707, "grad_norm": 1.165168285369873, "learning_rate": 6.629159861421794e-06, "loss": 0.0066, "step": 107750 }, { "epoch": 0.9099237930379346, "grad_norm": 0.27199995517730713, "learning_rate": 6.628463175212128e-06, "loss": 0.0073, "step": 107760 }, { "epoch": 0.9100082328850985, "grad_norm": 0.33583664894104004, "learning_rate": 6.627766453632576e-06, "loss": 0.0091, "step": 107770 }, { "epoch": 0.9100926727322624, "grad_norm": 1.6023751497268677, "learning_rate": 6.627069696698269e-06, "loss": 0.02, "step": 107780 }, { "epoch": 0.9101771125794262, "grad_norm": 0.3926639258861542, "learning_rate": 6.626372904424343e-06, "loss": 0.008, "step": 107790 }, { "epoch": 0.9102615524265901, "grad_norm": 0.2699986696243286, "learning_rate": 6.62567607682593e-06, "loss": 0.0136, "step": 107800 }, { "epoch": 0.910345992273754, "grad_norm": 0.39505985379219055, "learning_rate": 6.624979213918167e-06, "loss": 0.0121, "step": 107810 }, { "epoch": 0.9104304321209179, "grad_norm": 0.066735178232193, "learning_rate": 6.624282315716187e-06, "loss": 0.0087, "step": 107820 }, { "epoch": 0.9105148719680818, "grad_norm": 0.07508549094200134, "learning_rate": 6.623585382235128e-06, "loss": 0.0089, "step": 107830 }, { "epoch": 0.9105993118152456, "grad_norm": 0.33566027879714966, "learning_rate": 6.62288841349013e-06, "loss": 0.0131, "step": 107840 }, { "epoch": 0.9106837516624094, "grad_norm": 0.15637437999248505, "learning_rate": 6.622191409496324e-06, "loss": 0.0122, "step": 107850 }, { "epoch": 0.9107681915095733, "grad_norm": 0.5675119757652283, "learning_rate": 6.621494370268855e-06, "loss": 0.0107, "step": 107860 }, { "epoch": 0.9108526313567372, "grad_norm": 0.25681617856025696, "learning_rate": 6.620797295822861e-06, "loss": 0.0143, "step": 107870 }, { "epoch": 0.9109370712039011, "grad_norm": 0.17240336537361145, "learning_rate": 6.620100186173482e-06, "loss": 0.0081, "step": 107880 }, { "epoch": 0.911021511051065, "grad_norm": 0.014513857662677765, "learning_rate": 6.619403041335859e-06, "loss": 0.0087, "step": 107890 }, { "epoch": 0.9111059508982289, "grad_norm": 0.6137949228286743, "learning_rate": 6.618705861325132e-06, "loss": 0.01, "step": 107900 }, { "epoch": 0.9111903907453928, "grad_norm": 0.3426097333431244, "learning_rate": 6.618008646156446e-06, "loss": 0.0156, "step": 107910 }, { "epoch": 0.9112748305925567, "grad_norm": 0.21793372929096222, "learning_rate": 6.617311395844945e-06, "loss": 0.0065, "step": 107920 }, { "epoch": 0.9113592704397205, "grad_norm": 0.38572266697883606, "learning_rate": 6.616614110405772e-06, "loss": 0.0109, "step": 107930 }, { "epoch": 0.9114437102868844, "grad_norm": 0.12421982735395432, "learning_rate": 6.615916789854071e-06, "loss": 0.0124, "step": 107940 }, { "epoch": 0.9115281501340483, "grad_norm": 0.20632600784301758, "learning_rate": 6.615219434204988e-06, "loss": 0.0087, "step": 107950 }, { "epoch": 0.9116125899812121, "grad_norm": 0.14506199955940247, "learning_rate": 6.61452204347367e-06, "loss": 0.0085, "step": 107960 }, { "epoch": 0.911697029828376, "grad_norm": 0.3217337727546692, "learning_rate": 6.613824617675265e-06, "loss": 0.0114, "step": 107970 }, { "epoch": 0.9117814696755399, "grad_norm": 0.26920440793037415, "learning_rate": 6.613127156824919e-06, "loss": 0.0112, "step": 107980 }, { "epoch": 0.9118659095227037, "grad_norm": 0.14152103662490845, "learning_rate": 6.612429660937782e-06, "loss": 0.0107, "step": 107990 }, { "epoch": 0.9119503493698676, "grad_norm": 0.12024702876806259, "learning_rate": 6.611732130029004e-06, "loss": 0.0063, "step": 108000 }, { "epoch": 0.9120347892170315, "grad_norm": 0.3321867883205414, "learning_rate": 6.611034564113734e-06, "loss": 0.0097, "step": 108010 }, { "epoch": 0.9121192290641954, "grad_norm": 0.08070619404315948, "learning_rate": 6.610336963207124e-06, "loss": 0.0057, "step": 108020 }, { "epoch": 0.9122036689113593, "grad_norm": 0.13791774213314056, "learning_rate": 6.609639327324323e-06, "loss": 0.0079, "step": 108030 }, { "epoch": 0.9122881087585232, "grad_norm": 0.5338274240493774, "learning_rate": 6.608941656480488e-06, "loss": 0.0148, "step": 108040 }, { "epoch": 0.9123725486056871, "grad_norm": 0.30669406056404114, "learning_rate": 6.608243950690769e-06, "loss": 0.0061, "step": 108050 }, { "epoch": 0.912456988452851, "grad_norm": 0.15098978579044342, "learning_rate": 6.60754620997032e-06, "loss": 0.0077, "step": 108060 }, { "epoch": 0.9125414283000147, "grad_norm": 0.4460097849369049, "learning_rate": 6.606848434334298e-06, "loss": 0.0228, "step": 108070 }, { "epoch": 0.9126258681471786, "grad_norm": 0.19754691421985626, "learning_rate": 6.606150623797856e-06, "loss": 0.0061, "step": 108080 }, { "epoch": 0.9127103079943425, "grad_norm": 0.42414388060569763, "learning_rate": 6.605452778376154e-06, "loss": 0.0112, "step": 108090 }, { "epoch": 0.9127947478415064, "grad_norm": 0.5662029385566711, "learning_rate": 6.6047548980843445e-06, "loss": 0.015, "step": 108100 }, { "epoch": 0.9128791876886703, "grad_norm": 0.43100374937057495, "learning_rate": 6.6040569829375876e-06, "loss": 0.0101, "step": 108110 }, { "epoch": 0.9129636275358342, "grad_norm": 0.8523558974266052, "learning_rate": 6.6033590329510425e-06, "loss": 0.0166, "step": 108120 }, { "epoch": 0.913048067382998, "grad_norm": 0.478355348110199, "learning_rate": 6.602661048139868e-06, "loss": 0.0087, "step": 108130 }, { "epoch": 0.9131325072301619, "grad_norm": 0.3668888509273529, "learning_rate": 6.6019630285192225e-06, "loss": 0.01, "step": 108140 }, { "epoch": 0.9132169470773258, "grad_norm": 0.1864466667175293, "learning_rate": 6.60126497410427e-06, "loss": 0.0096, "step": 108150 }, { "epoch": 0.9133013869244897, "grad_norm": 0.5941696166992188, "learning_rate": 6.600566884910169e-06, "loss": 0.0108, "step": 108160 }, { "epoch": 0.9133858267716536, "grad_norm": 1.727952480316162, "learning_rate": 6.599868760952084e-06, "loss": 0.0109, "step": 108170 }, { "epoch": 0.9134702666188175, "grad_norm": 0.17399482429027557, "learning_rate": 6.599170602245177e-06, "loss": 0.0061, "step": 108180 }, { "epoch": 0.9135547064659812, "grad_norm": 0.21272459626197815, "learning_rate": 6.598472408804613e-06, "loss": 0.0081, "step": 108190 }, { "epoch": 0.9136391463131451, "grad_norm": 0.15541382133960724, "learning_rate": 6.597774180645554e-06, "loss": 0.0086, "step": 108200 }, { "epoch": 0.913723586160309, "grad_norm": 0.3947431147098541, "learning_rate": 6.597075917783169e-06, "loss": 0.0097, "step": 108210 }, { "epoch": 0.9138080260074729, "grad_norm": 0.17632140219211578, "learning_rate": 6.596377620232622e-06, "loss": 0.0052, "step": 108220 }, { "epoch": 0.9138924658546368, "grad_norm": 0.09359589219093323, "learning_rate": 6.595679288009081e-06, "loss": 0.0072, "step": 108230 }, { "epoch": 0.9139769057018007, "grad_norm": 0.18486784398555756, "learning_rate": 6.5949809211277115e-06, "loss": 0.0119, "step": 108240 }, { "epoch": 0.9140613455489646, "grad_norm": 0.5530011057853699, "learning_rate": 6.594282519603683e-06, "loss": 0.0102, "step": 108250 }, { "epoch": 0.9141457853961285, "grad_norm": 0.11567816883325577, "learning_rate": 6.593584083452167e-06, "loss": 0.0059, "step": 108260 }, { "epoch": 0.9142302252432923, "grad_norm": 0.4424420893192291, "learning_rate": 6.5928856126883285e-06, "loss": 0.0104, "step": 108270 }, { "epoch": 0.9143146650904562, "grad_norm": 0.15404194593429565, "learning_rate": 6.592187107327341e-06, "loss": 0.0093, "step": 108280 }, { "epoch": 0.9143991049376201, "grad_norm": 0.19938819110393524, "learning_rate": 6.5914885673843785e-06, "loss": 0.0119, "step": 108290 }, { "epoch": 0.9144835447847839, "grad_norm": 0.5124266147613525, "learning_rate": 6.590789992874609e-06, "loss": 0.0091, "step": 108300 }, { "epoch": 0.9145679846319478, "grad_norm": 0.3122204840183258, "learning_rate": 6.590091383813206e-06, "loss": 0.0068, "step": 108310 }, { "epoch": 0.9146524244791117, "grad_norm": 0.4788687527179718, "learning_rate": 6.589392740215345e-06, "loss": 0.0108, "step": 108320 }, { "epoch": 0.9147368643262755, "grad_norm": 0.4067346751689911, "learning_rate": 6.588694062096199e-06, "loss": 0.0096, "step": 108330 }, { "epoch": 0.9148213041734394, "grad_norm": 0.2099061906337738, "learning_rate": 6.587995349470944e-06, "loss": 0.0071, "step": 108340 }, { "epoch": 0.9149057440206033, "grad_norm": 0.40128153562545776, "learning_rate": 6.587296602354756e-06, "loss": 0.0088, "step": 108350 }, { "epoch": 0.9149901838677672, "grad_norm": 0.12750893831253052, "learning_rate": 6.586597820762809e-06, "loss": 0.0111, "step": 108360 }, { "epoch": 0.9150746237149311, "grad_norm": 0.2832711637020111, "learning_rate": 6.585899004710283e-06, "loss": 0.014, "step": 108370 }, { "epoch": 0.915159063562095, "grad_norm": 0.4052233099937439, "learning_rate": 6.585200154212357e-06, "loss": 0.011, "step": 108380 }, { "epoch": 0.9152435034092589, "grad_norm": 0.21870695054531097, "learning_rate": 6.584501269284207e-06, "loss": 0.0072, "step": 108390 }, { "epoch": 0.9153279432564227, "grad_norm": 0.44326019287109375, "learning_rate": 6.583802349941015e-06, "loss": 0.007, "step": 108400 }, { "epoch": 0.9154123831035866, "grad_norm": 0.1928734928369522, "learning_rate": 6.583103396197961e-06, "loss": 0.0092, "step": 108410 }, { "epoch": 0.9154968229507504, "grad_norm": 0.37875446677207947, "learning_rate": 6.582404408070224e-06, "loss": 0.0122, "step": 108420 }, { "epoch": 0.9155812627979143, "grad_norm": 0.3998294770717621, "learning_rate": 6.5817053855729885e-06, "loss": 0.0062, "step": 108430 }, { "epoch": 0.9156657026450782, "grad_norm": 0.5170953273773193, "learning_rate": 6.581006328721435e-06, "loss": 0.0072, "step": 108440 }, { "epoch": 0.9157501424922421, "grad_norm": 0.18368206918239594, "learning_rate": 6.580307237530751e-06, "loss": 0.0132, "step": 108450 }, { "epoch": 0.915834582339406, "grad_norm": 0.4785862863063812, "learning_rate": 6.579608112016116e-06, "loss": 0.011, "step": 108460 }, { "epoch": 0.9159190221865698, "grad_norm": 0.830838143825531, "learning_rate": 6.578908952192716e-06, "loss": 0.0103, "step": 108470 }, { "epoch": 0.9160034620337337, "grad_norm": 0.4671798348426819, "learning_rate": 6.5782097580757375e-06, "loss": 0.0103, "step": 108480 }, { "epoch": 0.9160879018808976, "grad_norm": 0.11907362192869186, "learning_rate": 6.577510529680366e-06, "loss": 0.0094, "step": 108490 }, { "epoch": 0.9161723417280615, "grad_norm": 0.14654572308063507, "learning_rate": 6.57681126702179e-06, "loss": 0.0101, "step": 108500 }, { "epoch": 0.9162567815752254, "grad_norm": 0.2690425217151642, "learning_rate": 6.576111970115195e-06, "loss": 0.0114, "step": 108510 }, { "epoch": 0.9163412214223893, "grad_norm": 0.3314257264137268, "learning_rate": 6.575412638975772e-06, "loss": 0.008, "step": 108520 }, { "epoch": 0.916425661269553, "grad_norm": 0.320937842130661, "learning_rate": 6.574713273618709e-06, "loss": 0.0097, "step": 108530 }, { "epoch": 0.9165101011167169, "grad_norm": 0.9765130877494812, "learning_rate": 6.574013874059198e-06, "loss": 0.0103, "step": 108540 }, { "epoch": 0.9165945409638808, "grad_norm": 0.12511852383613586, "learning_rate": 6.573314440312425e-06, "loss": 0.0109, "step": 108550 }, { "epoch": 0.9166789808110447, "grad_norm": 0.17613506317138672, "learning_rate": 6.572614972393586e-06, "loss": 0.0075, "step": 108560 }, { "epoch": 0.9167634206582086, "grad_norm": 0.59295654296875, "learning_rate": 6.571915470317872e-06, "loss": 0.0079, "step": 108570 }, { "epoch": 0.9168478605053725, "grad_norm": 0.0011496020015329123, "learning_rate": 6.571215934100479e-06, "loss": 0.0059, "step": 108580 }, { "epoch": 0.9169323003525364, "grad_norm": 0.5288728475570679, "learning_rate": 6.570516363756595e-06, "loss": 0.0139, "step": 108590 }, { "epoch": 0.9170167401997003, "grad_norm": 0.28452569246292114, "learning_rate": 6.569816759301417e-06, "loss": 0.0099, "step": 108600 }, { "epoch": 0.9171011800468641, "grad_norm": 0.3225015103816986, "learning_rate": 6.5691171207501415e-06, "loss": 0.0105, "step": 108610 }, { "epoch": 0.917185619894028, "grad_norm": 0.3763274550437927, "learning_rate": 6.568417448117965e-06, "loss": 0.0093, "step": 108620 }, { "epoch": 0.9172700597411919, "grad_norm": 0.5639553070068359, "learning_rate": 6.567717741420081e-06, "loss": 0.0095, "step": 108630 }, { "epoch": 0.9173544995883558, "grad_norm": 0.07663029432296753, "learning_rate": 6.567018000671689e-06, "loss": 0.0126, "step": 108640 }, { "epoch": 0.9174389394355196, "grad_norm": 0.4731111228466034, "learning_rate": 6.5663182258879875e-06, "loss": 0.009, "step": 108650 }, { "epoch": 0.9175233792826835, "grad_norm": 0.31761789321899414, "learning_rate": 6.565618417084176e-06, "loss": 0.0076, "step": 108660 }, { "epoch": 0.9176078191298473, "grad_norm": 0.22851458191871643, "learning_rate": 6.564918574275452e-06, "loss": 0.0144, "step": 108670 }, { "epoch": 0.9176922589770112, "grad_norm": 0.5719241499900818, "learning_rate": 6.564218697477019e-06, "loss": 0.0104, "step": 108680 }, { "epoch": 0.9177766988241751, "grad_norm": 0.37293311953544617, "learning_rate": 6.563518786704074e-06, "loss": 0.0121, "step": 108690 }, { "epoch": 0.917861138671339, "grad_norm": 0.22588665783405304, "learning_rate": 6.562818841971823e-06, "loss": 0.0134, "step": 108700 }, { "epoch": 0.9179455785185029, "grad_norm": 0.3196091949939728, "learning_rate": 6.562118863295468e-06, "loss": 0.0108, "step": 108710 }, { "epoch": 0.9180300183656668, "grad_norm": 0.20577487349510193, "learning_rate": 6.561418850690209e-06, "loss": 0.0069, "step": 108720 }, { "epoch": 0.9181144582128307, "grad_norm": 0.28681084513664246, "learning_rate": 6.560718804171254e-06, "loss": 0.0069, "step": 108730 }, { "epoch": 0.9181988980599946, "grad_norm": 0.15494775772094727, "learning_rate": 6.560018723753806e-06, "loss": 0.0059, "step": 108740 }, { "epoch": 0.9182833379071584, "grad_norm": 0.4059317409992218, "learning_rate": 6.559318609453071e-06, "loss": 0.0077, "step": 108750 }, { "epoch": 0.9183677777543222, "grad_norm": 0.07804979383945465, "learning_rate": 6.558618461284255e-06, "loss": 0.0109, "step": 108760 }, { "epoch": 0.9184522176014861, "grad_norm": 0.23988714814186096, "learning_rate": 6.557918279262565e-06, "loss": 0.0135, "step": 108770 }, { "epoch": 0.91853665744865, "grad_norm": 0.11068028211593628, "learning_rate": 6.557218063403211e-06, "loss": 0.0082, "step": 108780 }, { "epoch": 0.9186210972958139, "grad_norm": 0.26460960507392883, "learning_rate": 6.5565178137213984e-06, "loss": 0.0065, "step": 108790 }, { "epoch": 0.9187055371429778, "grad_norm": 0.34776541590690613, "learning_rate": 6.555817530232338e-06, "loss": 0.0116, "step": 108800 }, { "epoch": 0.9187899769901416, "grad_norm": 0.6407628655433655, "learning_rate": 6.555117212951239e-06, "loss": 0.0098, "step": 108810 }, { "epoch": 0.9188744168373055, "grad_norm": 0.008819032460451126, "learning_rate": 6.554416861893314e-06, "loss": 0.0114, "step": 108820 }, { "epoch": 0.9189588566844694, "grad_norm": 0.5240039229393005, "learning_rate": 6.553716477073775e-06, "loss": 0.0107, "step": 108830 }, { "epoch": 0.9190432965316333, "grad_norm": 0.2322293519973755, "learning_rate": 6.5530160585078295e-06, "loss": 0.0099, "step": 108840 }, { "epoch": 0.9191277363787972, "grad_norm": 0.6708012223243713, "learning_rate": 6.552315606210694e-06, "loss": 0.0159, "step": 108850 }, { "epoch": 0.9192121762259611, "grad_norm": 0.3086610734462738, "learning_rate": 6.551615120197582e-06, "loss": 0.0128, "step": 108860 }, { "epoch": 0.919296616073125, "grad_norm": 0.1581936776638031, "learning_rate": 6.550914600483707e-06, "loss": 0.0115, "step": 108870 }, { "epoch": 0.9193810559202887, "grad_norm": 0.7873698472976685, "learning_rate": 6.550214047084286e-06, "loss": 0.0116, "step": 108880 }, { "epoch": 0.9194654957674526, "grad_norm": 0.14889700710773468, "learning_rate": 6.5495134600145315e-06, "loss": 0.0101, "step": 108890 }, { "epoch": 0.9195499356146165, "grad_norm": 0.32406002283096313, "learning_rate": 6.5488128392896645e-06, "loss": 0.0082, "step": 108900 }, { "epoch": 0.9196343754617804, "grad_norm": 0.31926360726356506, "learning_rate": 6.5481121849249e-06, "loss": 0.0077, "step": 108910 }, { "epoch": 0.9197188153089443, "grad_norm": 0.06929375231266022, "learning_rate": 6.547411496935454e-06, "loss": 0.0066, "step": 108920 }, { "epoch": 0.9198032551561082, "grad_norm": 0.1138111799955368, "learning_rate": 6.5467107753365475e-06, "loss": 0.0108, "step": 108930 }, { "epoch": 0.919887695003272, "grad_norm": 0.3163425922393799, "learning_rate": 6.546010020143402e-06, "loss": 0.0077, "step": 108940 }, { "epoch": 0.9199721348504359, "grad_norm": 0.13924802839756012, "learning_rate": 6.545309231371235e-06, "loss": 0.0114, "step": 108950 }, { "epoch": 0.9200565746975998, "grad_norm": 0.193409726023674, "learning_rate": 6.544608409035268e-06, "loss": 0.006, "step": 108960 }, { "epoch": 0.9201410145447637, "grad_norm": 0.4362344443798065, "learning_rate": 6.543907553150723e-06, "loss": 0.0112, "step": 108970 }, { "epoch": 0.9202254543919276, "grad_norm": 0.6163833141326904, "learning_rate": 6.54320666373282e-06, "loss": 0.0108, "step": 108980 }, { "epoch": 0.9203098942390914, "grad_norm": 0.2892196476459503, "learning_rate": 6.5425057407967875e-06, "loss": 0.015, "step": 108990 }, { "epoch": 0.9203943340862553, "grad_norm": 0.0734686478972435, "learning_rate": 6.5418047843578445e-06, "loss": 0.0065, "step": 109000 }, { "epoch": 0.9204787739334191, "grad_norm": 0.23746377229690552, "learning_rate": 6.5411037944312185e-06, "loss": 0.0077, "step": 109010 }, { "epoch": 0.920563213780583, "grad_norm": 0.20436741411685944, "learning_rate": 6.540402771032134e-06, "loss": 0.0126, "step": 109020 }, { "epoch": 0.9206476536277469, "grad_norm": 0.09117918461561203, "learning_rate": 6.539701714175816e-06, "loss": 0.0057, "step": 109030 }, { "epoch": 0.9207320934749108, "grad_norm": 0.0960191860795021, "learning_rate": 6.539000623877494e-06, "loss": 0.0086, "step": 109040 }, { "epoch": 0.9208165333220747, "grad_norm": 0.4036126434803009, "learning_rate": 6.538299500152392e-06, "loss": 0.0091, "step": 109050 }, { "epoch": 0.9209009731692386, "grad_norm": 0.5363645553588867, "learning_rate": 6.537598343015742e-06, "loss": 0.0073, "step": 109060 }, { "epoch": 0.9209854130164025, "grad_norm": 0.47073736786842346, "learning_rate": 6.5368971524827705e-06, "loss": 0.0061, "step": 109070 }, { "epoch": 0.9210698528635664, "grad_norm": 0.6832209229469299, "learning_rate": 6.536195928568707e-06, "loss": 0.007, "step": 109080 }, { "epoch": 0.9211542927107302, "grad_norm": 0.47206923365592957, "learning_rate": 6.5354946712887825e-06, "loss": 0.0147, "step": 109090 }, { "epoch": 0.9212387325578941, "grad_norm": 0.16437524557113647, "learning_rate": 6.534793380658228e-06, "loss": 0.0088, "step": 109100 }, { "epoch": 0.9213231724050579, "grad_norm": 0.26551318168640137, "learning_rate": 6.534092056692277e-06, "loss": 0.0086, "step": 109110 }, { "epoch": 0.9214076122522218, "grad_norm": 0.47547203302383423, "learning_rate": 6.533390699406161e-06, "loss": 0.0136, "step": 109120 }, { "epoch": 0.9214920520993857, "grad_norm": 0.21585072576999664, "learning_rate": 6.532689308815112e-06, "loss": 0.0061, "step": 109130 }, { "epoch": 0.9215764919465496, "grad_norm": 0.0937342494726181, "learning_rate": 6.531987884934365e-06, "loss": 0.0135, "step": 109140 }, { "epoch": 0.9216609317937134, "grad_norm": 0.35807108879089355, "learning_rate": 6.531286427779157e-06, "loss": 0.0214, "step": 109150 }, { "epoch": 0.9217453716408773, "grad_norm": 1.0834048986434937, "learning_rate": 6.530584937364721e-06, "loss": 0.0083, "step": 109160 }, { "epoch": 0.9218298114880412, "grad_norm": 0.26010337471961975, "learning_rate": 6.529883413706295e-06, "loss": 0.0076, "step": 109170 }, { "epoch": 0.9219142513352051, "grad_norm": 0.9084357023239136, "learning_rate": 6.5291818568191126e-06, "loss": 0.0185, "step": 109180 }, { "epoch": 0.921998691182369, "grad_norm": 0.10808374732732773, "learning_rate": 6.528480266718414e-06, "loss": 0.0077, "step": 109190 }, { "epoch": 0.9220831310295329, "grad_norm": 0.2621162533760071, "learning_rate": 6.527778643419438e-06, "loss": 0.0077, "step": 109200 }, { "epoch": 0.9221675708766968, "grad_norm": 0.2728692293167114, "learning_rate": 6.527076986937421e-06, "loss": 0.0176, "step": 109210 }, { "epoch": 0.9222520107238605, "grad_norm": 0.6137868762016296, "learning_rate": 6.526375297287607e-06, "loss": 0.0098, "step": 109220 }, { "epoch": 0.9223364505710244, "grad_norm": 0.3189034163951874, "learning_rate": 6.525673574485234e-06, "loss": 0.008, "step": 109230 }, { "epoch": 0.9224208904181883, "grad_norm": 0.5059359073638916, "learning_rate": 6.524971818545542e-06, "loss": 0.0067, "step": 109240 }, { "epoch": 0.9225053302653522, "grad_norm": 0.28616082668304443, "learning_rate": 6.524270029483777e-06, "loss": 0.0099, "step": 109250 }, { "epoch": 0.9225897701125161, "grad_norm": 0.27911117672920227, "learning_rate": 6.523568207315178e-06, "loss": 0.0109, "step": 109260 }, { "epoch": 0.92267420995968, "grad_norm": 0.3539985120296478, "learning_rate": 6.522866352054989e-06, "loss": 0.0116, "step": 109270 }, { "epoch": 0.9227586498068439, "grad_norm": 0.3526216447353363, "learning_rate": 6.522164463718457e-06, "loss": 0.0178, "step": 109280 }, { "epoch": 0.9228430896540077, "grad_norm": 0.32045266032218933, "learning_rate": 6.521462542320823e-06, "loss": 0.0063, "step": 109290 }, { "epoch": 0.9229275295011716, "grad_norm": 0.33829429745674133, "learning_rate": 6.520760587877335e-06, "loss": 0.006, "step": 109300 }, { "epoch": 0.9230119693483355, "grad_norm": 0.20214146375656128, "learning_rate": 6.520058600403239e-06, "loss": 0.0102, "step": 109310 }, { "epoch": 0.9230964091954994, "grad_norm": 0.2827816605567932, "learning_rate": 6.519356579913781e-06, "loss": 0.0064, "step": 109320 }, { "epoch": 0.9231808490426633, "grad_norm": 0.33452025055885315, "learning_rate": 6.518654526424209e-06, "loss": 0.0117, "step": 109330 }, { "epoch": 0.9232652888898271, "grad_norm": 0.2141272872686386, "learning_rate": 6.517952439949773e-06, "loss": 0.0077, "step": 109340 }, { "epoch": 0.923349728736991, "grad_norm": 0.12857887148857117, "learning_rate": 6.517250320505721e-06, "loss": 0.0069, "step": 109350 }, { "epoch": 0.9234341685841548, "grad_norm": 0.38124141097068787, "learning_rate": 6.516548168107302e-06, "loss": 0.0179, "step": 109360 }, { "epoch": 0.9235186084313187, "grad_norm": 0.3703010678291321, "learning_rate": 6.515845982769767e-06, "loss": 0.0103, "step": 109370 }, { "epoch": 0.9236030482784826, "grad_norm": 0.2790249288082123, "learning_rate": 6.515143764508369e-06, "loss": 0.0073, "step": 109380 }, { "epoch": 0.9236874881256465, "grad_norm": 0.009637157432734966, "learning_rate": 6.51444151333836e-06, "loss": 0.0141, "step": 109390 }, { "epoch": 0.9237719279728104, "grad_norm": 0.21356101334095, "learning_rate": 6.51373922927499e-06, "loss": 0.006, "step": 109400 }, { "epoch": 0.9238563678199743, "grad_norm": 0.23707382380962372, "learning_rate": 6.513036912333516e-06, "loss": 0.009, "step": 109410 }, { "epoch": 0.9239408076671382, "grad_norm": 0.2633890211582184, "learning_rate": 6.512334562529188e-06, "loss": 0.0131, "step": 109420 }, { "epoch": 0.924025247514302, "grad_norm": 0.18338239192962646, "learning_rate": 6.511632179877264e-06, "loss": 0.0119, "step": 109430 }, { "epoch": 0.9241096873614659, "grad_norm": 0.4876410961151123, "learning_rate": 6.510929764392999e-06, "loss": 0.0122, "step": 109440 }, { "epoch": 0.9241941272086297, "grad_norm": 0.671252965927124, "learning_rate": 6.510227316091649e-06, "loss": 0.009, "step": 109450 }, { "epoch": 0.9242785670557936, "grad_norm": 0.4588720500469208, "learning_rate": 6.509524834988472e-06, "loss": 0.0123, "step": 109460 }, { "epoch": 0.9243630069029575, "grad_norm": 0.2525959014892578, "learning_rate": 6.508822321098724e-06, "loss": 0.0045, "step": 109470 }, { "epoch": 0.9244474467501214, "grad_norm": 0.5485168695449829, "learning_rate": 6.508119774437665e-06, "loss": 0.01, "step": 109480 }, { "epoch": 0.9245318865972852, "grad_norm": 0.7438519597053528, "learning_rate": 6.507417195020552e-06, "loss": 0.0147, "step": 109490 }, { "epoch": 0.9246163264444491, "grad_norm": 0.7118016481399536, "learning_rate": 6.506714582862648e-06, "loss": 0.0083, "step": 109500 }, { "epoch": 0.924700766291613, "grad_norm": 1.0113985538482666, "learning_rate": 6.506011937979212e-06, "loss": 0.0166, "step": 109510 }, { "epoch": 0.9247852061387769, "grad_norm": 0.27902668714523315, "learning_rate": 6.505309260385505e-06, "loss": 0.0116, "step": 109520 }, { "epoch": 0.9248696459859408, "grad_norm": 0.22611366212368011, "learning_rate": 6.504606550096789e-06, "loss": 0.0086, "step": 109530 }, { "epoch": 0.9249540858331047, "grad_norm": 0.10634566098451614, "learning_rate": 6.503903807128328e-06, "loss": 0.0055, "step": 109540 }, { "epoch": 0.9250385256802686, "grad_norm": 0.0393415167927742, "learning_rate": 6.503201031495383e-06, "loss": 0.0086, "step": 109550 }, { "epoch": 0.9251229655274325, "grad_norm": 0.34411856532096863, "learning_rate": 6.50249822321322e-06, "loss": 0.0135, "step": 109560 }, { "epoch": 0.9252074053745962, "grad_norm": 0.926590621471405, "learning_rate": 6.501795382297105e-06, "loss": 0.0099, "step": 109570 }, { "epoch": 0.9252918452217601, "grad_norm": 0.10843874514102936, "learning_rate": 6.5010925087623e-06, "loss": 0.009, "step": 109580 }, { "epoch": 0.925376285068924, "grad_norm": 0.18751809000968933, "learning_rate": 6.500389602624074e-06, "loss": 0.0109, "step": 109590 }, { "epoch": 0.9254607249160879, "grad_norm": 0.8244180083274841, "learning_rate": 6.499686663897695e-06, "loss": 0.0165, "step": 109600 }, { "epoch": 0.9255451647632518, "grad_norm": 0.3519943952560425, "learning_rate": 6.498983692598426e-06, "loss": 0.0114, "step": 109610 }, { "epoch": 0.9256296046104157, "grad_norm": 0.2968895733356476, "learning_rate": 6.4982806887415404e-06, "loss": 0.0087, "step": 109620 }, { "epoch": 0.9257140444575795, "grad_norm": 0.27521952986717224, "learning_rate": 6.497577652342304e-06, "loss": 0.0078, "step": 109630 }, { "epoch": 0.9257984843047434, "grad_norm": 0.45383521914482117, "learning_rate": 6.49687458341599e-06, "loss": 0.0118, "step": 109640 }, { "epoch": 0.9258829241519073, "grad_norm": 0.46755605936050415, "learning_rate": 6.4961714819778664e-06, "loss": 0.0103, "step": 109650 }, { "epoch": 0.9259673639990712, "grad_norm": 0.39376962184906006, "learning_rate": 6.495468348043204e-06, "loss": 0.0128, "step": 109660 }, { "epoch": 0.9260518038462351, "grad_norm": 0.558729887008667, "learning_rate": 6.4947651816272735e-06, "loss": 0.0141, "step": 109670 }, { "epoch": 0.9261362436933989, "grad_norm": 0.2216980755329132, "learning_rate": 6.494061982745352e-06, "loss": 0.0114, "step": 109680 }, { "epoch": 0.9262206835405628, "grad_norm": 0.11421681195497513, "learning_rate": 6.49335875141271e-06, "loss": 0.0054, "step": 109690 }, { "epoch": 0.9263051233877266, "grad_norm": 0.0892719179391861, "learning_rate": 6.492655487644621e-06, "loss": 0.0085, "step": 109700 }, { "epoch": 0.9263895632348905, "grad_norm": 0.18221701681613922, "learning_rate": 6.491952191456361e-06, "loss": 0.0104, "step": 109710 }, { "epoch": 0.9264740030820544, "grad_norm": 0.14672063291072845, "learning_rate": 6.491248862863206e-06, "loss": 0.0105, "step": 109720 }, { "epoch": 0.9265584429292183, "grad_norm": 0.5226982235908508, "learning_rate": 6.490545501880432e-06, "loss": 0.013, "step": 109730 }, { "epoch": 0.9266428827763822, "grad_norm": 0.39383354783058167, "learning_rate": 6.489842108523313e-06, "loss": 0.0175, "step": 109740 }, { "epoch": 0.9267273226235461, "grad_norm": 1.038385272026062, "learning_rate": 6.48913868280713e-06, "loss": 0.012, "step": 109750 }, { "epoch": 0.92681176247071, "grad_norm": 1.2280784845352173, "learning_rate": 6.4884352247471604e-06, "loss": 0.0121, "step": 109760 }, { "epoch": 0.9268962023178738, "grad_norm": 0.32513657212257385, "learning_rate": 6.487731734358682e-06, "loss": 0.0073, "step": 109770 }, { "epoch": 0.9269806421650377, "grad_norm": 0.2952912747859955, "learning_rate": 6.487028211656975e-06, "loss": 0.0037, "step": 109780 }, { "epoch": 0.9270650820122016, "grad_norm": 0.25747695565223694, "learning_rate": 6.48632465665732e-06, "loss": 0.0086, "step": 109790 }, { "epoch": 0.9271495218593654, "grad_norm": 0.014484462328255177, "learning_rate": 6.485621069374999e-06, "loss": 0.0062, "step": 109800 }, { "epoch": 0.9272339617065293, "grad_norm": 0.23008663952350616, "learning_rate": 6.484917449825291e-06, "loss": 0.0157, "step": 109810 }, { "epoch": 0.9273184015536932, "grad_norm": 0.20282699167728424, "learning_rate": 6.4842137980234806e-06, "loss": 0.0093, "step": 109820 }, { "epoch": 0.927402841400857, "grad_norm": 0.3417559564113617, "learning_rate": 6.483510113984852e-06, "loss": 0.011, "step": 109830 }, { "epoch": 0.9274872812480209, "grad_norm": 0.32766517996788025, "learning_rate": 6.482806397724688e-06, "loss": 0.0114, "step": 109840 }, { "epoch": 0.9275717210951848, "grad_norm": 0.06439667195081711, "learning_rate": 6.4821026492582704e-06, "loss": 0.0108, "step": 109850 }, { "epoch": 0.9276561609423487, "grad_norm": 0.45612064003944397, "learning_rate": 6.481398868600889e-06, "loss": 0.0087, "step": 109860 }, { "epoch": 0.9277406007895126, "grad_norm": 0.4547039568424225, "learning_rate": 6.480695055767827e-06, "loss": 0.0114, "step": 109870 }, { "epoch": 0.9278250406366765, "grad_norm": 0.3435575067996979, "learning_rate": 6.479991210774373e-06, "loss": 0.0062, "step": 109880 }, { "epoch": 0.9279094804838404, "grad_norm": 0.3804864287376404, "learning_rate": 6.479287333635813e-06, "loss": 0.0122, "step": 109890 }, { "epoch": 0.9279939203310043, "grad_norm": 0.8944945931434631, "learning_rate": 6.478583424367434e-06, "loss": 0.0155, "step": 109900 }, { "epoch": 0.928078360178168, "grad_norm": 0.5238696932792664, "learning_rate": 6.477879482984526e-06, "loss": 0.0198, "step": 109910 }, { "epoch": 0.9281628000253319, "grad_norm": 0.2182907909154892, "learning_rate": 6.477175509502378e-06, "loss": 0.0095, "step": 109920 }, { "epoch": 0.9282472398724958, "grad_norm": 0.07826527208089828, "learning_rate": 6.4764715039362825e-06, "loss": 0.0154, "step": 109930 }, { "epoch": 0.9283316797196597, "grad_norm": 0.2877972722053528, "learning_rate": 6.475767466301528e-06, "loss": 0.0114, "step": 109940 }, { "epoch": 0.9284161195668236, "grad_norm": 0.10106315463781357, "learning_rate": 6.475063396613406e-06, "loss": 0.008, "step": 109950 }, { "epoch": 0.9285005594139875, "grad_norm": 0.6035320162773132, "learning_rate": 6.4743592948872096e-06, "loss": 0.0119, "step": 109960 }, { "epoch": 0.9285849992611513, "grad_norm": 0.2862548828125, "learning_rate": 6.4736551611382305e-06, "loss": 0.0091, "step": 109970 }, { "epoch": 0.9286694391083152, "grad_norm": 0.3683410584926605, "learning_rate": 6.472950995381764e-06, "loss": 0.0091, "step": 109980 }, { "epoch": 0.9287538789554791, "grad_norm": 0.15837544202804565, "learning_rate": 6.472246797633104e-06, "loss": 0.0101, "step": 109990 }, { "epoch": 0.928838318802643, "grad_norm": 0.31146126985549927, "learning_rate": 6.471542567907545e-06, "loss": 0.0152, "step": 110000 }, { "epoch": 0.9289227586498069, "grad_norm": 0.5842121839523315, "learning_rate": 6.470838306220384e-06, "loss": 0.0099, "step": 110010 }, { "epoch": 0.9290071984969707, "grad_norm": 0.09068526327610016, "learning_rate": 6.470134012586915e-06, "loss": 0.0077, "step": 110020 }, { "epoch": 0.9290916383441346, "grad_norm": 0.09944318234920502, "learning_rate": 6.469429687022437e-06, "loss": 0.0048, "step": 110030 }, { "epoch": 0.9291760781912984, "grad_norm": 0.16142654418945312, "learning_rate": 6.468725329542247e-06, "loss": 0.0054, "step": 110040 }, { "epoch": 0.9292605180384623, "grad_norm": 0.08345610648393631, "learning_rate": 6.468020940161644e-06, "loss": 0.0104, "step": 110050 }, { "epoch": 0.9293449578856262, "grad_norm": 0.132812038064003, "learning_rate": 6.467316518895928e-06, "loss": 0.0052, "step": 110060 }, { "epoch": 0.9294293977327901, "grad_norm": 0.17965589463710785, "learning_rate": 6.466612065760396e-06, "loss": 0.0167, "step": 110070 }, { "epoch": 0.929513837579954, "grad_norm": 0.1979871690273285, "learning_rate": 6.465907580770352e-06, "loss": 0.0107, "step": 110080 }, { "epoch": 0.9295982774271179, "grad_norm": 0.34182208776474, "learning_rate": 6.465203063941094e-06, "loss": 0.0087, "step": 110090 }, { "epoch": 0.9296827172742818, "grad_norm": 0.19129937887191772, "learning_rate": 6.464498515287927e-06, "loss": 0.0092, "step": 110100 }, { "epoch": 0.9297671571214456, "grad_norm": 0.15935580432415009, "learning_rate": 6.463793934826151e-06, "loss": 0.0095, "step": 110110 }, { "epoch": 0.9298515969686095, "grad_norm": 0.32986968755722046, "learning_rate": 6.463089322571073e-06, "loss": 0.017, "step": 110120 }, { "epoch": 0.9299360368157734, "grad_norm": 0.6718010306358337, "learning_rate": 6.462384678537994e-06, "loss": 0.0111, "step": 110130 }, { "epoch": 0.9300204766629372, "grad_norm": 0.26270827651023865, "learning_rate": 6.461680002742219e-06, "loss": 0.0074, "step": 110140 }, { "epoch": 0.9301049165101011, "grad_norm": 0.4464406669139862, "learning_rate": 6.460975295199055e-06, "loss": 0.0098, "step": 110150 }, { "epoch": 0.930189356357265, "grad_norm": 0.5374669432640076, "learning_rate": 6.4602705559238044e-06, "loss": 0.0138, "step": 110160 }, { "epoch": 0.9302737962044288, "grad_norm": 0.2577529847621918, "learning_rate": 6.459565784931778e-06, "loss": 0.0125, "step": 110170 }, { "epoch": 0.9303582360515927, "grad_norm": 0.33842232823371887, "learning_rate": 6.458860982238282e-06, "loss": 0.009, "step": 110180 }, { "epoch": 0.9304426758987566, "grad_norm": 0.540622889995575, "learning_rate": 6.458156147858624e-06, "loss": 0.0072, "step": 110190 }, { "epoch": 0.9305271157459205, "grad_norm": 0.03513916954398155, "learning_rate": 6.457451281808114e-06, "loss": 0.0124, "step": 110200 }, { "epoch": 0.9306115555930844, "grad_norm": 0.20869840681552887, "learning_rate": 6.456746384102061e-06, "loss": 0.0095, "step": 110210 }, { "epoch": 0.9306959954402483, "grad_norm": 0.5224153995513916, "learning_rate": 6.456041454755774e-06, "loss": 0.0205, "step": 110220 }, { "epoch": 0.9307804352874122, "grad_norm": 0.23231413960456848, "learning_rate": 6.455336493784565e-06, "loss": 0.0107, "step": 110230 }, { "epoch": 0.930864875134576, "grad_norm": 0.22090911865234375, "learning_rate": 6.454631501203748e-06, "loss": 0.011, "step": 110240 }, { "epoch": 0.9309493149817398, "grad_norm": 0.4105550944805145, "learning_rate": 6.45392647702863e-06, "loss": 0.011, "step": 110250 }, { "epoch": 0.9310337548289037, "grad_norm": 0.055794257670640945, "learning_rate": 6.4532214212745285e-06, "loss": 0.0075, "step": 110260 }, { "epoch": 0.9311181946760676, "grad_norm": 0.2965606153011322, "learning_rate": 6.4525163339567545e-06, "loss": 0.0094, "step": 110270 }, { "epoch": 0.9312026345232315, "grad_norm": 0.5055829882621765, "learning_rate": 6.451811215090622e-06, "loss": 0.0172, "step": 110280 }, { "epoch": 0.9312870743703954, "grad_norm": 0.6504009366035461, "learning_rate": 6.451106064691451e-06, "loss": 0.012, "step": 110290 }, { "epoch": 0.9313715142175593, "grad_norm": 0.23730263113975525, "learning_rate": 6.450400882774551e-06, "loss": 0.0111, "step": 110300 }, { "epoch": 0.9314559540647231, "grad_norm": 0.11702865362167358, "learning_rate": 6.449695669355241e-06, "loss": 0.0076, "step": 110310 }, { "epoch": 0.931540393911887, "grad_norm": 0.20864808559417725, "learning_rate": 6.448990424448839e-06, "loss": 0.0071, "step": 110320 }, { "epoch": 0.9316248337590509, "grad_norm": 0.4356013536453247, "learning_rate": 6.448285148070662e-06, "loss": 0.0063, "step": 110330 }, { "epoch": 0.9317092736062148, "grad_norm": 0.4991340935230255, "learning_rate": 6.447579840236028e-06, "loss": 0.0116, "step": 110340 }, { "epoch": 0.9317937134533787, "grad_norm": 0.013284157030284405, "learning_rate": 6.446874500960256e-06, "loss": 0.014, "step": 110350 }, { "epoch": 0.9318781533005426, "grad_norm": 0.17159660160541534, "learning_rate": 6.4461691302586684e-06, "loss": 0.0082, "step": 110360 }, { "epoch": 0.9319625931477064, "grad_norm": 0.23136241734027863, "learning_rate": 6.4454637281465825e-06, "loss": 0.0071, "step": 110370 }, { "epoch": 0.9320470329948702, "grad_norm": 0.4455522298812866, "learning_rate": 6.44475829463932e-06, "loss": 0.0129, "step": 110380 }, { "epoch": 0.9321314728420341, "grad_norm": 0.45838662981987, "learning_rate": 6.444052829752203e-06, "loss": 0.0105, "step": 110390 }, { "epoch": 0.932215912689198, "grad_norm": 0.22432366013526917, "learning_rate": 6.443347333500556e-06, "loss": 0.0073, "step": 110400 }, { "epoch": 0.9323003525363619, "grad_norm": 0.27172356843948364, "learning_rate": 6.4426418058997e-06, "loss": 0.0083, "step": 110410 }, { "epoch": 0.9323847923835258, "grad_norm": 0.34486842155456543, "learning_rate": 6.441936246964959e-06, "loss": 0.0079, "step": 110420 }, { "epoch": 0.9324692322306897, "grad_norm": 0.6530598402023315, "learning_rate": 6.441230656711659e-06, "loss": 0.01, "step": 110430 }, { "epoch": 0.9325536720778536, "grad_norm": 0.24239447712898254, "learning_rate": 6.440525035155123e-06, "loss": 0.0096, "step": 110440 }, { "epoch": 0.9326381119250174, "grad_norm": 0.511038064956665, "learning_rate": 6.4398193823106815e-06, "loss": 0.0064, "step": 110450 }, { "epoch": 0.9327225517721813, "grad_norm": 0.3643496632575989, "learning_rate": 6.439113698193656e-06, "loss": 0.0089, "step": 110460 }, { "epoch": 0.9328069916193452, "grad_norm": 0.35496044158935547, "learning_rate": 6.438407982819376e-06, "loss": 0.0107, "step": 110470 }, { "epoch": 0.932891431466509, "grad_norm": 0.2533223032951355, "learning_rate": 6.43770223620317e-06, "loss": 0.0082, "step": 110480 }, { "epoch": 0.9329758713136729, "grad_norm": 0.4264596104621887, "learning_rate": 6.436996458360364e-06, "loss": 0.0059, "step": 110490 }, { "epoch": 0.9330603111608368, "grad_norm": 0.8252102136611938, "learning_rate": 6.436290649306291e-06, "loss": 0.0169, "step": 110500 }, { "epoch": 0.9331447510080007, "grad_norm": 0.12264814972877502, "learning_rate": 6.435584809056279e-06, "loss": 0.0079, "step": 110510 }, { "epoch": 0.9332291908551645, "grad_norm": 0.19479474425315857, "learning_rate": 6.434878937625659e-06, "loss": 0.0103, "step": 110520 }, { "epoch": 0.9333136307023284, "grad_norm": 0.05804967135190964, "learning_rate": 6.434173035029765e-06, "loss": 0.0114, "step": 110530 }, { "epoch": 0.9333980705494923, "grad_norm": 0.23671838641166687, "learning_rate": 6.433467101283923e-06, "loss": 0.0088, "step": 110540 }, { "epoch": 0.9334825103966562, "grad_norm": 0.413578063249588, "learning_rate": 6.4327611364034715e-06, "loss": 0.0126, "step": 110550 }, { "epoch": 0.9335669502438201, "grad_norm": 0.7792186737060547, "learning_rate": 6.43205514040374e-06, "loss": 0.0111, "step": 110560 }, { "epoch": 0.933651390090984, "grad_norm": 0.4672778844833374, "learning_rate": 6.431349113300066e-06, "loss": 0.0227, "step": 110570 }, { "epoch": 0.9337358299381479, "grad_norm": 0.6471349596977234, "learning_rate": 6.4306430551077815e-06, "loss": 0.0086, "step": 110580 }, { "epoch": 0.9338202697853117, "grad_norm": 0.46974682807922363, "learning_rate": 6.429936965842224e-06, "loss": 0.0154, "step": 110590 }, { "epoch": 0.9339047096324755, "grad_norm": 0.10395459085702896, "learning_rate": 6.429230845518728e-06, "loss": 0.0076, "step": 110600 }, { "epoch": 0.9339891494796394, "grad_norm": 0.3415612280368805, "learning_rate": 6.428524694152631e-06, "loss": 0.0121, "step": 110610 }, { "epoch": 0.9340735893268033, "grad_norm": 0.36455240845680237, "learning_rate": 6.427818511759271e-06, "loss": 0.0081, "step": 110620 }, { "epoch": 0.9341580291739672, "grad_norm": 0.42944639921188354, "learning_rate": 6.427112298353984e-06, "loss": 0.0181, "step": 110630 }, { "epoch": 0.9342424690211311, "grad_norm": 0.3374834954738617, "learning_rate": 6.42640605395211e-06, "loss": 0.0146, "step": 110640 }, { "epoch": 0.934326908868295, "grad_norm": 0.28338897228240967, "learning_rate": 6.42569977856899e-06, "loss": 0.0116, "step": 110650 }, { "epoch": 0.9344113487154588, "grad_norm": 0.2912028133869171, "learning_rate": 6.424993472219964e-06, "loss": 0.0183, "step": 110660 }, { "epoch": 0.9344957885626227, "grad_norm": 0.17194488644599915, "learning_rate": 6.4242871349203705e-06, "loss": 0.0086, "step": 110670 }, { "epoch": 0.9345802284097866, "grad_norm": 0.12486400455236435, "learning_rate": 6.423580766685552e-06, "loss": 0.0094, "step": 110680 }, { "epoch": 0.9346646682569505, "grad_norm": 0.06392921507358551, "learning_rate": 6.4228743675308515e-06, "loss": 0.0038, "step": 110690 }, { "epoch": 0.9347491081041144, "grad_norm": 0.2117244154214859, "learning_rate": 6.422167937471612e-06, "loss": 0.0072, "step": 110700 }, { "epoch": 0.9348335479512782, "grad_norm": 0.22005073726177216, "learning_rate": 6.421461476523176e-06, "loss": 0.0129, "step": 110710 }, { "epoch": 0.934917987798442, "grad_norm": 0.5361408591270447, "learning_rate": 6.4207549847008865e-06, "loss": 0.0099, "step": 110720 }, { "epoch": 0.9350024276456059, "grad_norm": 0.35230061411857605, "learning_rate": 6.420048462020091e-06, "loss": 0.0122, "step": 110730 }, { "epoch": 0.9350868674927698, "grad_norm": 0.1864890158176422, "learning_rate": 6.419341908496135e-06, "loss": 0.0114, "step": 110740 }, { "epoch": 0.9351713073399337, "grad_norm": 0.1110161617398262, "learning_rate": 6.418635324144362e-06, "loss": 0.0083, "step": 110750 }, { "epoch": 0.9352557471870976, "grad_norm": 0.7655223608016968, "learning_rate": 6.41792870898012e-06, "loss": 0.0112, "step": 110760 }, { "epoch": 0.9353401870342615, "grad_norm": 0.3453948199748993, "learning_rate": 6.417222063018758e-06, "loss": 0.0099, "step": 110770 }, { "epoch": 0.9354246268814254, "grad_norm": 0.3466417193412781, "learning_rate": 6.416515386275624e-06, "loss": 0.0071, "step": 110780 }, { "epoch": 0.9355090667285892, "grad_norm": 0.2654336988925934, "learning_rate": 6.415808678766064e-06, "loss": 0.0092, "step": 110790 }, { "epoch": 0.9355935065757531, "grad_norm": 0.3473576307296753, "learning_rate": 6.415101940505432e-06, "loss": 0.0112, "step": 110800 }, { "epoch": 0.935677946422917, "grad_norm": 0.15188054740428925, "learning_rate": 6.414395171509074e-06, "loss": 0.0066, "step": 110810 }, { "epoch": 0.9357623862700809, "grad_norm": 0.1119435504078865, "learning_rate": 6.413688371792344e-06, "loss": 0.0053, "step": 110820 }, { "epoch": 0.9358468261172447, "grad_norm": 0.054109837859869, "learning_rate": 6.412981541370592e-06, "loss": 0.0091, "step": 110830 }, { "epoch": 0.9359312659644086, "grad_norm": 0.33704715967178345, "learning_rate": 6.412274680259169e-06, "loss": 0.0095, "step": 110840 }, { "epoch": 0.9360157058115725, "grad_norm": 0.1022304967045784, "learning_rate": 6.411567788473431e-06, "loss": 0.0119, "step": 110850 }, { "epoch": 0.9361001456587363, "grad_norm": 0.1947742998600006, "learning_rate": 6.41086086602873e-06, "loss": 0.0084, "step": 110860 }, { "epoch": 0.9361845855059002, "grad_norm": 0.14233268797397614, "learning_rate": 6.410153912940419e-06, "loss": 0.0075, "step": 110870 }, { "epoch": 0.9362690253530641, "grad_norm": 0.34402918815612793, "learning_rate": 6.409446929223855e-06, "loss": 0.0105, "step": 110880 }, { "epoch": 0.936353465200228, "grad_norm": 0.2571024000644684, "learning_rate": 6.4087399148943904e-06, "loss": 0.0076, "step": 110890 }, { "epoch": 0.9364379050473919, "grad_norm": 0.24532394111156464, "learning_rate": 6.408032869967387e-06, "loss": 0.0121, "step": 110900 }, { "epoch": 0.9365223448945558, "grad_norm": 0.2045402228832245, "learning_rate": 6.4073257944581955e-06, "loss": 0.005, "step": 110910 }, { "epoch": 0.9366067847417197, "grad_norm": 0.1876845508813858, "learning_rate": 6.406618688382177e-06, "loss": 0.0083, "step": 110920 }, { "epoch": 0.9366912245888835, "grad_norm": 0.19992434978485107, "learning_rate": 6.405911551754688e-06, "loss": 0.0072, "step": 110930 }, { "epoch": 0.9367756644360473, "grad_norm": 0.07369889318943024, "learning_rate": 6.405204384591089e-06, "loss": 0.0074, "step": 110940 }, { "epoch": 0.9368601042832112, "grad_norm": 0.47596317529678345, "learning_rate": 6.4044971869067395e-06, "loss": 0.0096, "step": 110950 }, { "epoch": 0.9369445441303751, "grad_norm": 0.34537190198898315, "learning_rate": 6.403789958716998e-06, "loss": 0.0121, "step": 110960 }, { "epoch": 0.937028983977539, "grad_norm": 0.296872615814209, "learning_rate": 6.403082700037226e-06, "loss": 0.0064, "step": 110970 }, { "epoch": 0.9371134238247029, "grad_norm": 0.2565164864063263, "learning_rate": 6.4023754108827865e-06, "loss": 0.0134, "step": 110980 }, { "epoch": 0.9371978636718667, "grad_norm": 0.20921121537685394, "learning_rate": 6.40166809126904e-06, "loss": 0.0127, "step": 110990 }, { "epoch": 0.9372823035190306, "grad_norm": 0.19079448282718658, "learning_rate": 6.400960741211348e-06, "loss": 0.0115, "step": 111000 }, { "epoch": 0.9373667433661945, "grad_norm": 0.02632174827158451, "learning_rate": 6.400253360725077e-06, "loss": 0.0067, "step": 111010 }, { "epoch": 0.9374511832133584, "grad_norm": 0.11755533516407013, "learning_rate": 6.39954594982559e-06, "loss": 0.0131, "step": 111020 }, { "epoch": 0.9375356230605223, "grad_norm": 0.3032389283180237, "learning_rate": 6.398838508528252e-06, "loss": 0.0107, "step": 111030 }, { "epoch": 0.9376200629076862, "grad_norm": 0.2572709321975708, "learning_rate": 6.398131036848428e-06, "loss": 0.0112, "step": 111040 }, { "epoch": 0.9377045027548501, "grad_norm": 0.882549524307251, "learning_rate": 6.3974235348014846e-06, "loss": 0.0129, "step": 111050 }, { "epoch": 0.9377889426020138, "grad_norm": 0.5531227588653564, "learning_rate": 6.396716002402788e-06, "loss": 0.0079, "step": 111060 }, { "epoch": 0.9378733824491777, "grad_norm": 0.25059419870376587, "learning_rate": 6.396008439667708e-06, "loss": 0.0084, "step": 111070 }, { "epoch": 0.9379578222963416, "grad_norm": 0.33832642436027527, "learning_rate": 6.395300846611608e-06, "loss": 0.0117, "step": 111080 }, { "epoch": 0.9380422621435055, "grad_norm": 0.3468119204044342, "learning_rate": 6.3945932232498606e-06, "loss": 0.0088, "step": 111090 }, { "epoch": 0.9381267019906694, "grad_norm": 0.397478848695755, "learning_rate": 6.3938855695978345e-06, "loss": 0.0173, "step": 111100 }, { "epoch": 0.9382111418378333, "grad_norm": 0.20500092208385468, "learning_rate": 6.393177885670899e-06, "loss": 0.0088, "step": 111110 }, { "epoch": 0.9382955816849972, "grad_norm": 0.44852039217948914, "learning_rate": 6.3924701714844254e-06, "loss": 0.007, "step": 111120 }, { "epoch": 0.938380021532161, "grad_norm": 0.29256394505500793, "learning_rate": 6.391762427053785e-06, "loss": 0.008, "step": 111130 }, { "epoch": 0.9384644613793249, "grad_norm": 0.46909621357917786, "learning_rate": 6.3910546523943505e-06, "loss": 0.0129, "step": 111140 }, { "epoch": 0.9385489012264888, "grad_norm": 0.2543902099132538, "learning_rate": 6.3903468475214935e-06, "loss": 0.0063, "step": 111150 }, { "epoch": 0.9386333410736527, "grad_norm": 0.11676254123449326, "learning_rate": 6.389639012450587e-06, "loss": 0.0066, "step": 111160 }, { "epoch": 0.9387177809208165, "grad_norm": 0.3157159686088562, "learning_rate": 6.388931147197006e-06, "loss": 0.007, "step": 111170 }, { "epoch": 0.9388022207679804, "grad_norm": 0.22571241855621338, "learning_rate": 6.388223251776127e-06, "loss": 0.0091, "step": 111180 }, { "epoch": 0.9388866606151443, "grad_norm": 0.7606834173202515, "learning_rate": 6.387515326203323e-06, "loss": 0.0242, "step": 111190 }, { "epoch": 0.9389711004623081, "grad_norm": 0.5025054812431335, "learning_rate": 6.386807370493969e-06, "loss": 0.0061, "step": 111200 }, { "epoch": 0.939055540309472, "grad_norm": 0.3362453877925873, "learning_rate": 6.386099384663444e-06, "loss": 0.0089, "step": 111210 }, { "epoch": 0.9391399801566359, "grad_norm": 0.15927745401859283, "learning_rate": 6.385391368727124e-06, "loss": 0.0048, "step": 111220 }, { "epoch": 0.9392244200037998, "grad_norm": 0.23336736857891083, "learning_rate": 6.384683322700388e-06, "loss": 0.0102, "step": 111230 }, { "epoch": 0.9393088598509637, "grad_norm": 0.3327815532684326, "learning_rate": 6.383975246598613e-06, "loss": 0.0115, "step": 111240 }, { "epoch": 0.9393932996981276, "grad_norm": 0.22699859738349915, "learning_rate": 6.383267140437179e-06, "loss": 0.0089, "step": 111250 }, { "epoch": 0.9394777395452915, "grad_norm": 0.3006415069103241, "learning_rate": 6.3825590042314675e-06, "loss": 0.0129, "step": 111260 }, { "epoch": 0.9395621793924553, "grad_norm": 0.10115300118923187, "learning_rate": 6.381850837996858e-06, "loss": 0.0085, "step": 111270 }, { "epoch": 0.9396466192396192, "grad_norm": 0.1886940747499466, "learning_rate": 6.38114264174873e-06, "loss": 0.0179, "step": 111280 }, { "epoch": 0.939731059086783, "grad_norm": 0.5316951870918274, "learning_rate": 6.3804344155024675e-06, "loss": 0.0125, "step": 111290 }, { "epoch": 0.9398154989339469, "grad_norm": 0.278056800365448, "learning_rate": 6.3797261592734525e-06, "loss": 0.0132, "step": 111300 }, { "epoch": 0.9398999387811108, "grad_norm": 0.2950422167778015, "learning_rate": 6.379017873077069e-06, "loss": 0.0087, "step": 111310 }, { "epoch": 0.9399843786282747, "grad_norm": 1.7585442066192627, "learning_rate": 6.378309556928699e-06, "loss": 0.0117, "step": 111320 }, { "epoch": 0.9400688184754386, "grad_norm": 0.2596794068813324, "learning_rate": 6.377601210843727e-06, "loss": 0.0139, "step": 111330 }, { "epoch": 0.9401532583226024, "grad_norm": 0.3858708143234253, "learning_rate": 6.376892834837539e-06, "loss": 0.0055, "step": 111340 }, { "epoch": 0.9402376981697663, "grad_norm": 0.1523916870355606, "learning_rate": 6.376184428925522e-06, "loss": 0.0071, "step": 111350 }, { "epoch": 0.9403221380169302, "grad_norm": 0.11457114666700363, "learning_rate": 6.375475993123061e-06, "loss": 0.0093, "step": 111360 }, { "epoch": 0.9404065778640941, "grad_norm": 0.3366428017616272, "learning_rate": 6.374767527445543e-06, "loss": 0.0102, "step": 111370 }, { "epoch": 0.940491017711258, "grad_norm": 0.21563391387462616, "learning_rate": 6.374059031908355e-06, "loss": 0.0103, "step": 111380 }, { "epoch": 0.9405754575584219, "grad_norm": 0.5371700525283813, "learning_rate": 6.373350506526889e-06, "loss": 0.0092, "step": 111390 }, { "epoch": 0.9406598974055856, "grad_norm": 0.8638622760772705, "learning_rate": 6.3726419513165294e-06, "loss": 0.0189, "step": 111400 }, { "epoch": 0.9407443372527495, "grad_norm": 0.41921266913414, "learning_rate": 6.371933366292668e-06, "loss": 0.0157, "step": 111410 }, { "epoch": 0.9408287770999134, "grad_norm": 0.27700453996658325, "learning_rate": 6.371224751470696e-06, "loss": 0.0072, "step": 111420 }, { "epoch": 0.9409132169470773, "grad_norm": 0.37064647674560547, "learning_rate": 6.370516106866002e-06, "loss": 0.0081, "step": 111430 }, { "epoch": 0.9409976567942412, "grad_norm": 0.3635176718235016, "learning_rate": 6.3698074324939806e-06, "loss": 0.0111, "step": 111440 }, { "epoch": 0.9410820966414051, "grad_norm": 0.20830270648002625, "learning_rate": 6.3690987283700214e-06, "loss": 0.0099, "step": 111450 }, { "epoch": 0.941166536488569, "grad_norm": 0.6294211745262146, "learning_rate": 6.368389994509518e-06, "loss": 0.0155, "step": 111460 }, { "epoch": 0.9412509763357328, "grad_norm": 0.20675070583820343, "learning_rate": 6.367681230927866e-06, "loss": 0.0118, "step": 111470 }, { "epoch": 0.9413354161828967, "grad_norm": 0.10739267617464066, "learning_rate": 6.366972437640456e-06, "loss": 0.0122, "step": 111480 }, { "epoch": 0.9414198560300606, "grad_norm": 0.37133023142814636, "learning_rate": 6.366263614662685e-06, "loss": 0.006, "step": 111490 }, { "epoch": 0.9415042958772245, "grad_norm": 0.24372407793998718, "learning_rate": 6.365554762009949e-06, "loss": 0.0083, "step": 111500 }, { "epoch": 0.9415887357243884, "grad_norm": 0.1166602224111557, "learning_rate": 6.364845879697644e-06, "loss": 0.0084, "step": 111510 }, { "epoch": 0.9416731755715522, "grad_norm": 0.6047053337097168, "learning_rate": 6.364136967741165e-06, "loss": 0.0125, "step": 111520 }, { "epoch": 0.941757615418716, "grad_norm": 0.2518768906593323, "learning_rate": 6.363428026155911e-06, "loss": 0.0112, "step": 111530 }, { "epoch": 0.9418420552658799, "grad_norm": 0.5716729760169983, "learning_rate": 6.36271905495728e-06, "loss": 0.0123, "step": 111540 }, { "epoch": 0.9419264951130438, "grad_norm": 0.20471985638141632, "learning_rate": 6.36201005416067e-06, "loss": 0.01, "step": 111550 }, { "epoch": 0.9420109349602077, "grad_norm": 0.39907658100128174, "learning_rate": 6.361301023781482e-06, "loss": 0.013, "step": 111560 }, { "epoch": 0.9420953748073716, "grad_norm": 0.37545958161354065, "learning_rate": 6.360591963835113e-06, "loss": 0.0126, "step": 111570 }, { "epoch": 0.9421798146545355, "grad_norm": 0.14137007296085358, "learning_rate": 6.359882874336966e-06, "loss": 0.0061, "step": 111580 }, { "epoch": 0.9422642545016994, "grad_norm": 0.25279131531715393, "learning_rate": 6.359173755302441e-06, "loss": 0.0111, "step": 111590 }, { "epoch": 0.9423486943488633, "grad_norm": 0.30300483107566833, "learning_rate": 6.3584646067469415e-06, "loss": 0.0116, "step": 111600 }, { "epoch": 0.9424331341960271, "grad_norm": 0.4890395700931549, "learning_rate": 6.357755428685869e-06, "loss": 0.0081, "step": 111610 }, { "epoch": 0.942517574043191, "grad_norm": 0.3338830769062042, "learning_rate": 6.357046221134626e-06, "loss": 0.0103, "step": 111620 }, { "epoch": 0.9426020138903548, "grad_norm": 0.09004339575767517, "learning_rate": 6.3563369841086175e-06, "loss": 0.0122, "step": 111630 }, { "epoch": 0.9426864537375187, "grad_norm": 0.3703949451446533, "learning_rate": 6.355627717623249e-06, "loss": 0.0127, "step": 111640 }, { "epoch": 0.9427708935846826, "grad_norm": 0.18323934078216553, "learning_rate": 6.354918421693922e-06, "loss": 0.0117, "step": 111650 }, { "epoch": 0.9428553334318465, "grad_norm": 0.4235832393169403, "learning_rate": 6.354209096336046e-06, "loss": 0.01, "step": 111660 }, { "epoch": 0.9429397732790104, "grad_norm": 0.27559253573417664, "learning_rate": 6.353499741565026e-06, "loss": 0.006, "step": 111670 }, { "epoch": 0.9430242131261742, "grad_norm": 0.7507948279380798, "learning_rate": 6.352790357396269e-06, "loss": 0.0151, "step": 111680 }, { "epoch": 0.9431086529733381, "grad_norm": 0.17548732459545135, "learning_rate": 6.352080943845181e-06, "loss": 0.0069, "step": 111690 }, { "epoch": 0.943193092820502, "grad_norm": 0.682759702205658, "learning_rate": 6.3513715009271735e-06, "loss": 0.0122, "step": 111700 }, { "epoch": 0.9432775326676659, "grad_norm": 0.5624315142631531, "learning_rate": 6.350662028657652e-06, "loss": 0.0116, "step": 111710 }, { "epoch": 0.9433619725148298, "grad_norm": 0.2302771508693695, "learning_rate": 6.349952527052031e-06, "loss": 0.0101, "step": 111720 }, { "epoch": 0.9434464123619937, "grad_norm": 0.6901260018348694, "learning_rate": 6.3492429961257164e-06, "loss": 0.0124, "step": 111730 }, { "epoch": 0.9435308522091576, "grad_norm": 0.2950296700000763, "learning_rate": 6.34853343589412e-06, "loss": 0.0097, "step": 111740 }, { "epoch": 0.9436152920563213, "grad_norm": 0.2374715954065323, "learning_rate": 6.347823846372655e-06, "loss": 0.0085, "step": 111750 }, { "epoch": 0.9436997319034852, "grad_norm": 0.26864778995513916, "learning_rate": 6.347114227576732e-06, "loss": 0.0147, "step": 111760 }, { "epoch": 0.9437841717506491, "grad_norm": 0.06590183824300766, "learning_rate": 6.3464045795217635e-06, "loss": 0.0041, "step": 111770 }, { "epoch": 0.943868611597813, "grad_norm": 0.07778077572584152, "learning_rate": 6.345694902223163e-06, "loss": 0.0047, "step": 111780 }, { "epoch": 0.9439530514449769, "grad_norm": 0.24289141595363617, "learning_rate": 6.344985195696345e-06, "loss": 0.0165, "step": 111790 }, { "epoch": 0.9440374912921408, "grad_norm": 0.3012888431549072, "learning_rate": 6.344275459956725e-06, "loss": 0.0084, "step": 111800 }, { "epoch": 0.9441219311393046, "grad_norm": 0.070210300385952, "learning_rate": 6.343565695019716e-06, "loss": 0.0056, "step": 111810 }, { "epoch": 0.9442063709864685, "grad_norm": 0.6172435283660889, "learning_rate": 6.342855900900736e-06, "loss": 0.0068, "step": 111820 }, { "epoch": 0.9442908108336324, "grad_norm": 0.47389155626296997, "learning_rate": 6.342146077615201e-06, "loss": 0.0137, "step": 111830 }, { "epoch": 0.9443752506807963, "grad_norm": 0.44409796595573425, "learning_rate": 6.34143622517853e-06, "loss": 0.0137, "step": 111840 }, { "epoch": 0.9444596905279602, "grad_norm": 0.3224235475063324, "learning_rate": 6.3407263436061364e-06, "loss": 0.0049, "step": 111850 }, { "epoch": 0.944544130375124, "grad_norm": 0.20350241661071777, "learning_rate": 6.340016432913443e-06, "loss": 0.0139, "step": 111860 }, { "epoch": 0.9446285702222879, "grad_norm": 1.0208889245986938, "learning_rate": 6.339306493115867e-06, "loss": 0.0101, "step": 111870 }, { "epoch": 0.9447130100694517, "grad_norm": 0.19735123217105865, "learning_rate": 6.338596524228828e-06, "loss": 0.0127, "step": 111880 }, { "epoch": 0.9447974499166156, "grad_norm": 0.47519469261169434, "learning_rate": 6.337886526267748e-06, "loss": 0.0092, "step": 111890 }, { "epoch": 0.9448818897637795, "grad_norm": 0.2892921566963196, "learning_rate": 6.337176499248046e-06, "loss": 0.0053, "step": 111900 }, { "epoch": 0.9449663296109434, "grad_norm": 0.1794624924659729, "learning_rate": 6.336466443185143e-06, "loss": 0.0038, "step": 111910 }, { "epoch": 0.9450507694581073, "grad_norm": 0.2394653558731079, "learning_rate": 6.335756358094464e-06, "loss": 0.0082, "step": 111920 }, { "epoch": 0.9451352093052712, "grad_norm": 0.249092236161232, "learning_rate": 6.335046243991431e-06, "loss": 0.0075, "step": 111930 }, { "epoch": 0.9452196491524351, "grad_norm": 0.9245098233222961, "learning_rate": 6.334336100891467e-06, "loss": 0.0101, "step": 111940 }, { "epoch": 0.945304088999599, "grad_norm": 0.4282682240009308, "learning_rate": 6.333625928809996e-06, "loss": 0.0074, "step": 111950 }, { "epoch": 0.9453885288467628, "grad_norm": 0.2139202356338501, "learning_rate": 6.332915727762444e-06, "loss": 0.0051, "step": 111960 }, { "epoch": 0.9454729686939267, "grad_norm": 0.4172201454639435, "learning_rate": 6.332205497764235e-06, "loss": 0.0088, "step": 111970 }, { "epoch": 0.9455574085410905, "grad_norm": 0.3450571894645691, "learning_rate": 6.331495238830795e-06, "loss": 0.0081, "step": 111980 }, { "epoch": 0.9456418483882544, "grad_norm": 0.265521764755249, "learning_rate": 6.330784950977551e-06, "loss": 0.0098, "step": 111990 }, { "epoch": 0.9457262882354183, "grad_norm": 0.049026042222976685, "learning_rate": 6.330074634219931e-06, "loss": 0.0122, "step": 112000 }, { "epoch": 0.9458107280825822, "grad_norm": 0.11332693696022034, "learning_rate": 6.329364288573364e-06, "loss": 0.0087, "step": 112010 }, { "epoch": 0.945895167929746, "grad_norm": 0.3159015476703644, "learning_rate": 6.328653914053275e-06, "loss": 0.009, "step": 112020 }, { "epoch": 0.9459796077769099, "grad_norm": 0.2642998695373535, "learning_rate": 6.327943510675097e-06, "loss": 0.0167, "step": 112030 }, { "epoch": 0.9460640476240738, "grad_norm": 0.4371708929538727, "learning_rate": 6.327233078454258e-06, "loss": 0.01, "step": 112040 }, { "epoch": 0.9461484874712377, "grad_norm": 0.42288607358932495, "learning_rate": 6.326522617406187e-06, "loss": 0.0183, "step": 112050 }, { "epoch": 0.9462329273184016, "grad_norm": 0.12599939107894897, "learning_rate": 6.325812127546317e-06, "loss": 0.0065, "step": 112060 }, { "epoch": 0.9463173671655655, "grad_norm": 0.15141481161117554, "learning_rate": 6.325101608890079e-06, "loss": 0.0082, "step": 112070 }, { "epoch": 0.9464018070127294, "grad_norm": 0.17822052538394928, "learning_rate": 6.324391061452907e-06, "loss": 0.0115, "step": 112080 }, { "epoch": 0.9464862468598931, "grad_norm": 0.1494634449481964, "learning_rate": 6.323680485250231e-06, "loss": 0.005, "step": 112090 }, { "epoch": 0.946570686707057, "grad_norm": 0.05848546698689461, "learning_rate": 6.322969880297486e-06, "loss": 0.0114, "step": 112100 }, { "epoch": 0.9466551265542209, "grad_norm": 0.3230695128440857, "learning_rate": 6.322259246610107e-06, "loss": 0.0087, "step": 112110 }, { "epoch": 0.9467395664013848, "grad_norm": 0.4526999294757843, "learning_rate": 6.321548584203528e-06, "loss": 0.0135, "step": 112120 }, { "epoch": 0.9468240062485487, "grad_norm": 0.5278488993644714, "learning_rate": 6.320837893093186e-06, "loss": 0.0137, "step": 112130 }, { "epoch": 0.9469084460957126, "grad_norm": 0.4037075638771057, "learning_rate": 6.320127173294514e-06, "loss": 0.0098, "step": 112140 }, { "epoch": 0.9469928859428765, "grad_norm": 0.03677358105778694, "learning_rate": 6.31941642482295e-06, "loss": 0.0083, "step": 112150 }, { "epoch": 0.9470773257900403, "grad_norm": 0.14260494709014893, "learning_rate": 6.318705647693933e-06, "loss": 0.0071, "step": 112160 }, { "epoch": 0.9471617656372042, "grad_norm": 0.5508408546447754, "learning_rate": 6.317994841922899e-06, "loss": 0.0125, "step": 112170 }, { "epoch": 0.9472462054843681, "grad_norm": 0.445211797952652, "learning_rate": 6.317284007525286e-06, "loss": 0.0074, "step": 112180 }, { "epoch": 0.947330645331532, "grad_norm": 0.3116287291049957, "learning_rate": 6.316573144516536e-06, "loss": 0.0065, "step": 112190 }, { "epoch": 0.9474150851786959, "grad_norm": 0.12219472974538803, "learning_rate": 6.315862252912086e-06, "loss": 0.015, "step": 112200 }, { "epoch": 0.9474995250258597, "grad_norm": 0.02638622187077999, "learning_rate": 6.315151332727379e-06, "loss": 0.0113, "step": 112210 }, { "epoch": 0.9475839648730235, "grad_norm": 0.23347453773021698, "learning_rate": 6.314440383977853e-06, "loss": 0.0084, "step": 112220 }, { "epoch": 0.9476684047201874, "grad_norm": 0.7195373177528381, "learning_rate": 6.31372940667895e-06, "loss": 0.0115, "step": 112230 }, { "epoch": 0.9477528445673513, "grad_norm": 0.35190191864967346, "learning_rate": 6.313018400846116e-06, "loss": 0.0136, "step": 112240 }, { "epoch": 0.9478372844145152, "grad_norm": 0.2436041533946991, "learning_rate": 6.312307366494792e-06, "loss": 0.009, "step": 112250 }, { "epoch": 0.9479217242616791, "grad_norm": 0.4658568501472473, "learning_rate": 6.311596303640419e-06, "loss": 0.0141, "step": 112260 }, { "epoch": 0.948006164108843, "grad_norm": 0.3539372980594635, "learning_rate": 6.310885212298443e-06, "loss": 0.0144, "step": 112270 }, { "epoch": 0.9480906039560069, "grad_norm": 0.32813552021980286, "learning_rate": 6.3101740924843095e-06, "loss": 0.007, "step": 112280 }, { "epoch": 0.9481750438031707, "grad_norm": 0.17190726101398468, "learning_rate": 6.309462944213464e-06, "loss": 0.0083, "step": 112290 }, { "epoch": 0.9482594836503346, "grad_norm": 0.56428462266922, "learning_rate": 6.30875176750135e-06, "loss": 0.0109, "step": 112300 }, { "epoch": 0.9483439234974985, "grad_norm": 0.4729458689689636, "learning_rate": 6.308040562363415e-06, "loss": 0.01, "step": 112310 }, { "epoch": 0.9484283633446623, "grad_norm": 0.23297442495822906, "learning_rate": 6.3073293288151085e-06, "loss": 0.0119, "step": 112320 }, { "epoch": 0.9485128031918262, "grad_norm": 0.2884732484817505, "learning_rate": 6.306618066871878e-06, "loss": 0.0089, "step": 112330 }, { "epoch": 0.9485972430389901, "grad_norm": 0.2908540964126587, "learning_rate": 6.305906776549169e-06, "loss": 0.0074, "step": 112340 }, { "epoch": 0.948681682886154, "grad_norm": 0.3524886667728424, "learning_rate": 6.305195457862433e-06, "loss": 0.0102, "step": 112350 }, { "epoch": 0.9487661227333178, "grad_norm": 0.11322536319494247, "learning_rate": 6.304484110827121e-06, "loss": 0.0066, "step": 112360 }, { "epoch": 0.9488505625804817, "grad_norm": 0.7230663299560547, "learning_rate": 6.30377273545868e-06, "loss": 0.0089, "step": 112370 }, { "epoch": 0.9489350024276456, "grad_norm": 0.3061597943305969, "learning_rate": 6.303061331772562e-06, "loss": 0.0091, "step": 112380 }, { "epoch": 0.9490194422748095, "grad_norm": 0.041429366916418076, "learning_rate": 6.302349899784218e-06, "loss": 0.0069, "step": 112390 }, { "epoch": 0.9491038821219734, "grad_norm": 0.36551570892333984, "learning_rate": 6.301638439509102e-06, "loss": 0.0043, "step": 112400 }, { "epoch": 0.9491883219691373, "grad_norm": 0.72586590051651, "learning_rate": 6.3009269509626655e-06, "loss": 0.0133, "step": 112410 }, { "epoch": 0.9492727618163012, "grad_norm": 1.1110098361968994, "learning_rate": 6.300215434160363e-06, "loss": 0.0119, "step": 112420 }, { "epoch": 0.949357201663465, "grad_norm": 0.11120111495256424, "learning_rate": 6.2995038891176465e-06, "loss": 0.0073, "step": 112430 }, { "epoch": 0.9494416415106288, "grad_norm": 0.014581817202270031, "learning_rate": 6.298792315849973e-06, "loss": 0.0114, "step": 112440 }, { "epoch": 0.9495260813577927, "grad_norm": 0.35226741433143616, "learning_rate": 6.298080714372796e-06, "loss": 0.0111, "step": 112450 }, { "epoch": 0.9496105212049566, "grad_norm": 0.36769139766693115, "learning_rate": 6.297369084701572e-06, "loss": 0.0084, "step": 112460 }, { "epoch": 0.9496949610521205, "grad_norm": 0.8776289224624634, "learning_rate": 6.2966574268517566e-06, "loss": 0.0112, "step": 112470 }, { "epoch": 0.9497794008992844, "grad_norm": 0.3858252763748169, "learning_rate": 6.29594574083881e-06, "loss": 0.0079, "step": 112480 }, { "epoch": 0.9498638407464483, "grad_norm": 0.3727954626083374, "learning_rate": 6.295234026678185e-06, "loss": 0.0075, "step": 112490 }, { "epoch": 0.9499482805936121, "grad_norm": 0.20724740624427795, "learning_rate": 6.294522284385344e-06, "loss": 0.0152, "step": 112500 }, { "epoch": 0.950032720440776, "grad_norm": 0.16634006798267365, "learning_rate": 6.293810513975742e-06, "loss": 0.0079, "step": 112510 }, { "epoch": 0.9501171602879399, "grad_norm": 0.4390336275100708, "learning_rate": 6.293098715464843e-06, "loss": 0.0132, "step": 112520 }, { "epoch": 0.9502016001351038, "grad_norm": 0.38880738615989685, "learning_rate": 6.292386888868105e-06, "loss": 0.0162, "step": 112530 }, { "epoch": 0.9502860399822677, "grad_norm": 0.20018525421619415, "learning_rate": 6.291675034200987e-06, "loss": 0.0111, "step": 112540 }, { "epoch": 0.9503704798294315, "grad_norm": 0.16634684801101685, "learning_rate": 6.2909631514789525e-06, "loss": 0.0077, "step": 112550 }, { "epoch": 0.9504549196765953, "grad_norm": 0.24468021094799042, "learning_rate": 6.290251240717462e-06, "loss": 0.0089, "step": 112560 }, { "epoch": 0.9505393595237592, "grad_norm": 0.2413942515850067, "learning_rate": 6.289539301931981e-06, "loss": 0.0121, "step": 112570 }, { "epoch": 0.9506237993709231, "grad_norm": 0.15626747906208038, "learning_rate": 6.288827335137969e-06, "loss": 0.007, "step": 112580 }, { "epoch": 0.950708239218087, "grad_norm": 0.5627042055130005, "learning_rate": 6.288115340350892e-06, "loss": 0.0072, "step": 112590 }, { "epoch": 0.9507926790652509, "grad_norm": 0.16007588803768158, "learning_rate": 6.287403317586214e-06, "loss": 0.0088, "step": 112600 }, { "epoch": 0.9508771189124148, "grad_norm": 0.2541699707508087, "learning_rate": 6.286691266859399e-06, "loss": 0.0092, "step": 112610 }, { "epoch": 0.9509615587595787, "grad_norm": 0.3157166540622711, "learning_rate": 6.285979188185915e-06, "loss": 0.009, "step": 112620 }, { "epoch": 0.9510459986067425, "grad_norm": 0.11321750283241272, "learning_rate": 6.285267081581226e-06, "loss": 0.0116, "step": 112630 }, { "epoch": 0.9511304384539064, "grad_norm": 0.14757993817329407, "learning_rate": 6.284554947060797e-06, "loss": 0.0095, "step": 112640 }, { "epoch": 0.9512148783010703, "grad_norm": 0.11840119957923889, "learning_rate": 6.2838427846401e-06, "loss": 0.0149, "step": 112650 }, { "epoch": 0.9512993181482341, "grad_norm": 0.33120617270469666, "learning_rate": 6.283130594334601e-06, "loss": 0.0044, "step": 112660 }, { "epoch": 0.951383757995398, "grad_norm": 0.19397863745689392, "learning_rate": 6.2824183761597664e-06, "loss": 0.0117, "step": 112670 }, { "epoch": 0.9514681978425619, "grad_norm": 0.3906763195991516, "learning_rate": 6.2817061301310685e-06, "loss": 0.0085, "step": 112680 }, { "epoch": 0.9515526376897258, "grad_norm": 0.715354859828949, "learning_rate": 6.280993856263978e-06, "loss": 0.0172, "step": 112690 }, { "epoch": 0.9516370775368896, "grad_norm": 0.17338120937347412, "learning_rate": 6.280281554573962e-06, "loss": 0.0118, "step": 112700 }, { "epoch": 0.9517215173840535, "grad_norm": 0.21602028608322144, "learning_rate": 6.279569225076492e-06, "loss": 0.007, "step": 112710 }, { "epoch": 0.9518059572312174, "grad_norm": 0.14905945956707, "learning_rate": 6.278856867787042e-06, "loss": 0.009, "step": 112720 }, { "epoch": 0.9518903970783813, "grad_norm": 0.40584155917167664, "learning_rate": 6.278144482721083e-06, "loss": 0.0121, "step": 112730 }, { "epoch": 0.9519748369255452, "grad_norm": 0.4185752272605896, "learning_rate": 6.277432069894087e-06, "loss": 0.0071, "step": 112740 }, { "epoch": 0.9520592767727091, "grad_norm": 0.19552361965179443, "learning_rate": 6.276719629321528e-06, "loss": 0.0094, "step": 112750 }, { "epoch": 0.952143716619873, "grad_norm": 0.37744617462158203, "learning_rate": 6.27600716101888e-06, "loss": 0.0164, "step": 112760 }, { "epoch": 0.9522281564670368, "grad_norm": 0.26061031222343445, "learning_rate": 6.275294665001619e-06, "loss": 0.0071, "step": 112770 }, { "epoch": 0.9523125963142006, "grad_norm": 0.7165950536727905, "learning_rate": 6.274582141285219e-06, "loss": 0.0124, "step": 112780 }, { "epoch": 0.9523970361613645, "grad_norm": 0.06440004706382751, "learning_rate": 6.273869589885155e-06, "loss": 0.0086, "step": 112790 }, { "epoch": 0.9524814760085284, "grad_norm": 0.7601131796836853, "learning_rate": 6.273157010816906e-06, "loss": 0.0086, "step": 112800 }, { "epoch": 0.9525659158556923, "grad_norm": 1.224472999572754, "learning_rate": 6.272444404095948e-06, "loss": 0.0151, "step": 112810 }, { "epoch": 0.9526503557028562, "grad_norm": 0.6271811127662659, "learning_rate": 6.271731769737756e-06, "loss": 0.0099, "step": 112820 }, { "epoch": 0.95273479555002, "grad_norm": 0.20671938359737396, "learning_rate": 6.271019107757812e-06, "loss": 0.0143, "step": 112830 }, { "epoch": 0.9528192353971839, "grad_norm": 0.4873950779438019, "learning_rate": 6.270306418171595e-06, "loss": 0.0165, "step": 112840 }, { "epoch": 0.9529036752443478, "grad_norm": 0.2945302128791809, "learning_rate": 6.269593700994581e-06, "loss": 0.013, "step": 112850 }, { "epoch": 0.9529881150915117, "grad_norm": 0.24593941867351532, "learning_rate": 6.268880956242252e-06, "loss": 0.0112, "step": 112860 }, { "epoch": 0.9530725549386756, "grad_norm": 0.22556331753730774, "learning_rate": 6.268168183930088e-06, "loss": 0.0066, "step": 112870 }, { "epoch": 0.9531569947858395, "grad_norm": 0.2038327157497406, "learning_rate": 6.267455384073572e-06, "loss": 0.0068, "step": 112880 }, { "epoch": 0.9532414346330033, "grad_norm": 0.22073571383953094, "learning_rate": 6.266742556688185e-06, "loss": 0.0103, "step": 112890 }, { "epoch": 0.9533258744801671, "grad_norm": 0.5912730097770691, "learning_rate": 6.266029701789409e-06, "loss": 0.0091, "step": 112900 }, { "epoch": 0.953410314327331, "grad_norm": 0.30725163221359253, "learning_rate": 6.265316819392726e-06, "loss": 0.0147, "step": 112910 }, { "epoch": 0.9534947541744949, "grad_norm": 0.18679635226726532, "learning_rate": 6.264603909513623e-06, "loss": 0.0037, "step": 112920 }, { "epoch": 0.9535791940216588, "grad_norm": 0.27254414558410645, "learning_rate": 6.26389097216758e-06, "loss": 0.0075, "step": 112930 }, { "epoch": 0.9536636338688227, "grad_norm": 0.47643905878067017, "learning_rate": 6.2631780073700865e-06, "loss": 0.0049, "step": 112940 }, { "epoch": 0.9537480737159866, "grad_norm": 0.16455647349357605, "learning_rate": 6.262465015136624e-06, "loss": 0.0072, "step": 112950 }, { "epoch": 0.9538325135631505, "grad_norm": 0.5308939218521118, "learning_rate": 6.261751995482681e-06, "loss": 0.0078, "step": 112960 }, { "epoch": 0.9539169534103143, "grad_norm": 0.4072904884815216, "learning_rate": 6.261038948423744e-06, "loss": 0.0103, "step": 112970 }, { "epoch": 0.9540013932574782, "grad_norm": 0.11617083102464676, "learning_rate": 6.260325873975297e-06, "loss": 0.0041, "step": 112980 }, { "epoch": 0.9540858331046421, "grad_norm": 0.6266131401062012, "learning_rate": 6.259612772152832e-06, "loss": 0.0101, "step": 112990 }, { "epoch": 0.954170272951806, "grad_norm": 0.3509080708026886, "learning_rate": 6.2588996429718345e-06, "loss": 0.008, "step": 113000 }, { "epoch": 0.9542547127989698, "grad_norm": 0.37983548641204834, "learning_rate": 6.258186486447795e-06, "loss": 0.0086, "step": 113010 }, { "epoch": 0.9543391526461337, "grad_norm": 0.444029837846756, "learning_rate": 6.257473302596204e-06, "loss": 0.0129, "step": 113020 }, { "epoch": 0.9544235924932976, "grad_norm": 0.07100289314985275, "learning_rate": 6.25676009143255e-06, "loss": 0.0078, "step": 113030 }, { "epoch": 0.9545080323404614, "grad_norm": 0.4001387059688568, "learning_rate": 6.256046852972323e-06, "loss": 0.016, "step": 113040 }, { "epoch": 0.9545924721876253, "grad_norm": 0.5910243988037109, "learning_rate": 6.255333587231016e-06, "loss": 0.0132, "step": 113050 }, { "epoch": 0.9546769120347892, "grad_norm": 0.40482041239738464, "learning_rate": 6.254620294224123e-06, "loss": 0.0071, "step": 113060 }, { "epoch": 0.9547613518819531, "grad_norm": 0.39167311787605286, "learning_rate": 6.253906973967134e-06, "loss": 0.0126, "step": 113070 }, { "epoch": 0.954845791729117, "grad_norm": 0.2841060757637024, "learning_rate": 6.253193626475541e-06, "loss": 0.0135, "step": 113080 }, { "epoch": 0.9549302315762809, "grad_norm": 0.2055179923772812, "learning_rate": 6.252480251764839e-06, "loss": 0.0078, "step": 113090 }, { "epoch": 0.9550146714234448, "grad_norm": 0.22365207970142365, "learning_rate": 6.251766849850525e-06, "loss": 0.012, "step": 113100 }, { "epoch": 0.9550991112706086, "grad_norm": 0.27330341935157776, "learning_rate": 6.251053420748088e-06, "loss": 0.0093, "step": 113110 }, { "epoch": 0.9551835511177724, "grad_norm": 0.1005505919456482, "learning_rate": 6.250339964473029e-06, "loss": 0.0087, "step": 113120 }, { "epoch": 0.9552679909649363, "grad_norm": 0.15671434998512268, "learning_rate": 6.2496264810408415e-06, "loss": 0.0121, "step": 113130 }, { "epoch": 0.9553524308121002, "grad_norm": 0.2968321442604065, "learning_rate": 6.248912970467023e-06, "loss": 0.0085, "step": 113140 }, { "epoch": 0.9554368706592641, "grad_norm": 0.2347922921180725, "learning_rate": 6.2481994327670716e-06, "loss": 0.0155, "step": 113150 }, { "epoch": 0.955521310506428, "grad_norm": 0.006673155818134546, "learning_rate": 6.247485867956484e-06, "loss": 0.0097, "step": 113160 }, { "epoch": 0.9556057503535919, "grad_norm": 0.17533451318740845, "learning_rate": 6.246772276050759e-06, "loss": 0.0069, "step": 113170 }, { "epoch": 0.9556901902007557, "grad_norm": 0.9590039253234863, "learning_rate": 6.246058657065396e-06, "loss": 0.0185, "step": 113180 }, { "epoch": 0.9557746300479196, "grad_norm": 0.16716210544109344, "learning_rate": 6.245345011015894e-06, "loss": 0.0107, "step": 113190 }, { "epoch": 0.9558590698950835, "grad_norm": 0.38496071100234985, "learning_rate": 6.2446313379177525e-06, "loss": 0.0063, "step": 113200 }, { "epoch": 0.9559435097422474, "grad_norm": 0.4244680106639862, "learning_rate": 6.243917637786474e-06, "loss": 0.0088, "step": 113210 }, { "epoch": 0.9560279495894113, "grad_norm": 0.15367290377616882, "learning_rate": 6.24320391063756e-06, "loss": 0.005, "step": 113220 }, { "epoch": 0.9561123894365752, "grad_norm": 0.08190514892339706, "learning_rate": 6.242490156486511e-06, "loss": 0.0058, "step": 113230 }, { "epoch": 0.956196829283739, "grad_norm": 0.4477425217628479, "learning_rate": 6.2417763753488315e-06, "loss": 0.0134, "step": 113240 }, { "epoch": 0.9562812691309028, "grad_norm": 0.372710645198822, "learning_rate": 6.241062567240022e-06, "loss": 0.005, "step": 113250 }, { "epoch": 0.9563657089780667, "grad_norm": 0.4355171322822571, "learning_rate": 6.2403487321755895e-06, "loss": 0.0108, "step": 113260 }, { "epoch": 0.9564501488252306, "grad_norm": 0.35220572352409363, "learning_rate": 6.2396348701710354e-06, "loss": 0.0095, "step": 113270 }, { "epoch": 0.9565345886723945, "grad_norm": 0.26205822825431824, "learning_rate": 6.238920981241866e-06, "loss": 0.0076, "step": 113280 }, { "epoch": 0.9566190285195584, "grad_norm": 0.186529740691185, "learning_rate": 6.238207065403586e-06, "loss": 0.0113, "step": 113290 }, { "epoch": 0.9567034683667223, "grad_norm": 0.20734940469264984, "learning_rate": 6.237493122671705e-06, "loss": 0.0087, "step": 113300 }, { "epoch": 0.9567879082138862, "grad_norm": 0.26098406314849854, "learning_rate": 6.236779153061726e-06, "loss": 0.0118, "step": 113310 }, { "epoch": 0.95687234806105, "grad_norm": 0.2867819368839264, "learning_rate": 6.236065156589156e-06, "loss": 0.0082, "step": 113320 }, { "epoch": 0.9569567879082139, "grad_norm": 0.3777827024459839, "learning_rate": 6.235351133269505e-06, "loss": 0.0117, "step": 113330 }, { "epoch": 0.9570412277553778, "grad_norm": 0.5078626871109009, "learning_rate": 6.23463708311828e-06, "loss": 0.0147, "step": 113340 }, { "epoch": 0.9571256676025416, "grad_norm": 0.6872158646583557, "learning_rate": 6.233923006150992e-06, "loss": 0.0089, "step": 113350 }, { "epoch": 0.9572101074497055, "grad_norm": 0.34582746028900146, "learning_rate": 6.233208902383148e-06, "loss": 0.0054, "step": 113360 }, { "epoch": 0.9572945472968694, "grad_norm": 0.38051140308380127, "learning_rate": 6.232494771830259e-06, "loss": 0.0113, "step": 113370 }, { "epoch": 0.9573789871440332, "grad_norm": 0.40118932723999023, "learning_rate": 6.231780614507836e-06, "loss": 0.007, "step": 113380 }, { "epoch": 0.9574634269911971, "grad_norm": 0.23843331634998322, "learning_rate": 6.231066430431392e-06, "loss": 0.0134, "step": 113390 }, { "epoch": 0.957547866838361, "grad_norm": 0.5333216190338135, "learning_rate": 6.230352219616436e-06, "loss": 0.0116, "step": 113400 }, { "epoch": 0.9576323066855249, "grad_norm": 0.6558742523193359, "learning_rate": 6.229637982078481e-06, "loss": 0.0142, "step": 113410 }, { "epoch": 0.9577167465326888, "grad_norm": 0.4281332194805145, "learning_rate": 6.228923717833043e-06, "loss": 0.01, "step": 113420 }, { "epoch": 0.9578011863798527, "grad_norm": 0.3079514503479004, "learning_rate": 6.228209426895634e-06, "loss": 0.0147, "step": 113430 }, { "epoch": 0.9578856262270166, "grad_norm": 0.37582316994667053, "learning_rate": 6.2274951092817655e-06, "loss": 0.0135, "step": 113440 }, { "epoch": 0.9579700660741804, "grad_norm": 0.5150070190429688, "learning_rate": 6.2267807650069565e-06, "loss": 0.009, "step": 113450 }, { "epoch": 0.9580545059213443, "grad_norm": 0.21375000476837158, "learning_rate": 6.22606639408672e-06, "loss": 0.0108, "step": 113460 }, { "epoch": 0.9581389457685081, "grad_norm": 0.06598968803882599, "learning_rate": 6.225351996536574e-06, "loss": 0.0068, "step": 113470 }, { "epoch": 0.958223385615672, "grad_norm": 0.30909353494644165, "learning_rate": 6.224637572372032e-06, "loss": 0.0115, "step": 113480 }, { "epoch": 0.9583078254628359, "grad_norm": 0.020425641909241676, "learning_rate": 6.223923121608614e-06, "loss": 0.0121, "step": 113490 }, { "epoch": 0.9583922653099998, "grad_norm": 0.18721096217632294, "learning_rate": 6.223208644261835e-06, "loss": 0.0046, "step": 113500 }, { "epoch": 0.9584767051571637, "grad_norm": 0.08109983056783676, "learning_rate": 6.222494140347217e-06, "loss": 0.0121, "step": 113510 }, { "epoch": 0.9585611450043275, "grad_norm": 0.11513548344373703, "learning_rate": 6.221779609880276e-06, "loss": 0.0072, "step": 113520 }, { "epoch": 0.9586455848514914, "grad_norm": 0.29243263602256775, "learning_rate": 6.221065052876531e-06, "loss": 0.0133, "step": 113530 }, { "epoch": 0.9587300246986553, "grad_norm": 1.086571455001831, "learning_rate": 6.220350469351505e-06, "loss": 0.0106, "step": 113540 }, { "epoch": 0.9588144645458192, "grad_norm": 0.3651580214500427, "learning_rate": 6.2196358593207175e-06, "loss": 0.0121, "step": 113550 }, { "epoch": 0.9588989043929831, "grad_norm": 0.09445548802614212, "learning_rate": 6.218921222799687e-06, "loss": 0.0072, "step": 113560 }, { "epoch": 0.958983344240147, "grad_norm": 0.09437862783670425, "learning_rate": 6.218206559803939e-06, "loss": 0.0056, "step": 113570 }, { "epoch": 0.9590677840873107, "grad_norm": 0.7774479389190674, "learning_rate": 6.2174918703489925e-06, "loss": 0.0069, "step": 113580 }, { "epoch": 0.9591522239344746, "grad_norm": 0.6260595917701721, "learning_rate": 6.2167771544503745e-06, "loss": 0.0112, "step": 113590 }, { "epoch": 0.9592366637816385, "grad_norm": 1.0696107149124146, "learning_rate": 6.216062412123603e-06, "loss": 0.0122, "step": 113600 }, { "epoch": 0.9593211036288024, "grad_norm": 0.512137234210968, "learning_rate": 6.215347643384206e-06, "loss": 0.0118, "step": 113610 }, { "epoch": 0.9594055434759663, "grad_norm": 0.8545393347740173, "learning_rate": 6.214632848247708e-06, "loss": 0.0085, "step": 113620 }, { "epoch": 0.9594899833231302, "grad_norm": 0.3394252061843872, "learning_rate": 6.213918026729634e-06, "loss": 0.0071, "step": 113630 }, { "epoch": 0.9595744231702941, "grad_norm": 0.3439387381076813, "learning_rate": 6.213203178845509e-06, "loss": 0.0085, "step": 113640 }, { "epoch": 0.959658863017458, "grad_norm": 0.1251729130744934, "learning_rate": 6.2124883046108575e-06, "loss": 0.0058, "step": 113650 }, { "epoch": 0.9597433028646218, "grad_norm": 0.24976298213005066, "learning_rate": 6.21177340404121e-06, "loss": 0.009, "step": 113660 }, { "epoch": 0.9598277427117857, "grad_norm": 0.7924458980560303, "learning_rate": 6.211058477152093e-06, "loss": 0.0073, "step": 113670 }, { "epoch": 0.9599121825589496, "grad_norm": 0.10930608212947845, "learning_rate": 6.2103435239590345e-06, "loss": 0.025, "step": 113680 }, { "epoch": 0.9599966224061135, "grad_norm": 0.35003766417503357, "learning_rate": 6.209628544477561e-06, "loss": 0.0068, "step": 113690 }, { "epoch": 0.9600810622532773, "grad_norm": 0.39419203996658325, "learning_rate": 6.208913538723203e-06, "loss": 0.0089, "step": 113700 }, { "epoch": 0.9601655021004412, "grad_norm": 0.29437050223350525, "learning_rate": 6.208198506711494e-06, "loss": 0.0103, "step": 113710 }, { "epoch": 0.960249941947605, "grad_norm": 0.3385402262210846, "learning_rate": 6.207483448457958e-06, "loss": 0.0087, "step": 113720 }, { "epoch": 0.9603343817947689, "grad_norm": 0.5402820110321045, "learning_rate": 6.206768363978129e-06, "loss": 0.0105, "step": 113730 }, { "epoch": 0.9604188216419328, "grad_norm": 0.21933919191360474, "learning_rate": 6.206053253287538e-06, "loss": 0.015, "step": 113740 }, { "epoch": 0.9605032614890967, "grad_norm": 0.2051818072795868, "learning_rate": 6.20533811640172e-06, "loss": 0.0065, "step": 113750 }, { "epoch": 0.9605877013362606, "grad_norm": 0.31644758582115173, "learning_rate": 6.204622953336203e-06, "loss": 0.0101, "step": 113760 }, { "epoch": 0.9606721411834245, "grad_norm": 0.2480495274066925, "learning_rate": 6.2039077641065235e-06, "loss": 0.0081, "step": 113770 }, { "epoch": 0.9607565810305884, "grad_norm": 0.3188290297985077, "learning_rate": 6.203192548728214e-06, "loss": 0.0155, "step": 113780 }, { "epoch": 0.9608410208777522, "grad_norm": 0.09273888170719147, "learning_rate": 6.2024773072168085e-06, "loss": 0.0125, "step": 113790 }, { "epoch": 0.9609254607249161, "grad_norm": 0.11237432807683945, "learning_rate": 6.201762039587843e-06, "loss": 0.0061, "step": 113800 }, { "epoch": 0.9610099005720799, "grad_norm": 0.19418591260910034, "learning_rate": 6.201046745856851e-06, "loss": 0.0179, "step": 113810 }, { "epoch": 0.9610943404192438, "grad_norm": 0.6768664717674255, "learning_rate": 6.20033142603937e-06, "loss": 0.0092, "step": 113820 }, { "epoch": 0.9611787802664077, "grad_norm": 0.10014566034078598, "learning_rate": 6.199616080150937e-06, "loss": 0.0085, "step": 113830 }, { "epoch": 0.9612632201135716, "grad_norm": 0.4624989926815033, "learning_rate": 6.198900708207088e-06, "loss": 0.0112, "step": 113840 }, { "epoch": 0.9613476599607355, "grad_norm": 0.24224011600017548, "learning_rate": 6.198185310223361e-06, "loss": 0.0058, "step": 113850 }, { "epoch": 0.9614320998078993, "grad_norm": 0.4944278597831726, "learning_rate": 6.197469886215296e-06, "loss": 0.0054, "step": 113860 }, { "epoch": 0.9615165396550632, "grad_norm": 0.25449302792549133, "learning_rate": 6.19675443619843e-06, "loss": 0.0111, "step": 113870 }, { "epoch": 0.9616009795022271, "grad_norm": 0.6464336514472961, "learning_rate": 6.196038960188302e-06, "loss": 0.0161, "step": 113880 }, { "epoch": 0.961685419349391, "grad_norm": 0.24745655059814453, "learning_rate": 6.195323458200453e-06, "loss": 0.0076, "step": 113890 }, { "epoch": 0.9617698591965549, "grad_norm": 0.8024018406867981, "learning_rate": 6.194607930250425e-06, "loss": 0.0129, "step": 113900 }, { "epoch": 0.9618542990437188, "grad_norm": 0.4323439300060272, "learning_rate": 6.193892376353756e-06, "loss": 0.0129, "step": 113910 }, { "epoch": 0.9619387388908827, "grad_norm": 0.6361353397369385, "learning_rate": 6.19317679652599e-06, "loss": 0.0099, "step": 113920 }, { "epoch": 0.9620231787380464, "grad_norm": 0.17533613741397858, "learning_rate": 6.192461190782668e-06, "loss": 0.0057, "step": 113930 }, { "epoch": 0.9621076185852103, "grad_norm": 0.40972405672073364, "learning_rate": 6.191745559139333e-06, "loss": 0.0059, "step": 113940 }, { "epoch": 0.9621920584323742, "grad_norm": 0.34845688939094543, "learning_rate": 6.1910299016115295e-06, "loss": 0.0073, "step": 113950 }, { "epoch": 0.9622764982795381, "grad_norm": 0.11301890015602112, "learning_rate": 6.1903142182148e-06, "loss": 0.0073, "step": 113960 }, { "epoch": 0.962360938126702, "grad_norm": 0.23247884213924408, "learning_rate": 6.189598508964689e-06, "loss": 0.0087, "step": 113970 }, { "epoch": 0.9624453779738659, "grad_norm": 0.3150962293148041, "learning_rate": 6.188882773876743e-06, "loss": 0.0107, "step": 113980 }, { "epoch": 0.9625298178210298, "grad_norm": 0.2832394540309906, "learning_rate": 6.1881670129665075e-06, "loss": 0.0082, "step": 113990 }, { "epoch": 0.9626142576681936, "grad_norm": 0.1194390058517456, "learning_rate": 6.187451226249527e-06, "loss": 0.0105, "step": 114000 }, { "epoch": 0.9626986975153575, "grad_norm": 0.43995368480682373, "learning_rate": 6.1867354137413495e-06, "loss": 0.0073, "step": 114010 }, { "epoch": 0.9627831373625214, "grad_norm": 0.4032977223396301, "learning_rate": 6.186019575457522e-06, "loss": 0.012, "step": 114020 }, { "epoch": 0.9628675772096853, "grad_norm": 0.278032124042511, "learning_rate": 6.185303711413593e-06, "loss": 0.0128, "step": 114030 }, { "epoch": 0.9629520170568491, "grad_norm": 0.31650233268737793, "learning_rate": 6.18458782162511e-06, "loss": 0.0046, "step": 114040 }, { "epoch": 0.963036456904013, "grad_norm": 0.14270798861980438, "learning_rate": 6.183871906107621e-06, "loss": 0.0052, "step": 114050 }, { "epoch": 0.9631208967511768, "grad_norm": 0.9225338101387024, "learning_rate": 6.183155964876677e-06, "loss": 0.0177, "step": 114060 }, { "epoch": 0.9632053365983407, "grad_norm": 0.9308257699012756, "learning_rate": 6.182439997947829e-06, "loss": 0.0163, "step": 114070 }, { "epoch": 0.9632897764455046, "grad_norm": 0.154891237616539, "learning_rate": 6.1817240053366255e-06, "loss": 0.0098, "step": 114080 }, { "epoch": 0.9633742162926685, "grad_norm": 0.4428136646747589, "learning_rate": 6.181007987058619e-06, "loss": 0.0107, "step": 114090 }, { "epoch": 0.9634586561398324, "grad_norm": 0.5780704617500305, "learning_rate": 6.1802919431293614e-06, "loss": 0.0074, "step": 114100 }, { "epoch": 0.9635430959869963, "grad_norm": 0.1677117645740509, "learning_rate": 6.1795758735644044e-06, "loss": 0.0151, "step": 114110 }, { "epoch": 0.9636275358341602, "grad_norm": 0.03881727159023285, "learning_rate": 6.178859778379303e-06, "loss": 0.016, "step": 114120 }, { "epoch": 0.963711975681324, "grad_norm": 0.24094267189502716, "learning_rate": 6.178143657589606e-06, "loss": 0.0149, "step": 114130 }, { "epoch": 0.9637964155284879, "grad_norm": 0.8636822700500488, "learning_rate": 6.177427511210872e-06, "loss": 0.0114, "step": 114140 }, { "epoch": 0.9638808553756518, "grad_norm": 0.35573601722717285, "learning_rate": 6.176711339258652e-06, "loss": 0.0104, "step": 114150 }, { "epoch": 0.9639652952228156, "grad_norm": 0.2210928052663803, "learning_rate": 6.175995141748505e-06, "loss": 0.0089, "step": 114160 }, { "epoch": 0.9640497350699795, "grad_norm": 0.09887225925922394, "learning_rate": 6.175278918695982e-06, "loss": 0.0079, "step": 114170 }, { "epoch": 0.9641341749171434, "grad_norm": 0.2503657341003418, "learning_rate": 6.174562670116644e-06, "loss": 0.0134, "step": 114180 }, { "epoch": 0.9642186147643073, "grad_norm": 0.19025598466396332, "learning_rate": 6.173846396026044e-06, "loss": 0.007, "step": 114190 }, { "epoch": 0.9643030546114711, "grad_norm": 0.07489821314811707, "learning_rate": 6.173130096439742e-06, "loss": 0.0064, "step": 114200 }, { "epoch": 0.964387494458635, "grad_norm": 0.45115697383880615, "learning_rate": 6.172413771373294e-06, "loss": 0.0138, "step": 114210 }, { "epoch": 0.9644719343057989, "grad_norm": 0.23704488575458527, "learning_rate": 6.171697420842258e-06, "loss": 0.0107, "step": 114220 }, { "epoch": 0.9645563741529628, "grad_norm": 0.11297620832920074, "learning_rate": 6.170981044862196e-06, "loss": 0.0056, "step": 114230 }, { "epoch": 0.9646408140001267, "grad_norm": 0.16418154537677765, "learning_rate": 6.1702646434486655e-06, "loss": 0.0048, "step": 114240 }, { "epoch": 0.9647252538472906, "grad_norm": 0.06030990555882454, "learning_rate": 6.169548216617226e-06, "loss": 0.0107, "step": 114250 }, { "epoch": 0.9648096936944545, "grad_norm": 0.3805594742298126, "learning_rate": 6.1688317643834405e-06, "loss": 0.016, "step": 114260 }, { "epoch": 0.9648941335416182, "grad_norm": 0.09278526902198792, "learning_rate": 6.168115286762867e-06, "loss": 0.0085, "step": 114270 }, { "epoch": 0.9649785733887821, "grad_norm": 0.2587166726589203, "learning_rate": 6.1673987837710684e-06, "loss": 0.0082, "step": 114280 }, { "epoch": 0.965063013235946, "grad_norm": 0.3547968864440918, "learning_rate": 6.166682255423608e-06, "loss": 0.012, "step": 114290 }, { "epoch": 0.9651474530831099, "grad_norm": 0.2892928719520569, "learning_rate": 6.165965701736049e-06, "loss": 0.0063, "step": 114300 }, { "epoch": 0.9652318929302738, "grad_norm": 0.28571009635925293, "learning_rate": 6.165249122723952e-06, "loss": 0.0058, "step": 114310 }, { "epoch": 0.9653163327774377, "grad_norm": 0.6988843083381653, "learning_rate": 6.164532518402885e-06, "loss": 0.0108, "step": 114320 }, { "epoch": 0.9654007726246016, "grad_norm": 0.2269282341003418, "learning_rate": 6.163815888788409e-06, "loss": 0.008, "step": 114330 }, { "epoch": 0.9654852124717654, "grad_norm": 0.26763468980789185, "learning_rate": 6.16309923389609e-06, "loss": 0.0063, "step": 114340 }, { "epoch": 0.9655696523189293, "grad_norm": 0.26671555638313293, "learning_rate": 6.162382553741495e-06, "loss": 0.012, "step": 114350 }, { "epoch": 0.9656540921660932, "grad_norm": 0.3534662425518036, "learning_rate": 6.161665848340189e-06, "loss": 0.0076, "step": 114360 }, { "epoch": 0.9657385320132571, "grad_norm": 0.2782995104789734, "learning_rate": 6.16094911770774e-06, "loss": 0.0144, "step": 114370 }, { "epoch": 0.965822971860421, "grad_norm": 0.4446110427379608, "learning_rate": 6.160232361859711e-06, "loss": 0.0089, "step": 114380 }, { "epoch": 0.9659074117075848, "grad_norm": 0.26214706897735596, "learning_rate": 6.159515580811675e-06, "loss": 0.0109, "step": 114390 }, { "epoch": 0.9659918515547486, "grad_norm": 0.4626659154891968, "learning_rate": 6.158798774579198e-06, "loss": 0.0099, "step": 114400 }, { "epoch": 0.9660762914019125, "grad_norm": 0.32651761174201965, "learning_rate": 6.15808194317785e-06, "loss": 0.0093, "step": 114410 }, { "epoch": 0.9661607312490764, "grad_norm": 0.3133574426174164, "learning_rate": 6.157365086623197e-06, "loss": 0.0068, "step": 114420 }, { "epoch": 0.9662451710962403, "grad_norm": 0.022291626781225204, "learning_rate": 6.1566482049308126e-06, "loss": 0.0086, "step": 114430 }, { "epoch": 0.9663296109434042, "grad_norm": 0.13066452741622925, "learning_rate": 6.155931298116266e-06, "loss": 0.0134, "step": 114440 }, { "epoch": 0.9664140507905681, "grad_norm": 0.5722293257713318, "learning_rate": 6.15521436619513e-06, "loss": 0.0116, "step": 114450 }, { "epoch": 0.966498490637732, "grad_norm": 0.21191377937793732, "learning_rate": 6.154497409182974e-06, "loss": 0.0056, "step": 114460 }, { "epoch": 0.9665829304848959, "grad_norm": 0.13579972088336945, "learning_rate": 6.15378042709537e-06, "loss": 0.0098, "step": 114470 }, { "epoch": 0.9666673703320597, "grad_norm": 0.17174431681632996, "learning_rate": 6.153063419947891e-06, "loss": 0.0094, "step": 114480 }, { "epoch": 0.9667518101792236, "grad_norm": 0.38557153940200806, "learning_rate": 6.152346387756113e-06, "loss": 0.0073, "step": 114490 }, { "epoch": 0.9668362500263874, "grad_norm": 0.656757652759552, "learning_rate": 6.151629330535607e-06, "loss": 0.0106, "step": 114500 }, { "epoch": 0.9669206898735513, "grad_norm": 0.12657901644706726, "learning_rate": 6.1509122483019454e-06, "loss": 0.007, "step": 114510 }, { "epoch": 0.9670051297207152, "grad_norm": 0.49734362959861755, "learning_rate": 6.150195141070709e-06, "loss": 0.011, "step": 114520 }, { "epoch": 0.9670895695678791, "grad_norm": 0.3462560474872589, "learning_rate": 6.1494780088574676e-06, "loss": 0.0153, "step": 114530 }, { "epoch": 0.967174009415043, "grad_norm": 0.3571942448616028, "learning_rate": 6.148760851677801e-06, "loss": 0.0108, "step": 114540 }, { "epoch": 0.9672584492622068, "grad_norm": 0.4242438077926636, "learning_rate": 6.1480436695472826e-06, "loss": 0.0074, "step": 114550 }, { "epoch": 0.9673428891093707, "grad_norm": 0.5951387286186218, "learning_rate": 6.147326462481491e-06, "loss": 0.0052, "step": 114560 }, { "epoch": 0.9674273289565346, "grad_norm": 0.37596914172172546, "learning_rate": 6.146609230496006e-06, "loss": 0.0092, "step": 114570 }, { "epoch": 0.9675117688036985, "grad_norm": 0.21239647269248962, "learning_rate": 6.1458919736064005e-06, "loss": 0.0078, "step": 114580 }, { "epoch": 0.9675962086508624, "grad_norm": 0.19862455129623413, "learning_rate": 6.1451746918282574e-06, "loss": 0.0107, "step": 114590 }, { "epoch": 0.9676806484980263, "grad_norm": 0.48661699891090393, "learning_rate": 6.144457385177154e-06, "loss": 0.01, "step": 114600 }, { "epoch": 0.9677650883451901, "grad_norm": 0.4587332606315613, "learning_rate": 6.143740053668673e-06, "loss": 0.0075, "step": 114610 }, { "epoch": 0.9678495281923539, "grad_norm": 0.24088618159294128, "learning_rate": 6.143022697318391e-06, "loss": 0.0069, "step": 114620 }, { "epoch": 0.9679339680395178, "grad_norm": 0.2529550790786743, "learning_rate": 6.14230531614189e-06, "loss": 0.0101, "step": 114630 }, { "epoch": 0.9680184078866817, "grad_norm": 0.45135611295700073, "learning_rate": 6.141587910154751e-06, "loss": 0.0094, "step": 114640 }, { "epoch": 0.9681028477338456, "grad_norm": 0.19863349199295044, "learning_rate": 6.140870479372558e-06, "loss": 0.0165, "step": 114650 }, { "epoch": 0.9681872875810095, "grad_norm": 0.1327962577342987, "learning_rate": 6.1401530238108905e-06, "loss": 0.0124, "step": 114660 }, { "epoch": 0.9682717274281734, "grad_norm": 0.3589330017566681, "learning_rate": 6.139435543485334e-06, "loss": 0.007, "step": 114670 }, { "epoch": 0.9683561672753372, "grad_norm": 0.15965402126312256, "learning_rate": 6.138718038411471e-06, "loss": 0.0087, "step": 114680 }, { "epoch": 0.9684406071225011, "grad_norm": 0.7132868766784668, "learning_rate": 6.138000508604886e-06, "loss": 0.0177, "step": 114690 }, { "epoch": 0.968525046969665, "grad_norm": 0.057793617248535156, "learning_rate": 6.137282954081162e-06, "loss": 0.0112, "step": 114700 }, { "epoch": 0.9686094868168289, "grad_norm": 0.3474864661693573, "learning_rate": 6.136565374855885e-06, "loss": 0.0104, "step": 114710 }, { "epoch": 0.9686939266639928, "grad_norm": 0.3554494082927704, "learning_rate": 6.135847770944641e-06, "loss": 0.015, "step": 114720 }, { "epoch": 0.9687783665111566, "grad_norm": 0.07388078421354294, "learning_rate": 6.135130142363017e-06, "loss": 0.0093, "step": 114730 }, { "epoch": 0.9688628063583204, "grad_norm": 0.3372289538383484, "learning_rate": 6.1344124891266e-06, "loss": 0.0107, "step": 114740 }, { "epoch": 0.9689472462054843, "grad_norm": 0.7868037819862366, "learning_rate": 6.133694811250974e-06, "loss": 0.0117, "step": 114750 }, { "epoch": 0.9690316860526482, "grad_norm": 0.4761399030685425, "learning_rate": 6.13297710875173e-06, "loss": 0.0112, "step": 114760 }, { "epoch": 0.9691161258998121, "grad_norm": 0.36312851309776306, "learning_rate": 6.132259381644456e-06, "loss": 0.0125, "step": 114770 }, { "epoch": 0.969200565746976, "grad_norm": 0.44890642166137695, "learning_rate": 6.13154162994474e-06, "loss": 0.0152, "step": 114780 }, { "epoch": 0.9692850055941399, "grad_norm": 0.3682819902896881, "learning_rate": 6.130823853668171e-06, "loss": 0.0135, "step": 114790 }, { "epoch": 0.9693694454413038, "grad_norm": 0.22068855166435242, "learning_rate": 6.130106052830339e-06, "loss": 0.01, "step": 114800 }, { "epoch": 0.9694538852884677, "grad_norm": 0.27604907751083374, "learning_rate": 6.129388227446836e-06, "loss": 0.0074, "step": 114810 }, { "epoch": 0.9695383251356315, "grad_norm": 0.16598568856716156, "learning_rate": 6.128670377533252e-06, "loss": 0.0086, "step": 114820 }, { "epoch": 0.9696227649827954, "grad_norm": 0.24921004474163055, "learning_rate": 6.127952503105179e-06, "loss": 0.0066, "step": 114830 }, { "epoch": 0.9697072048299593, "grad_norm": 0.16004984080791473, "learning_rate": 6.127234604178208e-06, "loss": 0.0077, "step": 114840 }, { "epoch": 0.9697916446771231, "grad_norm": 0.1931019276380539, "learning_rate": 6.126516680767933e-06, "loss": 0.0146, "step": 114850 }, { "epoch": 0.969876084524287, "grad_norm": 0.22188526391983032, "learning_rate": 6.125798732889948e-06, "loss": 0.0088, "step": 114860 }, { "epoch": 0.9699605243714509, "grad_norm": 0.24399137496948242, "learning_rate": 6.125080760559843e-06, "loss": 0.0083, "step": 114870 }, { "epoch": 0.9700449642186147, "grad_norm": 0.2638647258281708, "learning_rate": 6.124362763793215e-06, "loss": 0.0142, "step": 114880 }, { "epoch": 0.9701294040657786, "grad_norm": 0.5361051559448242, "learning_rate": 6.123644742605657e-06, "loss": 0.013, "step": 114890 }, { "epoch": 0.9702138439129425, "grad_norm": 0.47478926181793213, "learning_rate": 6.122926697012769e-06, "loss": 0.0126, "step": 114900 }, { "epoch": 0.9702982837601064, "grad_norm": 0.10932312905788422, "learning_rate": 6.1222086270301405e-06, "loss": 0.0067, "step": 114910 }, { "epoch": 0.9703827236072703, "grad_norm": 0.08115415275096893, "learning_rate": 6.1214905326733706e-06, "loss": 0.0064, "step": 114920 }, { "epoch": 0.9704671634544342, "grad_norm": 0.31536585092544556, "learning_rate": 6.1207724139580575e-06, "loss": 0.009, "step": 114930 }, { "epoch": 0.9705516033015981, "grad_norm": 0.27052658796310425, "learning_rate": 6.120054270899796e-06, "loss": 0.0068, "step": 114940 }, { "epoch": 0.970636043148762, "grad_norm": 0.2947799861431122, "learning_rate": 6.119336103514185e-06, "loss": 0.0096, "step": 114950 }, { "epoch": 0.9707204829959257, "grad_norm": 0.37524834275245667, "learning_rate": 6.118617911816823e-06, "loss": 0.0086, "step": 114960 }, { "epoch": 0.9708049228430896, "grad_norm": 0.27352699637413025, "learning_rate": 6.1178996958233115e-06, "loss": 0.0118, "step": 114970 }, { "epoch": 0.9708893626902535, "grad_norm": 0.6945366263389587, "learning_rate": 6.117181455549247e-06, "loss": 0.0181, "step": 114980 }, { "epoch": 0.9709738025374174, "grad_norm": 0.15370647609233856, "learning_rate": 6.116463191010229e-06, "loss": 0.0129, "step": 114990 }, { "epoch": 0.9710582423845813, "grad_norm": 0.10668178647756577, "learning_rate": 6.115744902221859e-06, "loss": 0.009, "step": 115000 }, { "epoch": 0.9711426822317452, "grad_norm": 0.24565467238426208, "learning_rate": 6.115026589199739e-06, "loss": 0.0188, "step": 115010 }, { "epoch": 0.971227122078909, "grad_norm": 0.23022685945034027, "learning_rate": 6.114308251959471e-06, "loss": 0.0078, "step": 115020 }, { "epoch": 0.9713115619260729, "grad_norm": 0.31092292070388794, "learning_rate": 6.113589890516654e-06, "loss": 0.0087, "step": 115030 }, { "epoch": 0.9713960017732368, "grad_norm": 0.39385050535202026, "learning_rate": 6.112871504886895e-06, "loss": 0.0093, "step": 115040 }, { "epoch": 0.9714804416204007, "grad_norm": 0.1493745893239975, "learning_rate": 6.112153095085793e-06, "loss": 0.0052, "step": 115050 }, { "epoch": 0.9715648814675646, "grad_norm": 0.48037275671958923, "learning_rate": 6.111434661128956e-06, "loss": 0.0083, "step": 115060 }, { "epoch": 0.9716493213147285, "grad_norm": 0.2907904088497162, "learning_rate": 6.110716203031983e-06, "loss": 0.01, "step": 115070 }, { "epoch": 0.9717337611618923, "grad_norm": 0.3295961022377014, "learning_rate": 6.1099977208104845e-06, "loss": 0.0134, "step": 115080 }, { "epoch": 0.9718182010090561, "grad_norm": 0.18935441970825195, "learning_rate": 6.1092792144800605e-06, "loss": 0.0083, "step": 115090 }, { "epoch": 0.97190264085622, "grad_norm": 0.23683422803878784, "learning_rate": 6.108560684056321e-06, "loss": 0.0187, "step": 115100 }, { "epoch": 0.9719870807033839, "grad_norm": 1.2541673183441162, "learning_rate": 6.107842129554871e-06, "loss": 0.0114, "step": 115110 }, { "epoch": 0.9720715205505478, "grad_norm": 0.7104538083076477, "learning_rate": 6.107123550991315e-06, "loss": 0.0069, "step": 115120 }, { "epoch": 0.9721559603977117, "grad_norm": 0.2641737759113312, "learning_rate": 6.1064049483812625e-06, "loss": 0.0181, "step": 115130 }, { "epoch": 0.9722404002448756, "grad_norm": 0.29831910133361816, "learning_rate": 6.1056863217403225e-06, "loss": 0.0128, "step": 115140 }, { "epoch": 0.9723248400920395, "grad_norm": 0.20731298625469208, "learning_rate": 6.104967671084102e-06, "loss": 0.0101, "step": 115150 }, { "epoch": 0.9724092799392033, "grad_norm": 0.26532530784606934, "learning_rate": 6.104248996428209e-06, "loss": 0.0073, "step": 115160 }, { "epoch": 0.9724937197863672, "grad_norm": 0.34596168994903564, "learning_rate": 6.103530297788255e-06, "loss": 0.0096, "step": 115170 }, { "epoch": 0.9725781596335311, "grad_norm": 0.38244035840034485, "learning_rate": 6.10281157517985e-06, "loss": 0.014, "step": 115180 }, { "epoch": 0.9726625994806949, "grad_norm": 0.4528672695159912, "learning_rate": 6.102092828618602e-06, "loss": 0.0081, "step": 115190 }, { "epoch": 0.9727470393278588, "grad_norm": 0.23114649951457977, "learning_rate": 6.101374058120126e-06, "loss": 0.0089, "step": 115200 }, { "epoch": 0.9728314791750227, "grad_norm": 0.6855199933052063, "learning_rate": 6.100655263700029e-06, "loss": 0.0115, "step": 115210 }, { "epoch": 0.9729159190221865, "grad_norm": 0.15539629757404327, "learning_rate": 6.099936445373925e-06, "loss": 0.0097, "step": 115220 }, { "epoch": 0.9730003588693504, "grad_norm": 0.23551379144191742, "learning_rate": 6.0992176031574266e-06, "loss": 0.0099, "step": 115230 }, { "epoch": 0.9730847987165143, "grad_norm": 0.17047730088233948, "learning_rate": 6.0984987370661475e-06, "loss": 0.005, "step": 115240 }, { "epoch": 0.9731692385636782, "grad_norm": 0.16119526326656342, "learning_rate": 6.097779847115701e-06, "loss": 0.01, "step": 115250 }, { "epoch": 0.9732536784108421, "grad_norm": 0.28940168023109436, "learning_rate": 6.097060933321702e-06, "loss": 0.0126, "step": 115260 }, { "epoch": 0.973338118258006, "grad_norm": 0.04180483892560005, "learning_rate": 6.096341995699764e-06, "loss": 0.0044, "step": 115270 }, { "epoch": 0.9734225581051699, "grad_norm": 0.6476774215698242, "learning_rate": 6.095623034265502e-06, "loss": 0.0182, "step": 115280 }, { "epoch": 0.9735069979523338, "grad_norm": 0.21213755011558533, "learning_rate": 6.094904049034532e-06, "loss": 0.0058, "step": 115290 }, { "epoch": 0.9735914377994975, "grad_norm": 0.36392319202423096, "learning_rate": 6.094185040022473e-06, "loss": 0.0093, "step": 115300 }, { "epoch": 0.9736758776466614, "grad_norm": 0.29222485423088074, "learning_rate": 6.093466007244936e-06, "loss": 0.0105, "step": 115310 }, { "epoch": 0.9737603174938253, "grad_norm": 0.30802103877067566, "learning_rate": 6.092746950717543e-06, "loss": 0.0075, "step": 115320 }, { "epoch": 0.9738447573409892, "grad_norm": 0.4265971779823303, "learning_rate": 6.092027870455909e-06, "loss": 0.0085, "step": 115330 }, { "epoch": 0.9739291971881531, "grad_norm": 0.35687056183815, "learning_rate": 6.091308766475654e-06, "loss": 0.0074, "step": 115340 }, { "epoch": 0.974013637035317, "grad_norm": 0.5043158531188965, "learning_rate": 6.0905896387923964e-06, "loss": 0.0088, "step": 115350 }, { "epoch": 0.9740980768824808, "grad_norm": 0.356503427028656, "learning_rate": 6.089870487421755e-06, "loss": 0.0086, "step": 115360 }, { "epoch": 0.9741825167296447, "grad_norm": 0.2699034810066223, "learning_rate": 6.08915131237935e-06, "loss": 0.0059, "step": 115370 }, { "epoch": 0.9742669565768086, "grad_norm": 0.679712176322937, "learning_rate": 6.0884321136808035e-06, "loss": 0.0061, "step": 115380 }, { "epoch": 0.9743513964239725, "grad_norm": 0.8809325695037842, "learning_rate": 6.087712891341732e-06, "loss": 0.0091, "step": 115390 }, { "epoch": 0.9744358362711364, "grad_norm": 0.23020541667938232, "learning_rate": 6.08699364537776e-06, "loss": 0.0072, "step": 115400 }, { "epoch": 0.9745202761183003, "grad_norm": 0.05187057703733444, "learning_rate": 6.086274375804508e-06, "loss": 0.0178, "step": 115410 }, { "epoch": 0.974604715965464, "grad_norm": 0.8852176666259766, "learning_rate": 6.085555082637602e-06, "loss": 0.011, "step": 115420 }, { "epoch": 0.9746891558126279, "grad_norm": 0.21666069328784943, "learning_rate": 6.084835765892659e-06, "loss": 0.0077, "step": 115430 }, { "epoch": 0.9747735956597918, "grad_norm": 0.20589956641197205, "learning_rate": 6.084116425585308e-06, "loss": 0.0141, "step": 115440 }, { "epoch": 0.9748580355069557, "grad_norm": 0.354483038187027, "learning_rate": 6.0833970617311685e-06, "loss": 0.0104, "step": 115450 }, { "epoch": 0.9749424753541196, "grad_norm": 0.39487358927726746, "learning_rate": 6.082677674345867e-06, "loss": 0.0101, "step": 115460 }, { "epoch": 0.9750269152012835, "grad_norm": 0.23218345642089844, "learning_rate": 6.0819582634450305e-06, "loss": 0.0071, "step": 115470 }, { "epoch": 0.9751113550484474, "grad_norm": 0.4439133107662201, "learning_rate": 6.081238829044279e-06, "loss": 0.0115, "step": 115480 }, { "epoch": 0.9751957948956113, "grad_norm": 0.4032706022262573, "learning_rate": 6.080519371159244e-06, "loss": 0.0077, "step": 115490 }, { "epoch": 0.9752802347427751, "grad_norm": 0.1643213927745819, "learning_rate": 6.0797998898055475e-06, "loss": 0.0105, "step": 115500 }, { "epoch": 0.975364674589939, "grad_norm": 0.18821297585964203, "learning_rate": 6.079080384998821e-06, "loss": 0.0102, "step": 115510 }, { "epoch": 0.9754491144371029, "grad_norm": 0.27973827719688416, "learning_rate": 6.078360856754688e-06, "loss": 0.0121, "step": 115520 }, { "epoch": 0.9755335542842667, "grad_norm": 0.5471905469894409, "learning_rate": 6.0776413050887795e-06, "loss": 0.0095, "step": 115530 }, { "epoch": 0.9756179941314306, "grad_norm": 0.33476021885871887, "learning_rate": 6.076921730016723e-06, "loss": 0.0143, "step": 115540 }, { "epoch": 0.9757024339785945, "grad_norm": 0.7371969819068909, "learning_rate": 6.076202131554146e-06, "loss": 0.0095, "step": 115550 }, { "epoch": 0.9757868738257583, "grad_norm": 0.3482077419757843, "learning_rate": 6.075482509716679e-06, "loss": 0.0087, "step": 115560 }, { "epoch": 0.9758713136729222, "grad_norm": 0.2893514335155487, "learning_rate": 6.074762864519953e-06, "loss": 0.0102, "step": 115570 }, { "epoch": 0.9759557535200861, "grad_norm": 0.5573439002037048, "learning_rate": 6.074043195979597e-06, "loss": 0.0104, "step": 115580 }, { "epoch": 0.97604019336725, "grad_norm": 0.13219459354877472, "learning_rate": 6.073323504111244e-06, "loss": 0.0146, "step": 115590 }, { "epoch": 0.9761246332144139, "grad_norm": 0.20254485309123993, "learning_rate": 6.072603788930523e-06, "loss": 0.013, "step": 115600 }, { "epoch": 0.9762090730615778, "grad_norm": 0.2101513147354126, "learning_rate": 6.071884050453068e-06, "loss": 0.0133, "step": 115610 }, { "epoch": 0.9762935129087417, "grad_norm": 0.3128785490989685, "learning_rate": 6.0711642886945124e-06, "loss": 0.007, "step": 115620 }, { "epoch": 0.9763779527559056, "grad_norm": 0.14457020163536072, "learning_rate": 6.070444503670487e-06, "loss": 0.003, "step": 115630 }, { "epoch": 0.9764623926030694, "grad_norm": 1.0198850631713867, "learning_rate": 6.069724695396626e-06, "loss": 0.0115, "step": 115640 }, { "epoch": 0.9765468324502332, "grad_norm": 0.40911221504211426, "learning_rate": 6.069004863888563e-06, "loss": 0.0141, "step": 115650 }, { "epoch": 0.9766312722973971, "grad_norm": 0.19849282503128052, "learning_rate": 6.068285009161936e-06, "loss": 0.0067, "step": 115660 }, { "epoch": 0.976715712144561, "grad_norm": 0.28205621242523193, "learning_rate": 6.067565131232376e-06, "loss": 0.0211, "step": 115670 }, { "epoch": 0.9768001519917249, "grad_norm": 0.40939441323280334, "learning_rate": 6.066845230115521e-06, "loss": 0.0081, "step": 115680 }, { "epoch": 0.9768845918388888, "grad_norm": 0.2770928740501404, "learning_rate": 6.066125305827006e-06, "loss": 0.005, "step": 115690 }, { "epoch": 0.9769690316860526, "grad_norm": 0.08720356971025467, "learning_rate": 6.065405358382466e-06, "loss": 0.0071, "step": 115700 }, { "epoch": 0.9770534715332165, "grad_norm": 0.03821777179837227, "learning_rate": 6.064685387797542e-06, "loss": 0.0073, "step": 115710 }, { "epoch": 0.9771379113803804, "grad_norm": 0.24134594202041626, "learning_rate": 6.063965394087868e-06, "loss": 0.0085, "step": 115720 }, { "epoch": 0.9772223512275443, "grad_norm": 0.5336904525756836, "learning_rate": 6.063245377269085e-06, "loss": 0.0086, "step": 115730 }, { "epoch": 0.9773067910747082, "grad_norm": 0.2512509226799011, "learning_rate": 6.062525337356829e-06, "loss": 0.0047, "step": 115740 }, { "epoch": 0.9773912309218721, "grad_norm": 0.20916730165481567, "learning_rate": 6.061805274366741e-06, "loss": 0.0069, "step": 115750 }, { "epoch": 0.9774756707690359, "grad_norm": 0.394806444644928, "learning_rate": 6.0610851883144605e-06, "loss": 0.0115, "step": 115760 }, { "epoch": 0.9775601106161997, "grad_norm": 0.33398616313934326, "learning_rate": 6.060365079215626e-06, "loss": 0.0109, "step": 115770 }, { "epoch": 0.9776445504633636, "grad_norm": 0.21225100755691528, "learning_rate": 6.05964494708588e-06, "loss": 0.008, "step": 115780 }, { "epoch": 0.9777289903105275, "grad_norm": 0.607688307762146, "learning_rate": 6.058924791940863e-06, "loss": 0.0083, "step": 115790 }, { "epoch": 0.9778134301576914, "grad_norm": 0.2598191201686859, "learning_rate": 6.058204613796218e-06, "loss": 0.0061, "step": 115800 }, { "epoch": 0.9778978700048553, "grad_norm": 0.3382193148136139, "learning_rate": 6.057484412667584e-06, "loss": 0.0072, "step": 115810 }, { "epoch": 0.9779823098520192, "grad_norm": 0.7238548994064331, "learning_rate": 6.056764188570605e-06, "loss": 0.0137, "step": 115820 }, { "epoch": 0.9780667496991831, "grad_norm": 0.048351138830184937, "learning_rate": 6.056043941520925e-06, "loss": 0.0105, "step": 115830 }, { "epoch": 0.9781511895463469, "grad_norm": 0.26789286732673645, "learning_rate": 6.055323671534187e-06, "loss": 0.0063, "step": 115840 }, { "epoch": 0.9782356293935108, "grad_norm": 0.1972074955701828, "learning_rate": 6.054603378626034e-06, "loss": 0.0069, "step": 115850 }, { "epoch": 0.9783200692406747, "grad_norm": 0.4937775731086731, "learning_rate": 6.053883062812113e-06, "loss": 0.0105, "step": 115860 }, { "epoch": 0.9784045090878386, "grad_norm": 0.2989547550678253, "learning_rate": 6.053162724108067e-06, "loss": 0.0071, "step": 115870 }, { "epoch": 0.9784889489350024, "grad_norm": 0.34866780042648315, "learning_rate": 6.052442362529542e-06, "loss": 0.0094, "step": 115880 }, { "epoch": 0.9785733887821663, "grad_norm": 1.0449180603027344, "learning_rate": 6.051721978092185e-06, "loss": 0.014, "step": 115890 }, { "epoch": 0.9786578286293302, "grad_norm": 0.34700295329093933, "learning_rate": 6.051001570811643e-06, "loss": 0.0106, "step": 115900 }, { "epoch": 0.978742268476494, "grad_norm": 0.04061361774802208, "learning_rate": 6.050281140703563e-06, "loss": 0.0045, "step": 115910 }, { "epoch": 0.9788267083236579, "grad_norm": 0.3873661756515503, "learning_rate": 6.049560687783591e-06, "loss": 0.0066, "step": 115920 }, { "epoch": 0.9789111481708218, "grad_norm": 0.23379743099212646, "learning_rate": 6.048840212067375e-06, "loss": 0.0083, "step": 115930 }, { "epoch": 0.9789955880179857, "grad_norm": 0.5346904397010803, "learning_rate": 6.048119713570565e-06, "loss": 0.0105, "step": 115940 }, { "epoch": 0.9790800278651496, "grad_norm": 0.08782706409692764, "learning_rate": 6.047399192308811e-06, "loss": 0.007, "step": 115950 }, { "epoch": 0.9791644677123135, "grad_norm": 0.46720823645591736, "learning_rate": 6.046678648297759e-06, "loss": 0.0178, "step": 115960 }, { "epoch": 0.9792489075594774, "grad_norm": 0.25322505831718445, "learning_rate": 6.045958081553062e-06, "loss": 0.0132, "step": 115970 }, { "epoch": 0.9793333474066412, "grad_norm": 0.24545353651046753, "learning_rate": 6.045237492090369e-06, "loss": 0.0051, "step": 115980 }, { "epoch": 0.979417787253805, "grad_norm": 0.45121780037879944, "learning_rate": 6.044516879925334e-06, "loss": 0.0118, "step": 115990 }, { "epoch": 0.9795022271009689, "grad_norm": 0.4158487021923065, "learning_rate": 6.043796245073604e-06, "loss": 0.0072, "step": 116000 }, { "epoch": 0.9795866669481328, "grad_norm": 0.46626582741737366, "learning_rate": 6.043075587550835e-06, "loss": 0.0131, "step": 116010 }, { "epoch": 0.9796711067952967, "grad_norm": 0.06252287328243256, "learning_rate": 6.0423549073726775e-06, "loss": 0.0174, "step": 116020 }, { "epoch": 0.9797555466424606, "grad_norm": 0.1909666508436203, "learning_rate": 6.041634204554785e-06, "loss": 0.01, "step": 116030 }, { "epoch": 0.9798399864896244, "grad_norm": 0.22361315786838531, "learning_rate": 6.0409134791128124e-06, "loss": 0.0086, "step": 116040 }, { "epoch": 0.9799244263367883, "grad_norm": 0.36388930678367615, "learning_rate": 6.040192731062411e-06, "loss": 0.0085, "step": 116050 }, { "epoch": 0.9800088661839522, "grad_norm": 0.2950849235057831, "learning_rate": 6.039471960419237e-06, "loss": 0.0079, "step": 116060 }, { "epoch": 0.9800933060311161, "grad_norm": 0.058035314083099365, "learning_rate": 6.038751167198944e-06, "loss": 0.0098, "step": 116070 }, { "epoch": 0.98017774587828, "grad_norm": 0.2754848003387451, "learning_rate": 6.03803035141719e-06, "loss": 0.01, "step": 116080 }, { "epoch": 0.9802621857254439, "grad_norm": 0.5293766856193542, "learning_rate": 6.037309513089628e-06, "loss": 0.0089, "step": 116090 }, { "epoch": 0.9803466255726078, "grad_norm": 0.149761363863945, "learning_rate": 6.036588652231916e-06, "loss": 0.0053, "step": 116100 }, { "epoch": 0.9804310654197715, "grad_norm": 0.4501233398914337, "learning_rate": 6.035867768859712e-06, "loss": 0.0138, "step": 116110 }, { "epoch": 0.9805155052669354, "grad_norm": 0.14236125349998474, "learning_rate": 6.035146862988672e-06, "loss": 0.0093, "step": 116120 }, { "epoch": 0.9805999451140993, "grad_norm": 0.19588448107242584, "learning_rate": 6.0344259346344536e-06, "loss": 0.0071, "step": 116130 }, { "epoch": 0.9806843849612632, "grad_norm": 0.2659585475921631, "learning_rate": 6.033704983812716e-06, "loss": 0.0053, "step": 116140 }, { "epoch": 0.9807688248084271, "grad_norm": 0.11207985877990723, "learning_rate": 6.03298401053912e-06, "loss": 0.012, "step": 116150 }, { "epoch": 0.980853264655591, "grad_norm": 0.7343537211418152, "learning_rate": 6.032263014829321e-06, "loss": 0.0134, "step": 116160 }, { "epoch": 0.9809377045027549, "grad_norm": 0.33545371890068054, "learning_rate": 6.031541996698982e-06, "loss": 0.004, "step": 116170 }, { "epoch": 0.9810221443499187, "grad_norm": 0.36913734674453735, "learning_rate": 6.030820956163761e-06, "loss": 0.0083, "step": 116180 }, { "epoch": 0.9811065841970826, "grad_norm": 0.4245728552341461, "learning_rate": 6.0300998932393194e-06, "loss": 0.0091, "step": 116190 }, { "epoch": 0.9811910240442465, "grad_norm": 0.24290698766708374, "learning_rate": 6.029378807941321e-06, "loss": 0.0127, "step": 116200 }, { "epoch": 0.9812754638914104, "grad_norm": 0.26446300745010376, "learning_rate": 6.028657700285425e-06, "loss": 0.0161, "step": 116210 }, { "epoch": 0.9813599037385742, "grad_norm": 0.053417451679706573, "learning_rate": 6.027936570287295e-06, "loss": 0.0051, "step": 116220 }, { "epoch": 0.9814443435857381, "grad_norm": 1.0600800514221191, "learning_rate": 6.027215417962593e-06, "loss": 0.0101, "step": 116230 }, { "epoch": 0.981528783432902, "grad_norm": 1.3376481533050537, "learning_rate": 6.026494243326984e-06, "loss": 0.0144, "step": 116240 }, { "epoch": 0.9816132232800658, "grad_norm": 0.1712288111448288, "learning_rate": 6.0257730463961275e-06, "loss": 0.0121, "step": 116250 }, { "epoch": 0.9816976631272297, "grad_norm": 0.2625793516635895, "learning_rate": 6.025051827185692e-06, "loss": 0.011, "step": 116260 }, { "epoch": 0.9817821029743936, "grad_norm": 0.39887869358062744, "learning_rate": 6.024330585711342e-06, "loss": 0.0125, "step": 116270 }, { "epoch": 0.9818665428215575, "grad_norm": 0.11393425613641739, "learning_rate": 6.023609321988741e-06, "loss": 0.0078, "step": 116280 }, { "epoch": 0.9819509826687214, "grad_norm": 0.5275446176528931, "learning_rate": 6.022888036033555e-06, "loss": 0.0121, "step": 116290 }, { "epoch": 0.9820354225158853, "grad_norm": 0.5541825890541077, "learning_rate": 6.022166727861449e-06, "loss": 0.0078, "step": 116300 }, { "epoch": 0.9821198623630492, "grad_norm": 0.2816966474056244, "learning_rate": 6.0214453974880925e-06, "loss": 0.0062, "step": 116310 }, { "epoch": 0.982204302210213, "grad_norm": 0.6220521330833435, "learning_rate": 6.020724044929153e-06, "loss": 0.022, "step": 116320 }, { "epoch": 0.9822887420573769, "grad_norm": 0.17351819574832916, "learning_rate": 6.020002670200293e-06, "loss": 0.0064, "step": 116330 }, { "epoch": 0.9823731819045407, "grad_norm": 0.24829845130443573, "learning_rate": 6.019281273317185e-06, "loss": 0.0148, "step": 116340 }, { "epoch": 0.9824576217517046, "grad_norm": 0.8965827822685242, "learning_rate": 6.018559854295498e-06, "loss": 0.013, "step": 116350 }, { "epoch": 0.9825420615988685, "grad_norm": 0.1589495986700058, "learning_rate": 6.0178384131509e-06, "loss": 0.0123, "step": 116360 }, { "epoch": 0.9826265014460324, "grad_norm": 0.27589094638824463, "learning_rate": 6.017116949899059e-06, "loss": 0.0105, "step": 116370 }, { "epoch": 0.9827109412931962, "grad_norm": 0.33048707246780396, "learning_rate": 6.016395464555647e-06, "loss": 0.0105, "step": 116380 }, { "epoch": 0.9827953811403601, "grad_norm": 0.4160858392715454, "learning_rate": 6.0156739571363335e-06, "loss": 0.0223, "step": 116390 }, { "epoch": 0.982879820987524, "grad_norm": 0.6034177541732788, "learning_rate": 6.0149524276567905e-06, "loss": 0.0171, "step": 116400 }, { "epoch": 0.9829642608346879, "grad_norm": 0.23247714340686798, "learning_rate": 6.0142308761326874e-06, "loss": 0.0068, "step": 116410 }, { "epoch": 0.9830487006818518, "grad_norm": 0.28464940190315247, "learning_rate": 6.013509302579698e-06, "loss": 0.009, "step": 116420 }, { "epoch": 0.9831331405290157, "grad_norm": 0.10458555817604065, "learning_rate": 6.012787707013493e-06, "loss": 0.0084, "step": 116430 }, { "epoch": 0.9832175803761796, "grad_norm": 0.4406537711620331, "learning_rate": 6.012066089449749e-06, "loss": 0.0079, "step": 116440 }, { "epoch": 0.9833020202233433, "grad_norm": 0.4989588260650635, "learning_rate": 6.011344449904135e-06, "loss": 0.0085, "step": 116450 }, { "epoch": 0.9833864600705072, "grad_norm": 0.1620015799999237, "learning_rate": 6.010622788392328e-06, "loss": 0.0063, "step": 116460 }, { "epoch": 0.9834708999176711, "grad_norm": 0.7259813547134399, "learning_rate": 6.0099011049299994e-06, "loss": 0.0136, "step": 116470 }, { "epoch": 0.983555339764835, "grad_norm": 0.5843364596366882, "learning_rate": 6.009179399532827e-06, "loss": 0.0283, "step": 116480 }, { "epoch": 0.9836397796119989, "grad_norm": 0.28674691915512085, "learning_rate": 6.0084576722164855e-06, "loss": 0.0074, "step": 116490 }, { "epoch": 0.9837242194591628, "grad_norm": 0.08015347272157669, "learning_rate": 6.007735922996649e-06, "loss": 0.0118, "step": 116500 }, { "epoch": 0.9838086593063267, "grad_norm": 0.275899738073349, "learning_rate": 6.007014151888995e-06, "loss": 0.0127, "step": 116510 }, { "epoch": 0.9838930991534905, "grad_norm": 0.06773801892995834, "learning_rate": 6.0062923589092e-06, "loss": 0.015, "step": 116520 }, { "epoch": 0.9839775390006544, "grad_norm": 0.6297892928123474, "learning_rate": 6.005570544072942e-06, "loss": 0.0077, "step": 116530 }, { "epoch": 0.9840619788478183, "grad_norm": 0.6960635185241699, "learning_rate": 6.0048487073958975e-06, "loss": 0.0084, "step": 116540 }, { "epoch": 0.9841464186949822, "grad_norm": 0.37598347663879395, "learning_rate": 6.004126848893743e-06, "loss": 0.0117, "step": 116550 }, { "epoch": 0.9842308585421461, "grad_norm": 0.4493214786052704, "learning_rate": 6.003404968582162e-06, "loss": 0.0099, "step": 116560 }, { "epoch": 0.9843152983893099, "grad_norm": 0.16093015670776367, "learning_rate": 6.002683066476829e-06, "loss": 0.0069, "step": 116570 }, { "epoch": 0.9843997382364738, "grad_norm": 0.04311029240489006, "learning_rate": 6.0019611425934255e-06, "loss": 0.0132, "step": 116580 }, { "epoch": 0.9844841780836376, "grad_norm": 0.29497817158699036, "learning_rate": 6.0012391969476305e-06, "loss": 0.0059, "step": 116590 }, { "epoch": 0.9845686179308015, "grad_norm": 0.376697838306427, "learning_rate": 6.000517229555127e-06, "loss": 0.0144, "step": 116600 }, { "epoch": 0.9846530577779654, "grad_norm": 0.3785629868507385, "learning_rate": 5.999795240431594e-06, "loss": 0.0125, "step": 116610 }, { "epoch": 0.9847374976251293, "grad_norm": 0.4263734519481659, "learning_rate": 5.999073229592713e-06, "loss": 0.0076, "step": 116620 }, { "epoch": 0.9848219374722932, "grad_norm": 0.304425448179245, "learning_rate": 5.998351197054165e-06, "loss": 0.0064, "step": 116630 }, { "epoch": 0.9849063773194571, "grad_norm": 1.3120490312576294, "learning_rate": 5.997629142831635e-06, "loss": 0.0092, "step": 116640 }, { "epoch": 0.984990817166621, "grad_norm": 0.3947804272174835, "learning_rate": 5.996907066940805e-06, "loss": 0.0169, "step": 116650 }, { "epoch": 0.9850752570137848, "grad_norm": 0.14878420531749725, "learning_rate": 5.996184969397356e-06, "loss": 0.0039, "step": 116660 }, { "epoch": 0.9851596968609487, "grad_norm": 0.1093684583902359, "learning_rate": 5.995462850216974e-06, "loss": 0.012, "step": 116670 }, { "epoch": 0.9852441367081125, "grad_norm": 0.2843914031982422, "learning_rate": 5.994740709415343e-06, "loss": 0.0067, "step": 116680 }, { "epoch": 0.9853285765552764, "grad_norm": 0.7487130165100098, "learning_rate": 5.9940185470081495e-06, "loss": 0.0139, "step": 116690 }, { "epoch": 0.9854130164024403, "grad_norm": 0.27424901723861694, "learning_rate": 5.9932963630110744e-06, "loss": 0.0154, "step": 116700 }, { "epoch": 0.9854974562496042, "grad_norm": 0.3427756726741791, "learning_rate": 5.992574157439805e-06, "loss": 0.0103, "step": 116710 }, { "epoch": 0.985581896096768, "grad_norm": 0.4719932973384857, "learning_rate": 5.991851930310032e-06, "loss": 0.0118, "step": 116720 }, { "epoch": 0.9856663359439319, "grad_norm": 0.3212504982948303, "learning_rate": 5.991129681637435e-06, "loss": 0.0097, "step": 116730 }, { "epoch": 0.9857507757910958, "grad_norm": 0.23843924701213837, "learning_rate": 5.990407411437708e-06, "loss": 0.0178, "step": 116740 }, { "epoch": 0.9858352156382597, "grad_norm": 0.021718833595514297, "learning_rate": 5.989685119726532e-06, "loss": 0.0048, "step": 116750 }, { "epoch": 0.9859196554854236, "grad_norm": 0.4066835045814514, "learning_rate": 5.9889628065195985e-06, "loss": 0.0116, "step": 116760 }, { "epoch": 0.9860040953325875, "grad_norm": 0.4688664376735687, "learning_rate": 5.988240471832596e-06, "loss": 0.0092, "step": 116770 }, { "epoch": 0.9860885351797514, "grad_norm": 0.5102046132087708, "learning_rate": 5.987518115681212e-06, "loss": 0.0103, "step": 116780 }, { "epoch": 0.9861729750269153, "grad_norm": 0.18571671843528748, "learning_rate": 5.986795738081138e-06, "loss": 0.0168, "step": 116790 }, { "epoch": 0.986257414874079, "grad_norm": 0.3891282379627228, "learning_rate": 5.986073339048062e-06, "loss": 0.0051, "step": 116800 }, { "epoch": 0.9863418547212429, "grad_norm": 0.32797542214393616, "learning_rate": 5.985350918597677e-06, "loss": 0.0132, "step": 116810 }, { "epoch": 0.9864262945684068, "grad_norm": 0.2245892435312271, "learning_rate": 5.984628476745668e-06, "loss": 0.0069, "step": 116820 }, { "epoch": 0.9865107344155707, "grad_norm": 0.5269702076911926, "learning_rate": 5.983906013507733e-06, "loss": 0.0135, "step": 116830 }, { "epoch": 0.9865951742627346, "grad_norm": 0.46621832251548767, "learning_rate": 5.983183528899561e-06, "loss": 0.0114, "step": 116840 }, { "epoch": 0.9866796141098985, "grad_norm": 0.22662363946437836, "learning_rate": 5.982461022936845e-06, "loss": 0.0175, "step": 116850 }, { "epoch": 0.9867640539570623, "grad_norm": 0.012513152323663235, "learning_rate": 5.981738495635277e-06, "loss": 0.0074, "step": 116860 }, { "epoch": 0.9868484938042262, "grad_norm": 0.143129363656044, "learning_rate": 5.981015947010548e-06, "loss": 0.0158, "step": 116870 }, { "epoch": 0.9869329336513901, "grad_norm": 0.0783718079328537, "learning_rate": 5.980293377078354e-06, "loss": 0.0109, "step": 116880 }, { "epoch": 0.987017373498554, "grad_norm": 0.07304660230875015, "learning_rate": 5.979570785854391e-06, "loss": 0.0092, "step": 116890 }, { "epoch": 0.9871018133457179, "grad_norm": 0.6456913352012634, "learning_rate": 5.978848173354348e-06, "loss": 0.0267, "step": 116900 }, { "epoch": 0.9871862531928817, "grad_norm": 0.2440308779478073, "learning_rate": 5.978125539593925e-06, "loss": 0.0099, "step": 116910 }, { "epoch": 0.9872706930400456, "grad_norm": 0.12842974066734314, "learning_rate": 5.977402884588815e-06, "loss": 0.0068, "step": 116920 }, { "epoch": 0.9873551328872094, "grad_norm": 0.32095953822135925, "learning_rate": 5.976680208354716e-06, "loss": 0.0146, "step": 116930 }, { "epoch": 0.9874395727343733, "grad_norm": 0.2129664272069931, "learning_rate": 5.975957510907322e-06, "loss": 0.0117, "step": 116940 }, { "epoch": 0.9875240125815372, "grad_norm": 0.13103438913822174, "learning_rate": 5.975234792262331e-06, "loss": 0.0134, "step": 116950 }, { "epoch": 0.9876084524287011, "grad_norm": 0.20808550715446472, "learning_rate": 5.9745120524354395e-06, "loss": 0.0094, "step": 116960 }, { "epoch": 0.987692892275865, "grad_norm": 0.1472507119178772, "learning_rate": 5.973789291442348e-06, "loss": 0.0127, "step": 116970 }, { "epoch": 0.9877773321230289, "grad_norm": 0.13375714421272278, "learning_rate": 5.973066509298752e-06, "loss": 0.0089, "step": 116980 }, { "epoch": 0.9878617719701928, "grad_norm": 0.26034417748451233, "learning_rate": 5.9723437060203495e-06, "loss": 0.015, "step": 116990 }, { "epoch": 0.9879462118173566, "grad_norm": 0.30470603704452515, "learning_rate": 5.971620881622841e-06, "loss": 0.0075, "step": 117000 }, { "epoch": 0.9880306516645205, "grad_norm": 0.6016457080841064, "learning_rate": 5.970898036121927e-06, "loss": 0.0117, "step": 117010 }, { "epoch": 0.9881150915116844, "grad_norm": 0.2214793711900711, "learning_rate": 5.970175169533306e-06, "loss": 0.0116, "step": 117020 }, { "epoch": 0.9881995313588482, "grad_norm": 0.33858993649482727, "learning_rate": 5.969452281872679e-06, "loss": 0.0126, "step": 117030 }, { "epoch": 0.9882839712060121, "grad_norm": 0.1291629523038864, "learning_rate": 5.968729373155747e-06, "loss": 0.0118, "step": 117040 }, { "epoch": 0.988368411053176, "grad_norm": 0.22164513170719147, "learning_rate": 5.968006443398213e-06, "loss": 0.0098, "step": 117050 }, { "epoch": 0.9884528509003399, "grad_norm": 0.44158002734184265, "learning_rate": 5.967283492615776e-06, "loss": 0.0149, "step": 117060 }, { "epoch": 0.9885372907475037, "grad_norm": 0.4945128560066223, "learning_rate": 5.96656052082414e-06, "loss": 0.011, "step": 117070 }, { "epoch": 0.9886217305946676, "grad_norm": 0.4356830418109894, "learning_rate": 5.965837528039007e-06, "loss": 0.0069, "step": 117080 }, { "epoch": 0.9887061704418315, "grad_norm": 0.0257750004529953, "learning_rate": 5.9651145142760825e-06, "loss": 0.0056, "step": 117090 }, { "epoch": 0.9887906102889954, "grad_norm": 0.360866904258728, "learning_rate": 5.964391479551068e-06, "loss": 0.0184, "step": 117100 }, { "epoch": 0.9888750501361593, "grad_norm": 0.1356402337551117, "learning_rate": 5.963668423879667e-06, "loss": 0.0097, "step": 117110 }, { "epoch": 0.9889594899833232, "grad_norm": 0.47661498188972473, "learning_rate": 5.962945347277587e-06, "loss": 0.0105, "step": 117120 }, { "epoch": 0.9890439298304871, "grad_norm": 0.2400568425655365, "learning_rate": 5.96222224976053e-06, "loss": 0.0095, "step": 117130 }, { "epoch": 0.9891283696776508, "grad_norm": 0.21890926361083984, "learning_rate": 5.961499131344204e-06, "loss": 0.0097, "step": 117140 }, { "epoch": 0.9892128095248147, "grad_norm": 0.32107600569725037, "learning_rate": 5.9607759920443124e-06, "loss": 0.0092, "step": 117150 }, { "epoch": 0.9892972493719786, "grad_norm": 0.2222064882516861, "learning_rate": 5.9600528318765635e-06, "loss": 0.0084, "step": 117160 }, { "epoch": 0.9893816892191425, "grad_norm": 2.200185775756836, "learning_rate": 5.959329650856664e-06, "loss": 0.0112, "step": 117170 }, { "epoch": 0.9894661290663064, "grad_norm": 0.4941905438899994, "learning_rate": 5.958606449000322e-06, "loss": 0.0069, "step": 117180 }, { "epoch": 0.9895505689134703, "grad_norm": 0.21232202649116516, "learning_rate": 5.957883226323243e-06, "loss": 0.0092, "step": 117190 }, { "epoch": 0.9896350087606341, "grad_norm": 0.4407435953617096, "learning_rate": 5.957159982841138e-06, "loss": 0.0201, "step": 117200 }, { "epoch": 0.989719448607798, "grad_norm": 0.192457914352417, "learning_rate": 5.956436718569713e-06, "loss": 0.0078, "step": 117210 }, { "epoch": 0.9898038884549619, "grad_norm": 0.09328515082597733, "learning_rate": 5.9557134335246805e-06, "loss": 0.0035, "step": 117220 }, { "epoch": 0.9898883283021258, "grad_norm": 0.5025803446769714, "learning_rate": 5.954990127721746e-06, "loss": 0.0237, "step": 117230 }, { "epoch": 0.9899727681492897, "grad_norm": 0.4389079213142395, "learning_rate": 5.9542668011766225e-06, "loss": 0.0084, "step": 117240 }, { "epoch": 0.9900572079964536, "grad_norm": 0.086487777531147, "learning_rate": 5.953543453905019e-06, "loss": 0.0063, "step": 117250 }, { "epoch": 0.9901416478436174, "grad_norm": 0.11275852471590042, "learning_rate": 5.952820085922648e-06, "loss": 0.0036, "step": 117260 }, { "epoch": 0.9902260876907812, "grad_norm": 0.02558717131614685, "learning_rate": 5.952096697245219e-06, "loss": 0.0115, "step": 117270 }, { "epoch": 0.9903105275379451, "grad_norm": 0.26892149448394775, "learning_rate": 5.951373287888446e-06, "loss": 0.0109, "step": 117280 }, { "epoch": 0.990394967385109, "grad_norm": 0.3590477705001831, "learning_rate": 5.950649857868039e-06, "loss": 0.0057, "step": 117290 }, { "epoch": 0.9904794072322729, "grad_norm": 0.4062216877937317, "learning_rate": 5.949926407199712e-06, "loss": 0.0085, "step": 117300 }, { "epoch": 0.9905638470794368, "grad_norm": 0.3589833378791809, "learning_rate": 5.949202935899179e-06, "loss": 0.0085, "step": 117310 }, { "epoch": 0.9906482869266007, "grad_norm": 0.12454430013895035, "learning_rate": 5.948479443982152e-06, "loss": 0.0086, "step": 117320 }, { "epoch": 0.9907327267737646, "grad_norm": 0.6135586500167847, "learning_rate": 5.947755931464346e-06, "loss": 0.0076, "step": 117330 }, { "epoch": 0.9908171666209284, "grad_norm": 0.015616240911185741, "learning_rate": 5.947032398361476e-06, "loss": 0.015, "step": 117340 }, { "epoch": 0.9909016064680923, "grad_norm": 0.20053696632385254, "learning_rate": 5.946308844689256e-06, "loss": 0.0076, "step": 117350 }, { "epoch": 0.9909860463152562, "grad_norm": 0.19461587071418762, "learning_rate": 5.945585270463401e-06, "loss": 0.0115, "step": 117360 }, { "epoch": 0.99107048616242, "grad_norm": 0.2662470042705536, "learning_rate": 5.944861675699628e-06, "loss": 0.0057, "step": 117370 }, { "epoch": 0.9911549260095839, "grad_norm": 0.5169786214828491, "learning_rate": 5.944138060413654e-06, "loss": 0.009, "step": 117380 }, { "epoch": 0.9912393658567478, "grad_norm": 0.06412059813737869, "learning_rate": 5.943414424621193e-06, "loss": 0.006, "step": 117390 }, { "epoch": 0.9913238057039117, "grad_norm": 0.35471999645233154, "learning_rate": 5.9426907683379656e-06, "loss": 0.0117, "step": 117400 }, { "epoch": 0.9914082455510755, "grad_norm": 0.19646358489990234, "learning_rate": 5.941967091579687e-06, "loss": 0.0137, "step": 117410 }, { "epoch": 0.9914926853982394, "grad_norm": 0.5516740679740906, "learning_rate": 5.941243394362077e-06, "loss": 0.0194, "step": 117420 }, { "epoch": 0.9915771252454033, "grad_norm": 0.28912606835365295, "learning_rate": 5.940519676700854e-06, "loss": 0.0082, "step": 117430 }, { "epoch": 0.9916615650925672, "grad_norm": 0.8265253305435181, "learning_rate": 5.9397959386117346e-06, "loss": 0.0147, "step": 117440 }, { "epoch": 0.9917460049397311, "grad_norm": 0.560823917388916, "learning_rate": 5.939072180110441e-06, "loss": 0.0083, "step": 117450 }, { "epoch": 0.991830444786895, "grad_norm": 0.2345036268234253, "learning_rate": 5.938348401212693e-06, "loss": 0.0076, "step": 117460 }, { "epoch": 0.9919148846340589, "grad_norm": 0.20948264002799988, "learning_rate": 5.937624601934207e-06, "loss": 0.0131, "step": 117470 }, { "epoch": 0.9919993244812227, "grad_norm": 0.23250606656074524, "learning_rate": 5.936900782290709e-06, "loss": 0.011, "step": 117480 }, { "epoch": 0.9920837643283865, "grad_norm": 0.2971169948577881, "learning_rate": 5.936176942297918e-06, "loss": 0.0105, "step": 117490 }, { "epoch": 0.9921682041755504, "grad_norm": 0.14119653403759003, "learning_rate": 5.935453081971555e-06, "loss": 0.0111, "step": 117500 }, { "epoch": 0.9922526440227143, "grad_norm": 0.23435986042022705, "learning_rate": 5.934729201327343e-06, "loss": 0.0129, "step": 117510 }, { "epoch": 0.9923370838698782, "grad_norm": 0.1945832520723343, "learning_rate": 5.9340053003810035e-06, "loss": 0.0286, "step": 117520 }, { "epoch": 0.9924215237170421, "grad_norm": 0.2432815134525299, "learning_rate": 5.933281379148261e-06, "loss": 0.0088, "step": 117530 }, { "epoch": 0.992505963564206, "grad_norm": 0.1784549057483673, "learning_rate": 5.932557437644839e-06, "loss": 0.0107, "step": 117540 }, { "epoch": 0.9925904034113698, "grad_norm": 0.3217950165271759, "learning_rate": 5.931833475886459e-06, "loss": 0.0055, "step": 117550 }, { "epoch": 0.9926748432585337, "grad_norm": 0.2177957445383072, "learning_rate": 5.931109493888848e-06, "loss": 0.0079, "step": 117560 }, { "epoch": 0.9927592831056976, "grad_norm": 0.3771445155143738, "learning_rate": 5.93038549166773e-06, "loss": 0.0108, "step": 117570 }, { "epoch": 0.9928437229528615, "grad_norm": 0.27782586216926575, "learning_rate": 5.929661469238829e-06, "loss": 0.0079, "step": 117580 }, { "epoch": 0.9929281628000254, "grad_norm": 0.23073838651180267, "learning_rate": 5.9289374266178725e-06, "loss": 0.009, "step": 117590 }, { "epoch": 0.9930126026471892, "grad_norm": 0.8773447871208191, "learning_rate": 5.928213363820583e-06, "loss": 0.0086, "step": 117600 }, { "epoch": 0.993097042494353, "grad_norm": 0.16337162256240845, "learning_rate": 5.927489280862691e-06, "loss": 0.0114, "step": 117610 }, { "epoch": 0.9931814823415169, "grad_norm": 0.19199350476264954, "learning_rate": 5.926765177759922e-06, "loss": 0.0118, "step": 117620 }, { "epoch": 0.9932659221886808, "grad_norm": 0.3993901312351227, "learning_rate": 5.926041054528004e-06, "loss": 0.011, "step": 117630 }, { "epoch": 0.9933503620358447, "grad_norm": 0.2239387482404709, "learning_rate": 5.925316911182662e-06, "loss": 0.0181, "step": 117640 }, { "epoch": 0.9934348018830086, "grad_norm": 0.049661826342344284, "learning_rate": 5.924592747739627e-06, "loss": 0.0056, "step": 117650 }, { "epoch": 0.9935192417301725, "grad_norm": 0.10524322837591171, "learning_rate": 5.923868564214628e-06, "loss": 0.0111, "step": 117660 }, { "epoch": 0.9936036815773364, "grad_norm": 0.22388535737991333, "learning_rate": 5.923144360623392e-06, "loss": 0.0131, "step": 117670 }, { "epoch": 0.9936881214245002, "grad_norm": 0.3606085479259491, "learning_rate": 5.922420136981651e-06, "loss": 0.0075, "step": 117680 }, { "epoch": 0.9937725612716641, "grad_norm": 0.29371851682662964, "learning_rate": 5.9216958933051316e-06, "loss": 0.0083, "step": 117690 }, { "epoch": 0.993857001118828, "grad_norm": 0.0468420684337616, "learning_rate": 5.920971629609567e-06, "loss": 0.012, "step": 117700 }, { "epoch": 0.9939414409659919, "grad_norm": 0.23173661530017853, "learning_rate": 5.920247345910689e-06, "loss": 0.0136, "step": 117710 }, { "epoch": 0.9940258808131557, "grad_norm": 0.2566111087799072, "learning_rate": 5.919523042224226e-06, "loss": 0.0117, "step": 117720 }, { "epoch": 0.9941103206603196, "grad_norm": 0.22876347601413727, "learning_rate": 5.91879871856591e-06, "loss": 0.0078, "step": 117730 }, { "epoch": 0.9941947605074835, "grad_norm": 0.2704116106033325, "learning_rate": 5.918074374951474e-06, "loss": 0.0156, "step": 117740 }, { "epoch": 0.9942792003546473, "grad_norm": 0.3192022442817688, "learning_rate": 5.917350011396653e-06, "loss": 0.0073, "step": 117750 }, { "epoch": 0.9943636402018112, "grad_norm": 0.1757858395576477, "learning_rate": 5.916625627917177e-06, "loss": 0.0063, "step": 117760 }, { "epoch": 0.9944480800489751, "grad_norm": 0.6017798781394958, "learning_rate": 5.915901224528779e-06, "loss": 0.0137, "step": 117770 }, { "epoch": 0.994532519896139, "grad_norm": 0.2534254193305969, "learning_rate": 5.915176801247195e-06, "loss": 0.0054, "step": 117780 }, { "epoch": 0.9946169597433029, "grad_norm": 0.6736917495727539, "learning_rate": 5.9144523580881575e-06, "loss": 0.0141, "step": 117790 }, { "epoch": 0.9947013995904668, "grad_norm": 0.24963264167308807, "learning_rate": 5.913727895067403e-06, "loss": 0.0156, "step": 117800 }, { "epoch": 0.9947858394376307, "grad_norm": 0.20896926522254944, "learning_rate": 5.913003412200666e-06, "loss": 0.0104, "step": 117810 }, { "epoch": 0.9948702792847945, "grad_norm": 0.2980729937553406, "learning_rate": 5.912278909503681e-06, "loss": 0.0099, "step": 117820 }, { "epoch": 0.9949547191319583, "grad_norm": 0.35658663511276245, "learning_rate": 5.911554386992186e-06, "loss": 0.0078, "step": 117830 }, { "epoch": 0.9950391589791222, "grad_norm": 0.5440360307693481, "learning_rate": 5.910829844681917e-06, "loss": 0.0138, "step": 117840 }, { "epoch": 0.9951235988262861, "grad_norm": 0.2471284121274948, "learning_rate": 5.91010528258861e-06, "loss": 0.0091, "step": 117850 }, { "epoch": 0.99520803867345, "grad_norm": 0.07687899470329285, "learning_rate": 5.9093807007280015e-06, "loss": 0.0092, "step": 117860 }, { "epoch": 0.9952924785206139, "grad_norm": 0.32891544699668884, "learning_rate": 5.908656099115832e-06, "loss": 0.0092, "step": 117870 }, { "epoch": 0.9953769183677778, "grad_norm": 0.5226671695709229, "learning_rate": 5.907931477767837e-06, "loss": 0.0094, "step": 117880 }, { "epoch": 0.9954613582149416, "grad_norm": 0.5420491695404053, "learning_rate": 5.907206836699757e-06, "loss": 0.0157, "step": 117890 }, { "epoch": 0.9955457980621055, "grad_norm": 0.3004451096057892, "learning_rate": 5.9064821759273295e-06, "loss": 0.014, "step": 117900 }, { "epoch": 0.9956302379092694, "grad_norm": 0.6889890432357788, "learning_rate": 5.905757495466297e-06, "loss": 0.0119, "step": 117910 }, { "epoch": 0.9957146777564333, "grad_norm": 0.17002257704734802, "learning_rate": 5.905032795332397e-06, "loss": 0.0073, "step": 117920 }, { "epoch": 0.9957991176035972, "grad_norm": 0.24932527542114258, "learning_rate": 5.9043080755413685e-06, "loss": 0.0056, "step": 117930 }, { "epoch": 0.995883557450761, "grad_norm": 0.22379152476787567, "learning_rate": 5.903583336108955e-06, "loss": 0.0126, "step": 117940 }, { "epoch": 0.9959679972979248, "grad_norm": 0.30144885182380676, "learning_rate": 5.902858577050896e-06, "loss": 0.0085, "step": 117950 }, { "epoch": 0.9960524371450887, "grad_norm": 0.11899774521589279, "learning_rate": 5.902133798382934e-06, "loss": 0.0047, "step": 117960 }, { "epoch": 0.9961368769922526, "grad_norm": 0.1977681964635849, "learning_rate": 5.90140900012081e-06, "loss": 0.0086, "step": 117970 }, { "epoch": 0.9962213168394165, "grad_norm": 0.45360681414604187, "learning_rate": 5.9006841822802685e-06, "loss": 0.0089, "step": 117980 }, { "epoch": 0.9963057566865804, "grad_norm": 0.5285472273826599, "learning_rate": 5.899959344877051e-06, "loss": 0.0123, "step": 117990 }, { "epoch": 0.9963901965337443, "grad_norm": 0.192648246884346, "learning_rate": 5.8992344879269e-06, "loss": 0.0055, "step": 118000 }, { "epoch": 0.9964746363809082, "grad_norm": 0.19108636677265167, "learning_rate": 5.898509611445561e-06, "loss": 0.0048, "step": 118010 }, { "epoch": 0.996559076228072, "grad_norm": 0.2909855544567108, "learning_rate": 5.897784715448777e-06, "loss": 0.0111, "step": 118020 }, { "epoch": 0.9966435160752359, "grad_norm": 0.28909358382225037, "learning_rate": 5.897059799952293e-06, "loss": 0.0042, "step": 118030 }, { "epoch": 0.9967279559223998, "grad_norm": 0.3176700174808502, "learning_rate": 5.896334864971855e-06, "loss": 0.0226, "step": 118040 }, { "epoch": 0.9968123957695637, "grad_norm": 0.18199734389781952, "learning_rate": 5.895609910523206e-06, "loss": 0.0144, "step": 118050 }, { "epoch": 0.9968968356167275, "grad_norm": 0.20645876228809357, "learning_rate": 5.894884936622094e-06, "loss": 0.0089, "step": 118060 }, { "epoch": 0.9969812754638914, "grad_norm": 0.2677629888057709, "learning_rate": 5.8941599432842646e-06, "loss": 0.0088, "step": 118070 }, { "epoch": 0.9970657153110553, "grad_norm": 0.21228280663490295, "learning_rate": 5.893434930525463e-06, "loss": 0.0093, "step": 118080 }, { "epoch": 0.9971501551582191, "grad_norm": 0.3278904855251312, "learning_rate": 5.89270989836144e-06, "loss": 0.0108, "step": 118090 }, { "epoch": 0.997234595005383, "grad_norm": 0.31380900740623474, "learning_rate": 5.891984846807939e-06, "loss": 0.0107, "step": 118100 }, { "epoch": 0.9973190348525469, "grad_norm": 0.46152347326278687, "learning_rate": 5.891259775880711e-06, "loss": 0.0102, "step": 118110 }, { "epoch": 0.9974034746997108, "grad_norm": 0.29618266224861145, "learning_rate": 5.890534685595502e-06, "loss": 0.0106, "step": 118120 }, { "epoch": 0.9974879145468747, "grad_norm": 0.24111241102218628, "learning_rate": 5.889809575968064e-06, "loss": 0.0116, "step": 118130 }, { "epoch": 0.9975723543940386, "grad_norm": 0.24793656170368195, "learning_rate": 5.889084447014143e-06, "loss": 0.0109, "step": 118140 }, { "epoch": 0.9976567942412025, "grad_norm": 0.32987403869628906, "learning_rate": 5.888359298749491e-06, "loss": 0.0065, "step": 118150 }, { "epoch": 0.9977412340883663, "grad_norm": 0.3240155577659607, "learning_rate": 5.887634131189858e-06, "loss": 0.0102, "step": 118160 }, { "epoch": 0.9978256739355301, "grad_norm": 0.07127577811479568, "learning_rate": 5.8869089443509916e-06, "loss": 0.0105, "step": 118170 }, { "epoch": 0.997910113782694, "grad_norm": 0.5298315286636353, "learning_rate": 5.886183738248645e-06, "loss": 0.0101, "step": 118180 }, { "epoch": 0.9979945536298579, "grad_norm": 0.7417435050010681, "learning_rate": 5.885458512898568e-06, "loss": 0.0131, "step": 118190 }, { "epoch": 0.9980789934770218, "grad_norm": 0.1978132724761963, "learning_rate": 5.884733268316517e-06, "loss": 0.0094, "step": 118200 }, { "epoch": 0.9981634333241857, "grad_norm": 0.3259364664554596, "learning_rate": 5.884008004518238e-06, "loss": 0.0123, "step": 118210 }, { "epoch": 0.9982478731713496, "grad_norm": 0.16973012685775757, "learning_rate": 5.8832827215194865e-06, "loss": 0.0098, "step": 118220 }, { "epoch": 0.9983323130185134, "grad_norm": 0.17778843641281128, "learning_rate": 5.882557419336017e-06, "loss": 0.017, "step": 118230 }, { "epoch": 0.9984167528656773, "grad_norm": 0.35246652364730835, "learning_rate": 5.88183209798358e-06, "loss": 0.0119, "step": 118240 }, { "epoch": 0.9985011927128412, "grad_norm": 0.14974386990070343, "learning_rate": 5.88110675747793e-06, "loss": 0.0068, "step": 118250 }, { "epoch": 0.9985856325600051, "grad_norm": 0.25259971618652344, "learning_rate": 5.880381397834821e-06, "loss": 0.0075, "step": 118260 }, { "epoch": 0.998670072407169, "grad_norm": 0.276302695274353, "learning_rate": 5.879656019070011e-06, "loss": 0.0087, "step": 118270 }, { "epoch": 0.9987545122543329, "grad_norm": 0.2042996734380722, "learning_rate": 5.878930621199252e-06, "loss": 0.0103, "step": 118280 }, { "epoch": 0.9988389521014966, "grad_norm": 0.6138465404510498, "learning_rate": 5.878205204238299e-06, "loss": 0.01, "step": 118290 }, { "epoch": 0.9989233919486605, "grad_norm": 0.385576069355011, "learning_rate": 5.87747976820291e-06, "loss": 0.0113, "step": 118300 }, { "epoch": 0.9990078317958244, "grad_norm": 0.1947682648897171, "learning_rate": 5.876754313108838e-06, "loss": 0.0072, "step": 118310 }, { "epoch": 0.9990922716429883, "grad_norm": 0.4816875159740448, "learning_rate": 5.876028838971845e-06, "loss": 0.0089, "step": 118320 }, { "epoch": 0.9991767114901522, "grad_norm": 0.5348758101463318, "learning_rate": 5.875303345807682e-06, "loss": 0.0047, "step": 118330 }, { "epoch": 0.9992611513373161, "grad_norm": 0.2637590169906616, "learning_rate": 5.874577833632111e-06, "loss": 0.0112, "step": 118340 }, { "epoch": 0.99934559118448, "grad_norm": 0.3983834683895111, "learning_rate": 5.87385230246089e-06, "loss": 0.0079, "step": 118350 }, { "epoch": 0.9994300310316439, "grad_norm": 0.15964122116565704, "learning_rate": 5.8731267523097746e-06, "loss": 0.0073, "step": 118360 }, { "epoch": 0.9995144708788077, "grad_norm": 0.4387376308441162, "learning_rate": 5.872401183194526e-06, "loss": 0.0091, "step": 118370 }, { "epoch": 0.9995989107259716, "grad_norm": 0.3926185369491577, "learning_rate": 5.871675595130901e-06, "loss": 0.0085, "step": 118380 }, { "epoch": 0.9996833505731355, "grad_norm": 0.32913726568222046, "learning_rate": 5.870949988134662e-06, "loss": 0.0069, "step": 118390 }, { "epoch": 0.9997677904202993, "grad_norm": 0.5128103494644165, "learning_rate": 5.870224362221568e-06, "loss": 0.0121, "step": 118400 }, { "epoch": 0.9998522302674632, "grad_norm": 0.2950648069381714, "learning_rate": 5.8694987174073794e-06, "loss": 0.0103, "step": 118410 }, { "epoch": 0.9999366701146271, "grad_norm": 0.35086584091186523, "learning_rate": 5.8687730537078544e-06, "loss": 0.0074, "step": 118420 }, { "epoch": 1.000021109961791, "grad_norm": 0.32077693939208984, "learning_rate": 5.868047371138759e-06, "loss": 0.0063, "step": 118430 }, { "epoch": 1.0001055498089548, "grad_norm": 0.34344080090522766, "learning_rate": 5.867321669715853e-06, "loss": 0.0096, "step": 118440 }, { "epoch": 1.0001899896561188, "grad_norm": 0.42326754331588745, "learning_rate": 5.866595949454896e-06, "loss": 0.0072, "step": 118450 }, { "epoch": 1.0002744295032826, "grad_norm": 0.5569007396697998, "learning_rate": 5.865870210371654e-06, "loss": 0.0133, "step": 118460 }, { "epoch": 1.0003588693504464, "grad_norm": 0.5303378701210022, "learning_rate": 5.865144452481887e-06, "loss": 0.0103, "step": 118470 }, { "epoch": 1.0004433091976104, "grad_norm": 0.05257807672023773, "learning_rate": 5.864418675801361e-06, "loss": 0.01, "step": 118480 }, { "epoch": 1.0005277490447742, "grad_norm": 0.24058322608470917, "learning_rate": 5.863692880345838e-06, "loss": 0.0055, "step": 118490 }, { "epoch": 1.0006121888919381, "grad_norm": 1.1479607820510864, "learning_rate": 5.862967066131082e-06, "loss": 0.0173, "step": 118500 }, { "epoch": 1.000696628739102, "grad_norm": 0.6366682052612305, "learning_rate": 5.86224123317286e-06, "loss": 0.0124, "step": 118510 }, { "epoch": 1.000781068586266, "grad_norm": 0.09113142639398575, "learning_rate": 5.861515381486935e-06, "loss": 0.0112, "step": 118520 }, { "epoch": 1.0008655084334297, "grad_norm": 0.039029810577631, "learning_rate": 5.860789511089072e-06, "loss": 0.0052, "step": 118530 }, { "epoch": 1.0009499482805937, "grad_norm": 0.19518797099590302, "learning_rate": 5.860063621995036e-06, "loss": 0.0067, "step": 118540 }, { "epoch": 1.0010343881277575, "grad_norm": 0.20207522809505463, "learning_rate": 5.8593377142205955e-06, "loss": 0.0083, "step": 118550 }, { "epoch": 1.0011188279749215, "grad_norm": 0.22399991750717163, "learning_rate": 5.8586117877815164e-06, "loss": 0.0093, "step": 118560 }, { "epoch": 1.0012032678220852, "grad_norm": 0.3512367904186249, "learning_rate": 5.857885842693564e-06, "loss": 0.0155, "step": 118570 }, { "epoch": 1.001287707669249, "grad_norm": 0.11867985874414444, "learning_rate": 5.857159878972507e-06, "loss": 0.0076, "step": 118580 }, { "epoch": 1.001372147516413, "grad_norm": 0.34704962372779846, "learning_rate": 5.856433896634113e-06, "loss": 0.006, "step": 118590 }, { "epoch": 1.0014565873635768, "grad_norm": 0.6818333268165588, "learning_rate": 5.855707895694151e-06, "loss": 0.0101, "step": 118600 }, { "epoch": 1.0015410272107408, "grad_norm": 0.7452634572982788, "learning_rate": 5.854981876168388e-06, "loss": 0.0102, "step": 118610 }, { "epoch": 1.0016254670579046, "grad_norm": 0.44572922587394714, "learning_rate": 5.854255838072594e-06, "loss": 0.0055, "step": 118620 }, { "epoch": 1.0017099069050686, "grad_norm": 0.36893826723098755, "learning_rate": 5.85352978142254e-06, "loss": 0.0046, "step": 118630 }, { "epoch": 1.0017943467522323, "grad_norm": 0.01927967742085457, "learning_rate": 5.852803706233992e-06, "loss": 0.0073, "step": 118640 }, { "epoch": 1.0018787865993963, "grad_norm": 0.18950912356376648, "learning_rate": 5.852077612522724e-06, "loss": 0.0092, "step": 118650 }, { "epoch": 1.00196322644656, "grad_norm": 0.27800676226615906, "learning_rate": 5.851351500304503e-06, "loss": 0.0082, "step": 118660 }, { "epoch": 1.002047666293724, "grad_norm": 0.1251555234193802, "learning_rate": 5.850625369595102e-06, "loss": 0.0045, "step": 118670 }, { "epoch": 1.0021321061408879, "grad_norm": 0.033722344785928726, "learning_rate": 5.849899220410294e-06, "loss": 0.0061, "step": 118680 }, { "epoch": 1.0022165459880517, "grad_norm": 0.3040551543235779, "learning_rate": 5.849173052765849e-06, "loss": 0.0043, "step": 118690 }, { "epoch": 1.0023009858352157, "grad_norm": 0.36514410376548767, "learning_rate": 5.848446866677538e-06, "loss": 0.0058, "step": 118700 }, { "epoch": 1.0023854256823794, "grad_norm": 0.2504737675189972, "learning_rate": 5.847720662161134e-06, "loss": 0.0077, "step": 118710 }, { "epoch": 1.0024698655295434, "grad_norm": 0.2589143216609955, "learning_rate": 5.846994439232413e-06, "loss": 0.0061, "step": 118720 }, { "epoch": 1.0025543053767072, "grad_norm": 0.1481446921825409, "learning_rate": 5.846268197907146e-06, "loss": 0.0106, "step": 118730 }, { "epoch": 1.0026387452238712, "grad_norm": 0.26356759667396545, "learning_rate": 5.845541938201106e-06, "loss": 0.011, "step": 118740 }, { "epoch": 1.002723185071035, "grad_norm": 0.17748667299747467, "learning_rate": 5.8448156601300695e-06, "loss": 0.0103, "step": 118750 }, { "epoch": 1.002807624918199, "grad_norm": 0.04287335276603699, "learning_rate": 5.8440893637098095e-06, "loss": 0.0111, "step": 118760 }, { "epoch": 1.0028920647653627, "grad_norm": 0.26045259833335876, "learning_rate": 5.8433630489561024e-06, "loss": 0.0042, "step": 118770 }, { "epoch": 1.0029765046125267, "grad_norm": 0.8947144746780396, "learning_rate": 5.842636715884721e-06, "loss": 0.0119, "step": 118780 }, { "epoch": 1.0030609444596905, "grad_norm": 0.36373865604400635, "learning_rate": 5.841910364511444e-06, "loss": 0.0062, "step": 118790 }, { "epoch": 1.0031453843068545, "grad_norm": 0.04546966031193733, "learning_rate": 5.841183994852045e-06, "loss": 0.0053, "step": 118800 }, { "epoch": 1.0032298241540183, "grad_norm": 0.23906457424163818, "learning_rate": 5.840457606922304e-06, "loss": 0.0102, "step": 118810 }, { "epoch": 1.003314264001182, "grad_norm": 0.24078278243541718, "learning_rate": 5.839731200737995e-06, "loss": 0.0051, "step": 118820 }, { "epoch": 1.003398703848346, "grad_norm": 0.4489987790584564, "learning_rate": 5.839004776314896e-06, "loss": 0.0068, "step": 118830 }, { "epoch": 1.0034831436955098, "grad_norm": 0.16843004524707794, "learning_rate": 5.8382783336687864e-06, "loss": 0.0105, "step": 118840 }, { "epoch": 1.0035675835426738, "grad_norm": 0.2441468983888626, "learning_rate": 5.8375518728154426e-06, "loss": 0.0091, "step": 118850 }, { "epoch": 1.0036520233898376, "grad_norm": 0.1572643369436264, "learning_rate": 5.836825393770642e-06, "loss": 0.0177, "step": 118860 }, { "epoch": 1.0037364632370016, "grad_norm": 0.21359488368034363, "learning_rate": 5.836098896550168e-06, "loss": 0.0096, "step": 118870 }, { "epoch": 1.0038209030841654, "grad_norm": 0.10816457867622375, "learning_rate": 5.835372381169796e-06, "loss": 0.0062, "step": 118880 }, { "epoch": 1.0039053429313294, "grad_norm": 0.11450067907571793, "learning_rate": 5.834645847645309e-06, "loss": 0.0061, "step": 118890 }, { "epoch": 1.0039897827784932, "grad_norm": 0.22185064852237701, "learning_rate": 5.8339192959924824e-06, "loss": 0.0094, "step": 118900 }, { "epoch": 1.0040742226256572, "grad_norm": 0.22334736585617065, "learning_rate": 5.8331927262271e-06, "loss": 0.0058, "step": 118910 }, { "epoch": 1.004158662472821, "grad_norm": 0.21506907045841217, "learning_rate": 5.832466138364943e-06, "loss": 0.0054, "step": 118920 }, { "epoch": 1.0042431023199847, "grad_norm": 0.19405217468738556, "learning_rate": 5.831739532421793e-06, "loss": 0.0082, "step": 118930 }, { "epoch": 1.0043275421671487, "grad_norm": 0.11854050308465958, "learning_rate": 5.8310129084134296e-06, "loss": 0.0051, "step": 118940 }, { "epoch": 1.0044119820143125, "grad_norm": 0.4894166886806488, "learning_rate": 5.830286266355636e-06, "loss": 0.0165, "step": 118950 }, { "epoch": 1.0044964218614765, "grad_norm": 0.4229698181152344, "learning_rate": 5.829559606264195e-06, "loss": 0.0046, "step": 118960 }, { "epoch": 1.0045808617086402, "grad_norm": 0.6852750778198242, "learning_rate": 5.82883292815489e-06, "loss": 0.0083, "step": 118970 }, { "epoch": 1.0046653015558042, "grad_norm": 0.19451801478862762, "learning_rate": 5.8281062320435035e-06, "loss": 0.0069, "step": 118980 }, { "epoch": 1.004749741402968, "grad_norm": 0.14586810767650604, "learning_rate": 5.8273795179458195e-06, "loss": 0.0105, "step": 118990 }, { "epoch": 1.004834181250132, "grad_norm": 0.1323934942483902, "learning_rate": 5.826652785877621e-06, "loss": 0.0086, "step": 119000 }, { "epoch": 1.0049186210972958, "grad_norm": 0.17778146266937256, "learning_rate": 5.8259260358546945e-06, "loss": 0.006, "step": 119010 }, { "epoch": 1.0050030609444598, "grad_norm": 0.272192120552063, "learning_rate": 5.825199267892822e-06, "loss": 0.0065, "step": 119020 }, { "epoch": 1.0050875007916236, "grad_norm": 0.4362308084964752, "learning_rate": 5.8244724820077925e-06, "loss": 0.0074, "step": 119030 }, { "epoch": 1.0051719406387873, "grad_norm": 0.3650059998035431, "learning_rate": 5.8237456782153875e-06, "loss": 0.0068, "step": 119040 }, { "epoch": 1.0052563804859513, "grad_norm": 0.6743541955947876, "learning_rate": 5.823018856531397e-06, "loss": 0.009, "step": 119050 }, { "epoch": 1.0053408203331151, "grad_norm": 0.6526094079017639, "learning_rate": 5.822292016971604e-06, "loss": 0.0124, "step": 119060 }, { "epoch": 1.0054252601802791, "grad_norm": 0.08630438894033432, "learning_rate": 5.8215651595517986e-06, "loss": 0.0052, "step": 119070 }, { "epoch": 1.0055097000274429, "grad_norm": 0.24800145626068115, "learning_rate": 5.820838284287766e-06, "loss": 0.0056, "step": 119080 }, { "epoch": 1.0055941398746069, "grad_norm": 0.27913394570350647, "learning_rate": 5.8201113911952935e-06, "loss": 0.0103, "step": 119090 }, { "epoch": 1.0056785797217707, "grad_norm": 0.20434008538722992, "learning_rate": 5.819384480290171e-06, "loss": 0.0098, "step": 119100 }, { "epoch": 1.0057630195689347, "grad_norm": 0.4290253520011902, "learning_rate": 5.818657551588184e-06, "loss": 0.0054, "step": 119110 }, { "epoch": 1.0058474594160984, "grad_norm": 0.36586523056030273, "learning_rate": 5.8179306051051225e-06, "loss": 0.0109, "step": 119120 }, { "epoch": 1.0059318992632624, "grad_norm": 0.2908741235733032, "learning_rate": 5.817203640856777e-06, "loss": 0.0112, "step": 119130 }, { "epoch": 1.0060163391104262, "grad_norm": 0.4862931966781616, "learning_rate": 5.8164766588589346e-06, "loss": 0.0138, "step": 119140 }, { "epoch": 1.00610077895759, "grad_norm": 0.19681331515312195, "learning_rate": 5.815749659127388e-06, "loss": 0.0059, "step": 119150 }, { "epoch": 1.006185218804754, "grad_norm": 0.16153262555599213, "learning_rate": 5.815022641677924e-06, "loss": 0.0066, "step": 119160 }, { "epoch": 1.0062696586519178, "grad_norm": 0.26643818616867065, "learning_rate": 5.814295606526337e-06, "loss": 0.0037, "step": 119170 }, { "epoch": 1.0063540984990818, "grad_norm": 0.8653206825256348, "learning_rate": 5.813568553688415e-06, "loss": 0.0048, "step": 119180 }, { "epoch": 1.0064385383462455, "grad_norm": 0.17379091680049896, "learning_rate": 5.812841483179952e-06, "loss": 0.0064, "step": 119190 }, { "epoch": 1.0065229781934095, "grad_norm": 0.07125119864940643, "learning_rate": 5.812114395016737e-06, "loss": 0.008, "step": 119200 }, { "epoch": 1.0066074180405733, "grad_norm": 0.025624360889196396, "learning_rate": 5.811387289214565e-06, "loss": 0.0102, "step": 119210 }, { "epoch": 1.0066918578877373, "grad_norm": 0.1496047079563141, "learning_rate": 5.810660165789228e-06, "loss": 0.0128, "step": 119220 }, { "epoch": 1.006776297734901, "grad_norm": 0.8056538105010986, "learning_rate": 5.809933024756518e-06, "loss": 0.0152, "step": 119230 }, { "epoch": 1.006860737582065, "grad_norm": 0.7212792038917542, "learning_rate": 5.809205866132227e-06, "loss": 0.0056, "step": 119240 }, { "epoch": 1.0069451774292288, "grad_norm": 0.08501647412776947, "learning_rate": 5.808478689932151e-06, "loss": 0.0057, "step": 119250 }, { "epoch": 1.0070296172763928, "grad_norm": 0.15407085418701172, "learning_rate": 5.807751496172085e-06, "loss": 0.0075, "step": 119260 }, { "epoch": 1.0071140571235566, "grad_norm": 0.6859539151191711, "learning_rate": 5.80702428486782e-06, "loss": 0.0095, "step": 119270 }, { "epoch": 1.0071984969707204, "grad_norm": 0.15012553334236145, "learning_rate": 5.806297056035154e-06, "loss": 0.0098, "step": 119280 }, { "epoch": 1.0072829368178844, "grad_norm": 0.2277165949344635, "learning_rate": 5.805569809689881e-06, "loss": 0.0063, "step": 119290 }, { "epoch": 1.0073673766650482, "grad_norm": 0.2954479455947876, "learning_rate": 5.804842545847797e-06, "loss": 0.0073, "step": 119300 }, { "epoch": 1.0074518165122122, "grad_norm": 0.5275477766990662, "learning_rate": 5.8041152645246975e-06, "loss": 0.0085, "step": 119310 }, { "epoch": 1.007536256359376, "grad_norm": 0.002431822009384632, "learning_rate": 5.8033879657363804e-06, "loss": 0.0048, "step": 119320 }, { "epoch": 1.00762069620654, "grad_norm": 0.4546912610530853, "learning_rate": 5.80266064949864e-06, "loss": 0.0093, "step": 119330 }, { "epoch": 1.0077051360537037, "grad_norm": 0.05683969706296921, "learning_rate": 5.801933315827276e-06, "loss": 0.0053, "step": 119340 }, { "epoch": 1.0077895759008677, "grad_norm": 0.08408617973327637, "learning_rate": 5.801205964738084e-06, "loss": 0.0094, "step": 119350 }, { "epoch": 1.0078740157480315, "grad_norm": 0.2161625772714615, "learning_rate": 5.800478596246862e-06, "loss": 0.0087, "step": 119360 }, { "epoch": 1.0079584555951955, "grad_norm": 0.269746869802475, "learning_rate": 5.799751210369409e-06, "loss": 0.0062, "step": 119370 }, { "epoch": 1.0080428954423593, "grad_norm": 0.20065350830554962, "learning_rate": 5.799023807121524e-06, "loss": 0.0071, "step": 119380 }, { "epoch": 1.008127335289523, "grad_norm": 0.40262484550476074, "learning_rate": 5.798296386519005e-06, "loss": 0.0115, "step": 119390 }, { "epoch": 1.008211775136687, "grad_norm": 0.16350875794887543, "learning_rate": 5.797568948577653e-06, "loss": 0.0096, "step": 119400 }, { "epoch": 1.0082962149838508, "grad_norm": 0.20918777585029602, "learning_rate": 5.7968414933132664e-06, "loss": 0.0046, "step": 119410 }, { "epoch": 1.0083806548310148, "grad_norm": 0.0867377296090126, "learning_rate": 5.796114020741647e-06, "loss": 0.0101, "step": 119420 }, { "epoch": 1.0084650946781786, "grad_norm": 0.7282864451408386, "learning_rate": 5.795386530878593e-06, "loss": 0.0078, "step": 119430 }, { "epoch": 1.0085495345253426, "grad_norm": 0.3580697476863861, "learning_rate": 5.794659023739907e-06, "loss": 0.0087, "step": 119440 }, { "epoch": 1.0086339743725063, "grad_norm": 0.05505533143877983, "learning_rate": 5.7939314993413896e-06, "loss": 0.0058, "step": 119450 }, { "epoch": 1.0087184142196703, "grad_norm": 0.3484649360179901, "learning_rate": 5.793203957698844e-06, "loss": 0.0179, "step": 119460 }, { "epoch": 1.0088028540668341, "grad_norm": 0.0008828114368952811, "learning_rate": 5.79247639882807e-06, "loss": 0.007, "step": 119470 }, { "epoch": 1.0088872939139981, "grad_norm": 0.13830101490020752, "learning_rate": 5.791748822744869e-06, "loss": 0.0103, "step": 119480 }, { "epoch": 1.008971733761162, "grad_norm": 0.11898770183324814, "learning_rate": 5.791021229465048e-06, "loss": 0.0067, "step": 119490 }, { "epoch": 1.0090561736083257, "grad_norm": 0.11088328063488007, "learning_rate": 5.790293619004408e-06, "loss": 0.0054, "step": 119500 }, { "epoch": 1.0091406134554897, "grad_norm": 0.24244675040245056, "learning_rate": 5.789565991378752e-06, "loss": 0.0086, "step": 119510 }, { "epoch": 1.0092250533026534, "grad_norm": 0.4365227520465851, "learning_rate": 5.788838346603884e-06, "loss": 0.0098, "step": 119520 }, { "epoch": 1.0093094931498174, "grad_norm": 0.3664737045764923, "learning_rate": 5.78811068469561e-06, "loss": 0.0089, "step": 119530 }, { "epoch": 1.0093939329969812, "grad_norm": 0.2319342941045761, "learning_rate": 5.787383005669734e-06, "loss": 0.0056, "step": 119540 }, { "epoch": 1.0094783728441452, "grad_norm": 0.27697885036468506, "learning_rate": 5.7866553095420575e-06, "loss": 0.0073, "step": 119550 }, { "epoch": 1.009562812691309, "grad_norm": 0.06013946235179901, "learning_rate": 5.78592759632839e-06, "loss": 0.0122, "step": 119560 }, { "epoch": 1.009647252538473, "grad_norm": 0.3446161448955536, "learning_rate": 5.785199866044536e-06, "loss": 0.0099, "step": 119570 }, { "epoch": 1.0097316923856368, "grad_norm": 0.42477941513061523, "learning_rate": 5.784472118706306e-06, "loss": 0.0047, "step": 119580 }, { "epoch": 1.0098161322328008, "grad_norm": 0.1940516084432602, "learning_rate": 5.783744354329498e-06, "loss": 0.0064, "step": 119590 }, { "epoch": 1.0099005720799645, "grad_norm": 0.39278388023376465, "learning_rate": 5.7830165729299235e-06, "loss": 0.0138, "step": 119600 }, { "epoch": 1.0099850119271283, "grad_norm": 0.23120145499706268, "learning_rate": 5.78228877452339e-06, "loss": 0.0133, "step": 119610 }, { "epoch": 1.0100694517742923, "grad_norm": 0.04871019348502159, "learning_rate": 5.781560959125705e-06, "loss": 0.0144, "step": 119620 }, { "epoch": 1.010153891621456, "grad_norm": 1.1819416284561157, "learning_rate": 5.780833126752676e-06, "loss": 0.0047, "step": 119630 }, { "epoch": 1.01023833146862, "grad_norm": 0.6579245924949646, "learning_rate": 5.780105277420111e-06, "loss": 0.0083, "step": 119640 }, { "epoch": 1.0103227713157839, "grad_norm": 0.26522591710090637, "learning_rate": 5.77937741114382e-06, "loss": 0.0114, "step": 119650 }, { "epoch": 1.0104072111629478, "grad_norm": 0.09635409712791443, "learning_rate": 5.77864952793961e-06, "loss": 0.0067, "step": 119660 }, { "epoch": 1.0104916510101116, "grad_norm": 0.3988816738128662, "learning_rate": 5.777921627823293e-06, "loss": 0.0095, "step": 119670 }, { "epoch": 1.0105760908572756, "grad_norm": 0.2030605673789978, "learning_rate": 5.777193710810678e-06, "loss": 0.0055, "step": 119680 }, { "epoch": 1.0106605307044394, "grad_norm": 0.30173081159591675, "learning_rate": 5.776465776917575e-06, "loss": 0.0067, "step": 119690 }, { "epoch": 1.0107449705516034, "grad_norm": 0.9246304035186768, "learning_rate": 5.775737826159793e-06, "loss": 0.0087, "step": 119700 }, { "epoch": 1.0108294103987672, "grad_norm": 0.4690011739730835, "learning_rate": 5.775009858553145e-06, "loss": 0.0128, "step": 119710 }, { "epoch": 1.010913850245931, "grad_norm": 0.37562406063079834, "learning_rate": 5.774281874113442e-06, "loss": 0.0046, "step": 119720 }, { "epoch": 1.010998290093095, "grad_norm": 0.4274977147579193, "learning_rate": 5.773553872856495e-06, "loss": 0.0084, "step": 119730 }, { "epoch": 1.0110827299402587, "grad_norm": 0.22788338363170624, "learning_rate": 5.772825854798118e-06, "loss": 0.0093, "step": 119740 }, { "epoch": 1.0111671697874227, "grad_norm": 0.10698094218969345, "learning_rate": 5.772097819954119e-06, "loss": 0.0055, "step": 119750 }, { "epoch": 1.0112516096345865, "grad_norm": 0.2743098735809326, "learning_rate": 5.771369768340315e-06, "loss": 0.0093, "step": 119760 }, { "epoch": 1.0113360494817505, "grad_norm": 0.22880341112613678, "learning_rate": 5.770641699972517e-06, "loss": 0.0073, "step": 119770 }, { "epoch": 1.0114204893289143, "grad_norm": 0.3047844469547272, "learning_rate": 5.76991361486654e-06, "loss": 0.0089, "step": 119780 }, { "epoch": 1.0115049291760783, "grad_norm": 0.32557374238967896, "learning_rate": 5.769185513038197e-06, "loss": 0.0065, "step": 119790 }, { "epoch": 1.011589369023242, "grad_norm": 0.19610388576984406, "learning_rate": 5.768457394503302e-06, "loss": 0.011, "step": 119800 }, { "epoch": 1.011673808870406, "grad_norm": 0.1165643110871315, "learning_rate": 5.76772925927767e-06, "loss": 0.0079, "step": 119810 }, { "epoch": 1.0117582487175698, "grad_norm": 0.4162571430206299, "learning_rate": 5.767001107377116e-06, "loss": 0.0069, "step": 119820 }, { "epoch": 1.0118426885647338, "grad_norm": 0.32656991481781006, "learning_rate": 5.7662729388174555e-06, "loss": 0.0053, "step": 119830 }, { "epoch": 1.0119271284118976, "grad_norm": 0.20100221037864685, "learning_rate": 5.765544753614504e-06, "loss": 0.0099, "step": 119840 }, { "epoch": 1.0120115682590614, "grad_norm": 0.14629283547401428, "learning_rate": 5.764816551784076e-06, "loss": 0.0082, "step": 119850 }, { "epoch": 1.0120960081062254, "grad_norm": 0.3166956603527069, "learning_rate": 5.764088333341988e-06, "loss": 0.0077, "step": 119860 }, { "epoch": 1.0121804479533891, "grad_norm": 1.0293128490447998, "learning_rate": 5.763360098304062e-06, "loss": 0.0094, "step": 119870 }, { "epoch": 1.0122648878005531, "grad_norm": 0.03053472749888897, "learning_rate": 5.762631846686108e-06, "loss": 0.0076, "step": 119880 }, { "epoch": 1.012349327647717, "grad_norm": 0.0018981529865413904, "learning_rate": 5.761903578503946e-06, "loss": 0.0091, "step": 119890 }, { "epoch": 1.012433767494881, "grad_norm": 0.2087165266275406, "learning_rate": 5.761175293773398e-06, "loss": 0.0178, "step": 119900 }, { "epoch": 1.0125182073420447, "grad_norm": 0.3808765411376953, "learning_rate": 5.760446992510276e-06, "loss": 0.0068, "step": 119910 }, { "epoch": 1.0126026471892087, "grad_norm": 0.05603707581758499, "learning_rate": 5.7597186747304015e-06, "loss": 0.0099, "step": 119920 }, { "epoch": 1.0126870870363724, "grad_norm": 0.43494701385498047, "learning_rate": 5.758990340449593e-06, "loss": 0.0095, "step": 119930 }, { "epoch": 1.0127715268835364, "grad_norm": 0.22824160754680634, "learning_rate": 5.75826198968367e-06, "loss": 0.0071, "step": 119940 }, { "epoch": 1.0128559667307002, "grad_norm": 0.19091260433197021, "learning_rate": 5.757533622448452e-06, "loss": 0.0095, "step": 119950 }, { "epoch": 1.012940406577864, "grad_norm": 0.5182886719703674, "learning_rate": 5.756805238759759e-06, "loss": 0.0083, "step": 119960 }, { "epoch": 1.013024846425028, "grad_norm": 0.2274032086133957, "learning_rate": 5.756076838633411e-06, "loss": 0.0076, "step": 119970 }, { "epoch": 1.0131092862721918, "grad_norm": 0.19715148210525513, "learning_rate": 5.755348422085229e-06, "loss": 0.0094, "step": 119980 }, { "epoch": 1.0131937261193558, "grad_norm": 0.10258292406797409, "learning_rate": 5.754619989131035e-06, "loss": 0.0091, "step": 119990 }, { "epoch": 1.0132781659665195, "grad_norm": 0.1533413678407669, "learning_rate": 5.753891539786648e-06, "loss": 0.0077, "step": 120000 }, { "epoch": 1.0133626058136835, "grad_norm": 0.166203111410141, "learning_rate": 5.753163074067892e-06, "loss": 0.0025, "step": 120010 }, { "epoch": 1.0134470456608473, "grad_norm": 0.3680994510650635, "learning_rate": 5.752434591990588e-06, "loss": 0.0048, "step": 120020 }, { "epoch": 1.0135314855080113, "grad_norm": 0.5977827906608582, "learning_rate": 5.751706093570559e-06, "loss": 0.0085, "step": 120030 }, { "epoch": 1.013615925355175, "grad_norm": 0.18222573399543762, "learning_rate": 5.750977578823629e-06, "loss": 0.0061, "step": 120040 }, { "epoch": 1.013700365202339, "grad_norm": 0.08008204400539398, "learning_rate": 5.750249047765619e-06, "loss": 0.0036, "step": 120050 }, { "epoch": 1.0137848050495029, "grad_norm": 0.25810182094573975, "learning_rate": 5.749520500412353e-06, "loss": 0.0191, "step": 120060 }, { "epoch": 1.0138692448966666, "grad_norm": 0.12124920636415482, "learning_rate": 5.748791936779656e-06, "loss": 0.005, "step": 120070 }, { "epoch": 1.0139536847438306, "grad_norm": 0.561780571937561, "learning_rate": 5.748063356883351e-06, "loss": 0.0079, "step": 120080 }, { "epoch": 1.0140381245909944, "grad_norm": 0.33737075328826904, "learning_rate": 5.747334760739262e-06, "loss": 0.007, "step": 120090 }, { "epoch": 1.0141225644381584, "grad_norm": 0.13561882078647614, "learning_rate": 5.746606148363216e-06, "loss": 0.0054, "step": 120100 }, { "epoch": 1.0142070042853222, "grad_norm": 0.17766073346138, "learning_rate": 5.7458775197710395e-06, "loss": 0.0085, "step": 120110 }, { "epoch": 1.0142914441324862, "grad_norm": 0.25489354133605957, "learning_rate": 5.745148874978555e-06, "loss": 0.0091, "step": 120120 }, { "epoch": 1.01437588397965, "grad_norm": 0.06232379749417305, "learning_rate": 5.744420214001587e-06, "loss": 0.0093, "step": 120130 }, { "epoch": 1.014460323826814, "grad_norm": 0.11229711771011353, "learning_rate": 5.743691536855966e-06, "loss": 0.0081, "step": 120140 }, { "epoch": 1.0145447636739777, "grad_norm": 0.25501692295074463, "learning_rate": 5.742962843557519e-06, "loss": 0.0038, "step": 120150 }, { "epoch": 1.0146292035211417, "grad_norm": 0.14725802838802338, "learning_rate": 5.74223413412207e-06, "loss": 0.0072, "step": 120160 }, { "epoch": 1.0147136433683055, "grad_norm": 0.09206872433423996, "learning_rate": 5.741505408565449e-06, "loss": 0.0055, "step": 120170 }, { "epoch": 1.0147980832154693, "grad_norm": 0.0718497782945633, "learning_rate": 5.74077666690348e-06, "loss": 0.0067, "step": 120180 }, { "epoch": 1.0148825230626333, "grad_norm": 0.5171045064926147, "learning_rate": 5.740047909151996e-06, "loss": 0.0112, "step": 120190 }, { "epoch": 1.014966962909797, "grad_norm": 0.061652202159166336, "learning_rate": 5.739319135326822e-06, "loss": 0.0048, "step": 120200 }, { "epoch": 1.015051402756961, "grad_norm": 0.040716752409935, "learning_rate": 5.738590345443787e-06, "loss": 0.0115, "step": 120210 }, { "epoch": 1.0151358426041248, "grad_norm": 0.13979728519916534, "learning_rate": 5.737861539518722e-06, "loss": 0.0106, "step": 120220 }, { "epoch": 1.0152202824512888, "grad_norm": 0.24000249803066254, "learning_rate": 5.737132717567457e-06, "loss": 0.0156, "step": 120230 }, { "epoch": 1.0153047222984526, "grad_norm": 0.15315639972686768, "learning_rate": 5.7364038796058175e-06, "loss": 0.0043, "step": 120240 }, { "epoch": 1.0153891621456166, "grad_norm": 0.642591655254364, "learning_rate": 5.735675025649637e-06, "loss": 0.0088, "step": 120250 }, { "epoch": 1.0154736019927804, "grad_norm": 0.2497599720954895, "learning_rate": 5.7349461557147465e-06, "loss": 0.0098, "step": 120260 }, { "epoch": 1.0155580418399444, "grad_norm": 0.3440316915512085, "learning_rate": 5.734217269816978e-06, "loss": 0.0067, "step": 120270 }, { "epoch": 1.0156424816871081, "grad_norm": 0.11547920852899551, "learning_rate": 5.7334883679721584e-06, "loss": 0.0122, "step": 120280 }, { "epoch": 1.0157269215342721, "grad_norm": 0.16970956325531006, "learning_rate": 5.732759450196123e-06, "loss": 0.0111, "step": 120290 }, { "epoch": 1.015811361381436, "grad_norm": 0.15712563693523407, "learning_rate": 5.732030516504702e-06, "loss": 0.0063, "step": 120300 }, { "epoch": 1.0158958012285997, "grad_norm": 0.5270316004753113, "learning_rate": 5.731301566913727e-06, "loss": 0.0103, "step": 120310 }, { "epoch": 1.0159802410757637, "grad_norm": 0.1502339243888855, "learning_rate": 5.730572601439035e-06, "loss": 0.0074, "step": 120320 }, { "epoch": 1.0160646809229275, "grad_norm": 0.4345383942127228, "learning_rate": 5.7298436200964525e-06, "loss": 0.0099, "step": 120330 }, { "epoch": 1.0161491207700915, "grad_norm": 0.1238199919462204, "learning_rate": 5.729114622901818e-06, "loss": 0.0104, "step": 120340 }, { "epoch": 1.0162335606172552, "grad_norm": 0.252937376499176, "learning_rate": 5.728385609870963e-06, "loss": 0.0102, "step": 120350 }, { "epoch": 1.0163180004644192, "grad_norm": 1.0483745336532593, "learning_rate": 5.727656581019722e-06, "loss": 0.0078, "step": 120360 }, { "epoch": 1.016402440311583, "grad_norm": 0.0828135684132576, "learning_rate": 5.726927536363928e-06, "loss": 0.0064, "step": 120370 }, { "epoch": 1.016486880158747, "grad_norm": 0.2317522168159485, "learning_rate": 5.726198475919417e-06, "loss": 0.0113, "step": 120380 }, { "epoch": 1.0165713200059108, "grad_norm": 0.955555260181427, "learning_rate": 5.725469399702025e-06, "loss": 0.0257, "step": 120390 }, { "epoch": 1.0166557598530748, "grad_norm": 0.14894439280033112, "learning_rate": 5.724740307727586e-06, "loss": 0.0046, "step": 120400 }, { "epoch": 1.0167401997002385, "grad_norm": 1.2050838470458984, "learning_rate": 5.724011200011936e-06, "loss": 0.0131, "step": 120410 }, { "epoch": 1.0168246395474023, "grad_norm": 0.21283426880836487, "learning_rate": 5.723282076570911e-06, "loss": 0.0086, "step": 120420 }, { "epoch": 1.0169090793945663, "grad_norm": 0.2934667468070984, "learning_rate": 5.722552937420347e-06, "loss": 0.0083, "step": 120430 }, { "epoch": 1.01699351924173, "grad_norm": 0.12250072509050369, "learning_rate": 5.7218237825760824e-06, "loss": 0.0079, "step": 120440 }, { "epoch": 1.017077959088894, "grad_norm": 0.612354040145874, "learning_rate": 5.721094612053952e-06, "loss": 0.0069, "step": 120450 }, { "epoch": 1.0171623989360579, "grad_norm": 0.2871488928794861, "learning_rate": 5.720365425869795e-06, "loss": 0.0115, "step": 120460 }, { "epoch": 1.0172468387832219, "grad_norm": 0.7219899296760559, "learning_rate": 5.7196362240394475e-06, "loss": 0.0098, "step": 120470 }, { "epoch": 1.0173312786303856, "grad_norm": 0.758553683757782, "learning_rate": 5.718907006578751e-06, "loss": 0.0117, "step": 120480 }, { "epoch": 1.0174157184775496, "grad_norm": 0.47873520851135254, "learning_rate": 5.718177773503539e-06, "loss": 0.0109, "step": 120490 }, { "epoch": 1.0175001583247134, "grad_norm": 0.3342618942260742, "learning_rate": 5.717448524829654e-06, "loss": 0.0054, "step": 120500 }, { "epoch": 1.0175845981718774, "grad_norm": 0.052155423909425735, "learning_rate": 5.716719260572936e-06, "loss": 0.0027, "step": 120510 }, { "epoch": 1.0176690380190412, "grad_norm": 0.3086029291152954, "learning_rate": 5.7159899807492215e-06, "loss": 0.0103, "step": 120520 }, { "epoch": 1.017753477866205, "grad_norm": 0.6701492667198181, "learning_rate": 5.715260685374349e-06, "loss": 0.0078, "step": 120530 }, { "epoch": 1.017837917713369, "grad_norm": 0.12761051952838898, "learning_rate": 5.714531374464163e-06, "loss": 0.0121, "step": 120540 }, { "epoch": 1.0179223575605327, "grad_norm": 0.25277477502822876, "learning_rate": 5.713802048034501e-06, "loss": 0.0096, "step": 120550 }, { "epoch": 1.0180067974076967, "grad_norm": 0.0016630336176604033, "learning_rate": 5.7130727061012055e-06, "loss": 0.0073, "step": 120560 }, { "epoch": 1.0180912372548605, "grad_norm": 1.3454763889312744, "learning_rate": 5.712343348680117e-06, "loss": 0.0076, "step": 120570 }, { "epoch": 1.0181756771020245, "grad_norm": 0.34894782304763794, "learning_rate": 5.711613975787077e-06, "loss": 0.0088, "step": 120580 }, { "epoch": 1.0182601169491883, "grad_norm": 0.009529557079076767, "learning_rate": 5.710884587437926e-06, "loss": 0.0085, "step": 120590 }, { "epoch": 1.0183445567963523, "grad_norm": 0.2628922164440155, "learning_rate": 5.710155183648509e-06, "loss": 0.0068, "step": 120600 }, { "epoch": 1.018428996643516, "grad_norm": 0.23193955421447754, "learning_rate": 5.709425764434667e-06, "loss": 0.0086, "step": 120610 }, { "epoch": 1.01851343649068, "grad_norm": 0.0074651604518294334, "learning_rate": 5.708696329812241e-06, "loss": 0.0071, "step": 120620 }, { "epoch": 1.0185978763378438, "grad_norm": 0.2829574942588806, "learning_rate": 5.707966879797077e-06, "loss": 0.0067, "step": 120630 }, { "epoch": 1.0186823161850076, "grad_norm": 0.15214557945728302, "learning_rate": 5.707237414405019e-06, "loss": 0.0058, "step": 120640 }, { "epoch": 1.0187667560321716, "grad_norm": 0.17306967079639435, "learning_rate": 5.706507933651908e-06, "loss": 0.0051, "step": 120650 }, { "epoch": 1.0188511958793354, "grad_norm": 0.39690208435058594, "learning_rate": 5.705778437553589e-06, "loss": 0.0068, "step": 120660 }, { "epoch": 1.0189356357264994, "grad_norm": 0.2928306460380554, "learning_rate": 5.705048926125907e-06, "loss": 0.01, "step": 120670 }, { "epoch": 1.0190200755736631, "grad_norm": 0.20366790890693665, "learning_rate": 5.704319399384708e-06, "loss": 0.0087, "step": 120680 }, { "epoch": 1.0191045154208271, "grad_norm": 0.10189270973205566, "learning_rate": 5.703589857345835e-06, "loss": 0.0098, "step": 120690 }, { "epoch": 1.019188955267991, "grad_norm": 0.02903863787651062, "learning_rate": 5.702860300025134e-06, "loss": 0.0158, "step": 120700 }, { "epoch": 1.019273395115155, "grad_norm": 0.18944399058818817, "learning_rate": 5.702130727438451e-06, "loss": 0.0106, "step": 120710 }, { "epoch": 1.0193578349623187, "grad_norm": 0.23480452597141266, "learning_rate": 5.701401139601634e-06, "loss": 0.0042, "step": 120720 }, { "epoch": 1.0194422748094827, "grad_norm": 0.1644701510667801, "learning_rate": 5.700671536530527e-06, "loss": 0.0096, "step": 120730 }, { "epoch": 1.0195267146566465, "grad_norm": 0.16266442835330963, "learning_rate": 5.6999419182409776e-06, "loss": 0.0073, "step": 120740 }, { "epoch": 1.0196111545038105, "grad_norm": 0.3310883641242981, "learning_rate": 5.699212284748834e-06, "loss": 0.007, "step": 120750 }, { "epoch": 1.0196955943509742, "grad_norm": 0.15930087864398956, "learning_rate": 5.698482636069943e-06, "loss": 0.0074, "step": 120760 }, { "epoch": 1.019780034198138, "grad_norm": 0.42869994044303894, "learning_rate": 5.697752972220152e-06, "loss": 0.005, "step": 120770 }, { "epoch": 1.019864474045302, "grad_norm": 0.37313851714134216, "learning_rate": 5.697023293215309e-06, "loss": 0.0115, "step": 120780 }, { "epoch": 1.0199489138924658, "grad_norm": 0.3365209400653839, "learning_rate": 5.696293599071263e-06, "loss": 0.0125, "step": 120790 }, { "epoch": 1.0200333537396298, "grad_norm": 0.37466782331466675, "learning_rate": 5.695563889803864e-06, "loss": 0.0061, "step": 120800 }, { "epoch": 1.0201177935867936, "grad_norm": 0.4763804078102112, "learning_rate": 5.694834165428957e-06, "loss": 0.0102, "step": 120810 }, { "epoch": 1.0202022334339576, "grad_norm": 0.10034726560115814, "learning_rate": 5.694104425962397e-06, "loss": 0.0054, "step": 120820 }, { "epoch": 1.0202866732811213, "grad_norm": 0.3103158473968506, "learning_rate": 5.693374671420029e-06, "loss": 0.0049, "step": 120830 }, { "epoch": 1.0203711131282853, "grad_norm": 0.3202266991138458, "learning_rate": 5.6926449018177075e-06, "loss": 0.0127, "step": 120840 }, { "epoch": 1.020455552975449, "grad_norm": 0.3165028989315033, "learning_rate": 5.691915117171279e-06, "loss": 0.007, "step": 120850 }, { "epoch": 1.020539992822613, "grad_norm": 0.06683549284934998, "learning_rate": 5.691185317496596e-06, "loss": 0.0045, "step": 120860 }, { "epoch": 1.0206244326697769, "grad_norm": 0.14381957054138184, "learning_rate": 5.69045550280951e-06, "loss": 0.0084, "step": 120870 }, { "epoch": 1.0207088725169406, "grad_norm": 0.2950487732887268, "learning_rate": 5.689725673125872e-06, "loss": 0.0064, "step": 120880 }, { "epoch": 1.0207933123641046, "grad_norm": 0.35750409960746765, "learning_rate": 5.6889958284615345e-06, "loss": 0.0081, "step": 120890 }, { "epoch": 1.0208777522112684, "grad_norm": 0.19229306280612946, "learning_rate": 5.688265968832348e-06, "loss": 0.005, "step": 120900 }, { "epoch": 1.0209621920584324, "grad_norm": 0.17110450565814972, "learning_rate": 5.687536094254165e-06, "loss": 0.0107, "step": 120910 }, { "epoch": 1.0210466319055962, "grad_norm": 0.09516807645559311, "learning_rate": 5.68680620474284e-06, "loss": 0.0164, "step": 120920 }, { "epoch": 1.0211310717527602, "grad_norm": 0.16014762222766876, "learning_rate": 5.686076300314225e-06, "loss": 0.0068, "step": 120930 }, { "epoch": 1.021215511599924, "grad_norm": 0.2931325435638428, "learning_rate": 5.685346380984172e-06, "loss": 0.0073, "step": 120940 }, { "epoch": 1.021299951447088, "grad_norm": 0.1803189218044281, "learning_rate": 5.684616446768537e-06, "loss": 0.0102, "step": 120950 }, { "epoch": 1.0213843912942517, "grad_norm": 0.2299409657716751, "learning_rate": 5.683886497683173e-06, "loss": 0.0078, "step": 120960 }, { "epoch": 1.0214688311414157, "grad_norm": 0.04316474497318268, "learning_rate": 5.6831565337439345e-06, "loss": 0.0062, "step": 120970 }, { "epoch": 1.0215532709885795, "grad_norm": 0.5093714594841003, "learning_rate": 5.682426554966676e-06, "loss": 0.0068, "step": 120980 }, { "epoch": 1.0216377108357433, "grad_norm": 0.13025033473968506, "learning_rate": 5.681696561367251e-06, "loss": 0.0071, "step": 120990 }, { "epoch": 1.0217221506829073, "grad_norm": 0.08696910738945007, "learning_rate": 5.68096655296152e-06, "loss": 0.0132, "step": 121000 }, { "epoch": 1.021806590530071, "grad_norm": 0.39822477102279663, "learning_rate": 5.6802365297653325e-06, "loss": 0.0106, "step": 121010 }, { "epoch": 1.021891030377235, "grad_norm": 0.10685347020626068, "learning_rate": 5.679506491794548e-06, "loss": 0.0079, "step": 121020 }, { "epoch": 1.0219754702243988, "grad_norm": 0.23017434775829315, "learning_rate": 5.678776439065021e-06, "loss": 0.009, "step": 121030 }, { "epoch": 1.0220599100715628, "grad_norm": 0.3700913190841675, "learning_rate": 5.678046371592608e-06, "loss": 0.0089, "step": 121040 }, { "epoch": 1.0221443499187266, "grad_norm": 0.5080171823501587, "learning_rate": 5.677316289393169e-06, "loss": 0.0055, "step": 121050 }, { "epoch": 1.0222287897658906, "grad_norm": 0.21940068900585175, "learning_rate": 5.676586192482557e-06, "loss": 0.0055, "step": 121060 }, { "epoch": 1.0223132296130544, "grad_norm": 0.2626083493232727, "learning_rate": 5.675856080876633e-06, "loss": 0.0107, "step": 121070 }, { "epoch": 1.0223976694602184, "grad_norm": 0.15469695627689362, "learning_rate": 5.675125954591252e-06, "loss": 0.0069, "step": 121080 }, { "epoch": 1.0224821093073821, "grad_norm": 0.33271172642707825, "learning_rate": 5.674395813642275e-06, "loss": 0.0075, "step": 121090 }, { "epoch": 1.022566549154546, "grad_norm": 0.12103760242462158, "learning_rate": 5.673665658045557e-06, "loss": 0.0083, "step": 121100 }, { "epoch": 1.02265098900171, "grad_norm": 0.07872705906629562, "learning_rate": 5.672935487816962e-06, "loss": 0.0066, "step": 121110 }, { "epoch": 1.0227354288488737, "grad_norm": 0.6348171234130859, "learning_rate": 5.672205302972344e-06, "loss": 0.0069, "step": 121120 }, { "epoch": 1.0228198686960377, "grad_norm": 0.10945864766836166, "learning_rate": 5.671475103527566e-06, "loss": 0.0057, "step": 121130 }, { "epoch": 1.0229043085432015, "grad_norm": 0.030311988666653633, "learning_rate": 5.670744889498485e-06, "loss": 0.0088, "step": 121140 }, { "epoch": 1.0229887483903655, "grad_norm": 0.029532797634601593, "learning_rate": 5.670014660900962e-06, "loss": 0.0093, "step": 121150 }, { "epoch": 1.0230731882375292, "grad_norm": 0.43595561385154724, "learning_rate": 5.669284417750859e-06, "loss": 0.0094, "step": 121160 }, { "epoch": 1.0231576280846932, "grad_norm": 0.1279631108045578, "learning_rate": 5.668554160064036e-06, "loss": 0.0051, "step": 121170 }, { "epoch": 1.023242067931857, "grad_norm": 0.43224185705184937, "learning_rate": 5.667823887856353e-06, "loss": 0.0111, "step": 121180 }, { "epoch": 1.023326507779021, "grad_norm": 0.3226146996021271, "learning_rate": 5.6670936011436715e-06, "loss": 0.0066, "step": 121190 }, { "epoch": 1.0234109476261848, "grad_norm": 0.21706154942512512, "learning_rate": 5.666363299941854e-06, "loss": 0.0063, "step": 121200 }, { "epoch": 1.0234953874733486, "grad_norm": 0.11825783550739288, "learning_rate": 5.665632984266763e-06, "loss": 0.0116, "step": 121210 }, { "epoch": 1.0235798273205126, "grad_norm": 0.8549097180366516, "learning_rate": 5.664902654134259e-06, "loss": 0.0104, "step": 121220 }, { "epoch": 1.0236642671676763, "grad_norm": 0.09454146027565002, "learning_rate": 5.6641723095602066e-06, "loss": 0.0074, "step": 121230 }, { "epoch": 1.0237487070148403, "grad_norm": 0.10934837907552719, "learning_rate": 5.663441950560466e-06, "loss": 0.0117, "step": 121240 }, { "epoch": 1.023833146862004, "grad_norm": 0.1760939359664917, "learning_rate": 5.662711577150905e-06, "loss": 0.0057, "step": 121250 }, { "epoch": 1.023917586709168, "grad_norm": 0.23918218910694122, "learning_rate": 5.661981189347382e-06, "loss": 0.0103, "step": 121260 }, { "epoch": 1.0240020265563319, "grad_norm": 0.11797197163105011, "learning_rate": 5.661250787165763e-06, "loss": 0.0126, "step": 121270 }, { "epoch": 1.0240864664034959, "grad_norm": 0.03090112842619419, "learning_rate": 5.660520370621914e-06, "loss": 0.0103, "step": 121280 }, { "epoch": 1.0241709062506597, "grad_norm": 0.16669431328773499, "learning_rate": 5.659789939731697e-06, "loss": 0.0072, "step": 121290 }, { "epoch": 1.0242553460978236, "grad_norm": 0.3719242513179779, "learning_rate": 5.659059494510978e-06, "loss": 0.0107, "step": 121300 }, { "epoch": 1.0243397859449874, "grad_norm": 0.08066840469837189, "learning_rate": 5.658329034975621e-06, "loss": 0.0039, "step": 121310 }, { "epoch": 1.0244242257921514, "grad_norm": 0.13294708728790283, "learning_rate": 5.657598561141493e-06, "loss": 0.0172, "step": 121320 }, { "epoch": 1.0245086656393152, "grad_norm": 0.29908472299575806, "learning_rate": 5.6568680730244595e-06, "loss": 0.0079, "step": 121330 }, { "epoch": 1.024593105486479, "grad_norm": 0.4746619760990143, "learning_rate": 5.6561375706403845e-06, "loss": 0.006, "step": 121340 }, { "epoch": 1.024677545333643, "grad_norm": 0.5792072415351868, "learning_rate": 5.655407054005138e-06, "loss": 0.0128, "step": 121350 }, { "epoch": 1.0247619851808067, "grad_norm": 0.2619064152240753, "learning_rate": 5.654676523134583e-06, "loss": 0.0074, "step": 121360 }, { "epoch": 1.0248464250279707, "grad_norm": 0.2623988687992096, "learning_rate": 5.6539459780445886e-06, "loss": 0.0086, "step": 121370 }, { "epoch": 1.0249308648751345, "grad_norm": 0.9466577768325806, "learning_rate": 5.653215418751023e-06, "loss": 0.0077, "step": 121380 }, { "epoch": 1.0250153047222985, "grad_norm": 0.10376111418008804, "learning_rate": 5.65248484526975e-06, "loss": 0.0073, "step": 121390 }, { "epoch": 1.0250997445694623, "grad_norm": 0.29015594720840454, "learning_rate": 5.651754257616639e-06, "loss": 0.0113, "step": 121400 }, { "epoch": 1.0251841844166263, "grad_norm": 0.35430946946144104, "learning_rate": 5.651023655807563e-06, "loss": 0.007, "step": 121410 }, { "epoch": 1.02526862426379, "grad_norm": 0.26102709770202637, "learning_rate": 5.650293039858383e-06, "loss": 0.0043, "step": 121420 }, { "epoch": 1.025353064110954, "grad_norm": 0.07316302508115768, "learning_rate": 5.649562409784973e-06, "loss": 0.0117, "step": 121430 }, { "epoch": 1.0254375039581178, "grad_norm": 0.42598506808280945, "learning_rate": 5.6488317656032e-06, "loss": 0.0109, "step": 121440 }, { "epoch": 1.0255219438052816, "grad_norm": 0.5158089399337769, "learning_rate": 5.648101107328936e-06, "loss": 0.0051, "step": 121450 }, { "epoch": 1.0256063836524456, "grad_norm": 0.42144453525543213, "learning_rate": 5.6473704349780465e-06, "loss": 0.0165, "step": 121460 }, { "epoch": 1.0256908234996094, "grad_norm": 0.08009283989667892, "learning_rate": 5.646639748566405e-06, "loss": 0.0065, "step": 121470 }, { "epoch": 1.0257752633467734, "grad_norm": 0.40825214982032776, "learning_rate": 5.64590904810988e-06, "loss": 0.0097, "step": 121480 }, { "epoch": 1.0258597031939372, "grad_norm": 0.5513325929641724, "learning_rate": 5.645178333624343e-06, "loss": 0.0067, "step": 121490 }, { "epoch": 1.0259441430411012, "grad_norm": 0.11431463807821274, "learning_rate": 5.644447605125666e-06, "loss": 0.008, "step": 121500 }, { "epoch": 1.026028582888265, "grad_norm": 0.34980303049087524, "learning_rate": 5.643716862629717e-06, "loss": 0.0136, "step": 121510 }, { "epoch": 1.026113022735429, "grad_norm": 0.14984382688999176, "learning_rate": 5.6429861061523704e-06, "loss": 0.0043, "step": 121520 }, { "epoch": 1.0261974625825927, "grad_norm": 0.37970271706581116, "learning_rate": 5.6422553357094964e-06, "loss": 0.0073, "step": 121530 }, { "epoch": 1.0262819024297567, "grad_norm": 0.5116836428642273, "learning_rate": 5.641524551316971e-06, "loss": 0.0085, "step": 121540 }, { "epoch": 1.0263663422769205, "grad_norm": 0.24255657196044922, "learning_rate": 5.6407937529906606e-06, "loss": 0.0068, "step": 121550 }, { "epoch": 1.0264507821240842, "grad_norm": 0.5952017903327942, "learning_rate": 5.640062940746444e-06, "loss": 0.0121, "step": 121560 }, { "epoch": 1.0265352219712482, "grad_norm": 0.41267144680023193, "learning_rate": 5.63933211460019e-06, "loss": 0.0112, "step": 121570 }, { "epoch": 1.026619661818412, "grad_norm": 0.48799970746040344, "learning_rate": 5.638601274567773e-06, "loss": 0.012, "step": 121580 }, { "epoch": 1.026704101665576, "grad_norm": 0.22599105536937714, "learning_rate": 5.637870420665069e-06, "loss": 0.009, "step": 121590 }, { "epoch": 1.0267885415127398, "grad_norm": 0.07311712205410004, "learning_rate": 5.637139552907948e-06, "loss": 0.0072, "step": 121600 }, { "epoch": 1.0268729813599038, "grad_norm": 0.4466342329978943, "learning_rate": 5.636408671312288e-06, "loss": 0.0057, "step": 121610 }, { "epoch": 1.0269574212070676, "grad_norm": 0.5781732797622681, "learning_rate": 5.635677775893962e-06, "loss": 0.0086, "step": 121620 }, { "epoch": 1.0270418610542316, "grad_norm": 0.3126606047153473, "learning_rate": 5.634946866668845e-06, "loss": 0.0082, "step": 121630 }, { "epoch": 1.0271263009013953, "grad_norm": 0.47150465846061707, "learning_rate": 5.63421594365281e-06, "loss": 0.0085, "step": 121640 }, { "epoch": 1.0272107407485593, "grad_norm": 0.11264925450086594, "learning_rate": 5.633485006861736e-06, "loss": 0.01, "step": 121650 }, { "epoch": 1.0272951805957231, "grad_norm": 0.19147846102714539, "learning_rate": 5.632754056311499e-06, "loss": 0.0122, "step": 121660 }, { "epoch": 1.027379620442887, "grad_norm": 0.2760177254676819, "learning_rate": 5.632023092017972e-06, "loss": 0.0094, "step": 121670 }, { "epoch": 1.0274640602900509, "grad_norm": 0.6149808764457703, "learning_rate": 5.631292113997033e-06, "loss": 0.0048, "step": 121680 }, { "epoch": 1.0275485001372147, "grad_norm": 0.18276378512382507, "learning_rate": 5.63056112226456e-06, "loss": 0.0109, "step": 121690 }, { "epoch": 1.0276329399843787, "grad_norm": 0.27438512444496155, "learning_rate": 5.6298301168364275e-06, "loss": 0.0052, "step": 121700 }, { "epoch": 1.0277173798315424, "grad_norm": 0.04019560664892197, "learning_rate": 5.629099097728514e-06, "loss": 0.0119, "step": 121710 }, { "epoch": 1.0278018196787064, "grad_norm": 0.2944898009300232, "learning_rate": 5.628368064956696e-06, "loss": 0.0091, "step": 121720 }, { "epoch": 1.0278862595258702, "grad_norm": 0.3804556727409363, "learning_rate": 5.627637018536853e-06, "loss": 0.0104, "step": 121730 }, { "epoch": 1.0279706993730342, "grad_norm": 0.1458982229232788, "learning_rate": 5.626905958484864e-06, "loss": 0.0108, "step": 121740 }, { "epoch": 1.028055139220198, "grad_norm": 0.2252902239561081, "learning_rate": 5.626174884816605e-06, "loss": 0.0108, "step": 121750 }, { "epoch": 1.028139579067362, "grad_norm": 0.24740318953990936, "learning_rate": 5.625443797547956e-06, "loss": 0.0068, "step": 121760 }, { "epoch": 1.0282240189145258, "grad_norm": 0.25486719608306885, "learning_rate": 5.624712696694795e-06, "loss": 0.0064, "step": 121770 }, { "epoch": 1.0283084587616897, "grad_norm": 0.234811931848526, "learning_rate": 5.623981582273004e-06, "loss": 0.006, "step": 121780 }, { "epoch": 1.0283928986088535, "grad_norm": 0.361092209815979, "learning_rate": 5.623250454298459e-06, "loss": 0.0052, "step": 121790 }, { "epoch": 1.0284773384560173, "grad_norm": 0.3531896770000458, "learning_rate": 5.622519312787042e-06, "loss": 0.0081, "step": 121800 }, { "epoch": 1.0285617783031813, "grad_norm": 0.1358303725719452, "learning_rate": 5.621788157754634e-06, "loss": 0.0041, "step": 121810 }, { "epoch": 1.028646218150345, "grad_norm": 0.16131161153316498, "learning_rate": 5.6210569892171166e-06, "loss": 0.0102, "step": 121820 }, { "epoch": 1.028730657997509, "grad_norm": 0.5505090951919556, "learning_rate": 5.620325807190365e-06, "loss": 0.0076, "step": 121830 }, { "epoch": 1.0288150978446728, "grad_norm": 0.5621902346611023, "learning_rate": 5.619594611690265e-06, "loss": 0.0092, "step": 121840 }, { "epoch": 1.0288995376918368, "grad_norm": 0.33431118726730347, "learning_rate": 5.6188634027326984e-06, "loss": 0.0054, "step": 121850 }, { "epoch": 1.0289839775390006, "grad_norm": 0.26030778884887695, "learning_rate": 5.618132180333545e-06, "loss": 0.0085, "step": 121860 }, { "epoch": 1.0290684173861646, "grad_norm": 0.5764931440353394, "learning_rate": 5.617400944508686e-06, "loss": 0.0101, "step": 121870 }, { "epoch": 1.0291528572333284, "grad_norm": 0.1885429471731186, "learning_rate": 5.6166696952740065e-06, "loss": 0.0064, "step": 121880 }, { "epoch": 1.0292372970804924, "grad_norm": 0.11758068948984146, "learning_rate": 5.615938432645387e-06, "loss": 0.015, "step": 121890 }, { "epoch": 1.0293217369276562, "grad_norm": 0.1904309242963791, "learning_rate": 5.615207156638711e-06, "loss": 0.008, "step": 121900 }, { "epoch": 1.02940617677482, "grad_norm": 0.3537720739841461, "learning_rate": 5.614475867269862e-06, "loss": 0.0092, "step": 121910 }, { "epoch": 1.029490616621984, "grad_norm": 0.6353546977043152, "learning_rate": 5.6137445645547215e-06, "loss": 0.0086, "step": 121920 }, { "epoch": 1.0295750564691477, "grad_norm": 0.6777573227882385, "learning_rate": 5.613013248509176e-06, "loss": 0.0112, "step": 121930 }, { "epoch": 1.0296594963163117, "grad_norm": 0.3907168209552765, "learning_rate": 5.61228191914911e-06, "loss": 0.0091, "step": 121940 }, { "epoch": 1.0297439361634755, "grad_norm": 0.5828613638877869, "learning_rate": 5.611550576490404e-06, "loss": 0.0085, "step": 121950 }, { "epoch": 1.0298283760106395, "grad_norm": 0.21708649396896362, "learning_rate": 5.6108192205489455e-06, "loss": 0.008, "step": 121960 }, { "epoch": 1.0299128158578033, "grad_norm": 0.306640088558197, "learning_rate": 5.610087851340617e-06, "loss": 0.0075, "step": 121970 }, { "epoch": 1.0299972557049673, "grad_norm": 0.13631348311901093, "learning_rate": 5.609356468881308e-06, "loss": 0.0047, "step": 121980 }, { "epoch": 1.030081695552131, "grad_norm": 0.06417787075042725, "learning_rate": 5.608625073186901e-06, "loss": 0.0059, "step": 121990 }, { "epoch": 1.030166135399295, "grad_norm": 0.542443037033081, "learning_rate": 5.607893664273281e-06, "loss": 0.0089, "step": 122000 }, { "epoch": 1.030166135399295, "eval_loss": 0.00824071653187275, "eval_runtime": 3.0788, "eval_samples_per_second": 64.96, "eval_steps_per_second": 32.48, "step": 122000 }, { "epoch": 1.0302505752464588, "grad_norm": 0.37915849685668945, "learning_rate": 5.6071622421563355e-06, "loss": 0.0116, "step": 122010 }, { "epoch": 1.0303350150936226, "grad_norm": 0.24329346418380737, "learning_rate": 5.606430806851951e-06, "loss": 0.0047, "step": 122020 }, { "epoch": 1.0304194549407866, "grad_norm": 0.2642771303653717, "learning_rate": 5.605699358376013e-06, "loss": 0.0077, "step": 122030 }, { "epoch": 1.0305038947879503, "grad_norm": 0.0133597357198596, "learning_rate": 5.604967896744409e-06, "loss": 0.0089, "step": 122040 }, { "epoch": 1.0305883346351143, "grad_norm": 0.3637317717075348, "learning_rate": 5.604236421973026e-06, "loss": 0.006, "step": 122050 }, { "epoch": 1.0306727744822781, "grad_norm": 0.6165900826454163, "learning_rate": 5.603504934077753e-06, "loss": 0.0101, "step": 122060 }, { "epoch": 1.0307572143294421, "grad_norm": 0.5983216166496277, "learning_rate": 5.602773433074477e-06, "loss": 0.0067, "step": 122070 }, { "epoch": 1.030841654176606, "grad_norm": 0.40052807331085205, "learning_rate": 5.602041918979083e-06, "loss": 0.0076, "step": 122080 }, { "epoch": 1.03092609402377, "grad_norm": 0.25365909934043884, "learning_rate": 5.601310391807464e-06, "loss": 0.011, "step": 122090 }, { "epoch": 1.0310105338709337, "grad_norm": 0.17564454674720764, "learning_rate": 5.6005788515755045e-06, "loss": 0.005, "step": 122100 }, { "epoch": 1.0310949737180977, "grad_norm": 0.39958733320236206, "learning_rate": 5.599847298299098e-06, "loss": 0.0119, "step": 122110 }, { "epoch": 1.0311794135652614, "grad_norm": 0.24311962723731995, "learning_rate": 5.599115731994129e-06, "loss": 0.0026, "step": 122120 }, { "epoch": 1.0312638534124252, "grad_norm": 0.5665916800498962, "learning_rate": 5.598384152676489e-06, "loss": 0.0112, "step": 122130 }, { "epoch": 1.0313482932595892, "grad_norm": 0.4384024441242218, "learning_rate": 5.59765256036207e-06, "loss": 0.0142, "step": 122140 }, { "epoch": 1.031432733106753, "grad_norm": 0.4140993356704712, "learning_rate": 5.596920955066758e-06, "loss": 0.0076, "step": 122150 }, { "epoch": 1.031517172953917, "grad_norm": 0.1939525008201599, "learning_rate": 5.596189336806446e-06, "loss": 0.0039, "step": 122160 }, { "epoch": 1.0316016128010808, "grad_norm": 0.002396015450358391, "learning_rate": 5.595457705597023e-06, "loss": 0.0049, "step": 122170 }, { "epoch": 1.0316860526482448, "grad_norm": 0.29483696818351746, "learning_rate": 5.594726061454382e-06, "loss": 0.0078, "step": 122180 }, { "epoch": 1.0317704924954085, "grad_norm": 0.2731517553329468, "learning_rate": 5.593994404394413e-06, "loss": 0.0159, "step": 122190 }, { "epoch": 1.0318549323425725, "grad_norm": 0.06404633074998856, "learning_rate": 5.593262734433005e-06, "loss": 0.007, "step": 122200 }, { "epoch": 1.0319393721897363, "grad_norm": 0.2549768388271332, "learning_rate": 5.592531051586053e-06, "loss": 0.0061, "step": 122210 }, { "epoch": 1.0320238120369003, "grad_norm": 0.09029047936201096, "learning_rate": 5.591799355869449e-06, "loss": 0.0105, "step": 122220 }, { "epoch": 1.032108251884064, "grad_norm": 0.020961439236998558, "learning_rate": 5.591067647299083e-06, "loss": 0.0047, "step": 122230 }, { "epoch": 1.032192691731228, "grad_norm": 0.14633554220199585, "learning_rate": 5.590335925890849e-06, "loss": 0.0097, "step": 122240 }, { "epoch": 1.0322771315783918, "grad_norm": 0.6680298447608948, "learning_rate": 5.58960419166064e-06, "loss": 0.0124, "step": 122250 }, { "epoch": 1.0323615714255556, "grad_norm": 0.33021917939186096, "learning_rate": 5.588872444624348e-06, "loss": 0.0161, "step": 122260 }, { "epoch": 1.0324460112727196, "grad_norm": 0.04992092400789261, "learning_rate": 5.588140684797869e-06, "loss": 0.0065, "step": 122270 }, { "epoch": 1.0325304511198834, "grad_norm": 0.24425122141838074, "learning_rate": 5.587408912197093e-06, "loss": 0.0069, "step": 122280 }, { "epoch": 1.0326148909670474, "grad_norm": 0.24134552478790283, "learning_rate": 5.586677126837916e-06, "loss": 0.0054, "step": 122290 }, { "epoch": 1.0326993308142112, "grad_norm": 0.16712963581085205, "learning_rate": 5.585945328736233e-06, "loss": 0.0097, "step": 122300 }, { "epoch": 1.0327837706613752, "grad_norm": 0.20565883815288544, "learning_rate": 5.585213517907938e-06, "loss": 0.0051, "step": 122310 }, { "epoch": 1.032868210508539, "grad_norm": 0.28071674704551697, "learning_rate": 5.584481694368924e-06, "loss": 0.0043, "step": 122320 }, { "epoch": 1.032952650355703, "grad_norm": 0.01419959869235754, "learning_rate": 5.583749858135088e-06, "loss": 0.0069, "step": 122330 }, { "epoch": 1.0330370902028667, "grad_norm": 0.09659633785486221, "learning_rate": 5.583018009222324e-06, "loss": 0.0058, "step": 122340 }, { "epoch": 1.0331215300500307, "grad_norm": 0.0022102196235209703, "learning_rate": 5.58228614764653e-06, "loss": 0.0057, "step": 122350 }, { "epoch": 1.0332059698971945, "grad_norm": 0.47726500034332275, "learning_rate": 5.581554273423598e-06, "loss": 0.0135, "step": 122360 }, { "epoch": 1.0332904097443583, "grad_norm": 0.20801997184753418, "learning_rate": 5.580822386569427e-06, "loss": 0.0043, "step": 122370 }, { "epoch": 1.0333748495915223, "grad_norm": 0.6109471321105957, "learning_rate": 5.580090487099913e-06, "loss": 0.0125, "step": 122380 }, { "epoch": 1.033459289438686, "grad_norm": 0.12457927316427231, "learning_rate": 5.579358575030953e-06, "loss": 0.0137, "step": 122390 }, { "epoch": 1.03354372928585, "grad_norm": 0.07107671350240707, "learning_rate": 5.578626650378443e-06, "loss": 0.0142, "step": 122400 }, { "epoch": 1.0336281691330138, "grad_norm": 0.038641307502985, "learning_rate": 5.577894713158282e-06, "loss": 0.0108, "step": 122410 }, { "epoch": 1.0337126089801778, "grad_norm": 0.18297551572322845, "learning_rate": 5.577162763386364e-06, "loss": 0.0053, "step": 122420 }, { "epoch": 1.0337970488273416, "grad_norm": 0.2680034041404724, "learning_rate": 5.57643080107859e-06, "loss": 0.0066, "step": 122430 }, { "epoch": 1.0338814886745056, "grad_norm": 0.27848508954048157, "learning_rate": 5.5756988262508586e-06, "loss": 0.0084, "step": 122440 }, { "epoch": 1.0339659285216694, "grad_norm": 0.6310656666755676, "learning_rate": 5.574966838919065e-06, "loss": 0.0058, "step": 122450 }, { "epoch": 1.0340503683688334, "grad_norm": 0.44446682929992676, "learning_rate": 5.5742348390991086e-06, "loss": 0.0056, "step": 122460 }, { "epoch": 1.0341348082159971, "grad_norm": 0.1580418348312378, "learning_rate": 5.573502826806891e-06, "loss": 0.0068, "step": 122470 }, { "epoch": 1.034219248063161, "grad_norm": 0.19178876280784607, "learning_rate": 5.572770802058308e-06, "loss": 0.009, "step": 122480 }, { "epoch": 1.034303687910325, "grad_norm": 0.13287176191806793, "learning_rate": 5.572038764869262e-06, "loss": 0.0072, "step": 122490 }, { "epoch": 1.0343881277574887, "grad_norm": 0.22014300525188446, "learning_rate": 5.57130671525565e-06, "loss": 0.0055, "step": 122500 }, { "epoch": 1.0344725676046527, "grad_norm": 0.2638593912124634, "learning_rate": 5.570574653233375e-06, "loss": 0.0048, "step": 122510 }, { "epoch": 1.0345570074518164, "grad_norm": 0.16922567784786224, "learning_rate": 5.569842578818334e-06, "loss": 0.0078, "step": 122520 }, { "epoch": 1.0346414472989804, "grad_norm": 0.6876919865608215, "learning_rate": 5.569110492026429e-06, "loss": 0.0119, "step": 122530 }, { "epoch": 1.0347258871461442, "grad_norm": 0.10626549273729324, "learning_rate": 5.5683783928735615e-06, "loss": 0.0085, "step": 122540 }, { "epoch": 1.0348103269933082, "grad_norm": 0.6230131983757019, "learning_rate": 5.567646281375631e-06, "loss": 0.0077, "step": 122550 }, { "epoch": 1.034894766840472, "grad_norm": 0.5938384532928467, "learning_rate": 5.566914157548541e-06, "loss": 0.0186, "step": 122560 }, { "epoch": 1.034979206687636, "grad_norm": 0.26627933979034424, "learning_rate": 5.5661820214081895e-06, "loss": 0.008, "step": 122570 }, { "epoch": 1.0350636465347998, "grad_norm": 0.3490466773509979, "learning_rate": 5.565449872970481e-06, "loss": 0.0085, "step": 122580 }, { "epoch": 1.0351480863819638, "grad_norm": 0.2838904857635498, "learning_rate": 5.564717712251318e-06, "loss": 0.0048, "step": 122590 }, { "epoch": 1.0352325262291275, "grad_norm": 0.21032127737998962, "learning_rate": 5.563985539266602e-06, "loss": 0.0053, "step": 122600 }, { "epoch": 1.0353169660762913, "grad_norm": 0.2703399062156677, "learning_rate": 5.563253354032235e-06, "loss": 0.0062, "step": 122610 }, { "epoch": 1.0354014059234553, "grad_norm": 0.6413044929504395, "learning_rate": 5.562521156564121e-06, "loss": 0.0171, "step": 122620 }, { "epoch": 1.035485845770619, "grad_norm": 0.08630356192588806, "learning_rate": 5.561788946878164e-06, "loss": 0.0093, "step": 122630 }, { "epoch": 1.035570285617783, "grad_norm": 0.3024037182331085, "learning_rate": 5.561056724990264e-06, "loss": 0.011, "step": 122640 }, { "epoch": 1.0356547254649469, "grad_norm": 0.4062208831310272, "learning_rate": 5.560324490916329e-06, "loss": 0.0118, "step": 122650 }, { "epoch": 1.0357391653121109, "grad_norm": 0.2308102250099182, "learning_rate": 5.559592244672258e-06, "loss": 0.0045, "step": 122660 }, { "epoch": 1.0358236051592746, "grad_norm": 1.3726602792739868, "learning_rate": 5.55885998627396e-06, "loss": 0.0061, "step": 122670 }, { "epoch": 1.0359080450064386, "grad_norm": 0.6328408122062683, "learning_rate": 5.558127715737338e-06, "loss": 0.0111, "step": 122680 }, { "epoch": 1.0359924848536024, "grad_norm": 0.20019204914569855, "learning_rate": 5.5573954330782965e-06, "loss": 0.0087, "step": 122690 }, { "epoch": 1.0360769247007662, "grad_norm": 0.2807539403438568, "learning_rate": 5.5566631383127385e-06, "loss": 0.0147, "step": 122700 }, { "epoch": 1.0361613645479302, "grad_norm": 0.1985677033662796, "learning_rate": 5.555930831456573e-06, "loss": 0.0068, "step": 122710 }, { "epoch": 1.036245804395094, "grad_norm": 0.25644078850746155, "learning_rate": 5.555198512525704e-06, "loss": 0.0072, "step": 122720 }, { "epoch": 1.036330244242258, "grad_norm": 0.17603187263011932, "learning_rate": 5.5544661815360354e-06, "loss": 0.0133, "step": 122730 }, { "epoch": 1.0364146840894217, "grad_norm": 0.04034702479839325, "learning_rate": 5.5537338385034755e-06, "loss": 0.0086, "step": 122740 }, { "epoch": 1.0364991239365857, "grad_norm": 0.5189946293830872, "learning_rate": 5.55300148344393e-06, "loss": 0.0069, "step": 122750 }, { "epoch": 1.0365835637837495, "grad_norm": 0.7422388792037964, "learning_rate": 5.5522691163733064e-06, "loss": 0.0089, "step": 122760 }, { "epoch": 1.0366680036309135, "grad_norm": 0.32849419116973877, "learning_rate": 5.551536737307511e-06, "loss": 0.0085, "step": 122770 }, { "epoch": 1.0367524434780773, "grad_norm": 0.042061783373355865, "learning_rate": 5.55080434626245e-06, "loss": 0.006, "step": 122780 }, { "epoch": 1.0368368833252413, "grad_norm": 0.014243385754525661, "learning_rate": 5.550071943254032e-06, "loss": 0.0064, "step": 122790 }, { "epoch": 1.036921323172405, "grad_norm": 0.2963848412036896, "learning_rate": 5.549339528298163e-06, "loss": 0.0078, "step": 122800 }, { "epoch": 1.037005763019569, "grad_norm": 0.02360871434211731, "learning_rate": 5.548607101410753e-06, "loss": 0.0052, "step": 122810 }, { "epoch": 1.0370902028667328, "grad_norm": 0.21782056987285614, "learning_rate": 5.5478746626077085e-06, "loss": 0.0145, "step": 122820 }, { "epoch": 1.0371746427138966, "grad_norm": 0.12611372768878937, "learning_rate": 5.547142211904939e-06, "loss": 0.0048, "step": 122830 }, { "epoch": 1.0372590825610606, "grad_norm": 0.45713141560554504, "learning_rate": 5.546409749318354e-06, "loss": 0.0063, "step": 122840 }, { "epoch": 1.0373435224082244, "grad_norm": 0.12426844239234924, "learning_rate": 5.545677274863859e-06, "loss": 0.0093, "step": 122850 }, { "epoch": 1.0374279622553884, "grad_norm": 0.4126318693161011, "learning_rate": 5.544944788557366e-06, "loss": 0.0056, "step": 122860 }, { "epoch": 1.0375124021025521, "grad_norm": 0.2721317708492279, "learning_rate": 5.544212290414783e-06, "loss": 0.0071, "step": 122870 }, { "epoch": 1.0375968419497161, "grad_norm": 0.17174670100212097, "learning_rate": 5.543479780452024e-06, "loss": 0.0064, "step": 122880 }, { "epoch": 1.03768128179688, "grad_norm": 0.4239267110824585, "learning_rate": 5.542747258684993e-06, "loss": 0.0088, "step": 122890 }, { "epoch": 1.037765721644044, "grad_norm": 0.23192498087882996, "learning_rate": 5.542014725129603e-06, "loss": 0.0037, "step": 122900 }, { "epoch": 1.0378501614912077, "grad_norm": 0.1688176989555359, "learning_rate": 5.541282179801764e-06, "loss": 0.0076, "step": 122910 }, { "epoch": 1.0379346013383717, "grad_norm": 0.004336785990744829, "learning_rate": 5.540549622717388e-06, "loss": 0.0062, "step": 122920 }, { "epoch": 1.0380190411855355, "grad_norm": 0.1723889410495758, "learning_rate": 5.539817053892385e-06, "loss": 0.0061, "step": 122930 }, { "epoch": 1.0381034810326992, "grad_norm": 0.15208493173122406, "learning_rate": 5.539084473342664e-06, "loss": 0.0048, "step": 122940 }, { "epoch": 1.0381879208798632, "grad_norm": 0.15073755383491516, "learning_rate": 5.53835188108414e-06, "loss": 0.0074, "step": 122950 }, { "epoch": 1.038272360727027, "grad_norm": 0.6640814542770386, "learning_rate": 5.537619277132724e-06, "loss": 0.012, "step": 122960 }, { "epoch": 1.038356800574191, "grad_norm": 0.22353048622608185, "learning_rate": 5.536886661504327e-06, "loss": 0.0136, "step": 122970 }, { "epoch": 1.0384412404213548, "grad_norm": 0.4835212826728821, "learning_rate": 5.536154034214861e-06, "loss": 0.0071, "step": 122980 }, { "epoch": 1.0385256802685188, "grad_norm": 0.4968363642692566, "learning_rate": 5.53542139528024e-06, "loss": 0.008, "step": 122990 }, { "epoch": 1.0386101201156825, "grad_norm": 0.0800437182188034, "learning_rate": 5.534688744716377e-06, "loss": 0.0078, "step": 123000 }, { "epoch": 1.0386945599628465, "grad_norm": 0.4000222980976105, "learning_rate": 5.5339560825391835e-06, "loss": 0.0079, "step": 123010 }, { "epoch": 1.0387789998100103, "grad_norm": 0.23064962029457092, "learning_rate": 5.5332234087645735e-06, "loss": 0.004, "step": 123020 }, { "epoch": 1.0388634396571743, "grad_norm": 0.12301883846521378, "learning_rate": 5.532490723408459e-06, "loss": 0.0065, "step": 123030 }, { "epoch": 1.038947879504338, "grad_norm": 0.34055495262145996, "learning_rate": 5.531758026486757e-06, "loss": 0.0076, "step": 123040 }, { "epoch": 1.0390323193515019, "grad_norm": 0.29626166820526123, "learning_rate": 5.5310253180153785e-06, "loss": 0.0101, "step": 123050 }, { "epoch": 1.0391167591986659, "grad_norm": 0.5288642644882202, "learning_rate": 5.530292598010239e-06, "loss": 0.0057, "step": 123060 }, { "epoch": 1.0392011990458296, "grad_norm": 0.13327498733997345, "learning_rate": 5.529559866487252e-06, "loss": 0.0058, "step": 123070 }, { "epoch": 1.0392856388929936, "grad_norm": 0.29514607787132263, "learning_rate": 5.528827123462335e-06, "loss": 0.0097, "step": 123080 }, { "epoch": 1.0393700787401574, "grad_norm": 0.23460954427719116, "learning_rate": 5.528094368951399e-06, "loss": 0.0062, "step": 123090 }, { "epoch": 1.0394545185873214, "grad_norm": 0.211610347032547, "learning_rate": 5.527361602970363e-06, "loss": 0.0073, "step": 123100 }, { "epoch": 1.0395389584344852, "grad_norm": 0.530876100063324, "learning_rate": 5.526628825535141e-06, "loss": 0.0084, "step": 123110 }, { "epoch": 1.0396233982816492, "grad_norm": 0.21354088187217712, "learning_rate": 5.525896036661649e-06, "loss": 0.0055, "step": 123120 }, { "epoch": 1.039707838128813, "grad_norm": 0.28894099593162537, "learning_rate": 5.525163236365802e-06, "loss": 0.0085, "step": 123130 }, { "epoch": 1.039792277975977, "grad_norm": 0.1517752707004547, "learning_rate": 5.524430424663518e-06, "loss": 0.0056, "step": 123140 }, { "epoch": 1.0398767178231407, "grad_norm": 0.23017296195030212, "learning_rate": 5.523697601570712e-06, "loss": 0.0069, "step": 123150 }, { "epoch": 1.0399611576703047, "grad_norm": 0.059538453817367554, "learning_rate": 5.522964767103302e-06, "loss": 0.0081, "step": 123160 }, { "epoch": 1.0400455975174685, "grad_norm": 0.2920083701610565, "learning_rate": 5.5222319212772046e-06, "loss": 0.0097, "step": 123170 }, { "epoch": 1.0401300373646323, "grad_norm": 0.3202478885650635, "learning_rate": 5.521499064108337e-06, "loss": 0.0069, "step": 123180 }, { "epoch": 1.0402144772117963, "grad_norm": 0.20270493626594543, "learning_rate": 5.520766195612615e-06, "loss": 0.0047, "step": 123190 }, { "epoch": 1.04029891705896, "grad_norm": 0.4440656006336212, "learning_rate": 5.520033315805959e-06, "loss": 0.0083, "step": 123200 }, { "epoch": 1.040383356906124, "grad_norm": 0.23225726187229156, "learning_rate": 5.5193004247042855e-06, "loss": 0.0072, "step": 123210 }, { "epoch": 1.0404677967532878, "grad_norm": 0.03201013803482056, "learning_rate": 5.518567522323514e-06, "loss": 0.0085, "step": 123220 }, { "epoch": 1.0405522366004518, "grad_norm": 0.4149024784564972, "learning_rate": 5.517834608679561e-06, "loss": 0.0134, "step": 123230 }, { "epoch": 1.0406366764476156, "grad_norm": 0.9697461724281311, "learning_rate": 5.517101683788348e-06, "loss": 0.0132, "step": 123240 }, { "epoch": 1.0407211162947796, "grad_norm": 0.2977733910083771, "learning_rate": 5.516368747665792e-06, "loss": 0.0069, "step": 123250 }, { "epoch": 1.0408055561419434, "grad_norm": 0.2804575562477112, "learning_rate": 5.515635800327812e-06, "loss": 0.0096, "step": 123260 }, { "epoch": 1.0408899959891074, "grad_norm": 0.22050771117210388, "learning_rate": 5.514902841790328e-06, "loss": 0.0081, "step": 123270 }, { "epoch": 1.0409744358362711, "grad_norm": 0.5119603276252747, "learning_rate": 5.514169872069261e-06, "loss": 0.0057, "step": 123280 }, { "epoch": 1.041058875683435, "grad_norm": 0.2140996903181076, "learning_rate": 5.5134368911805295e-06, "loss": 0.0139, "step": 123290 }, { "epoch": 1.041143315530599, "grad_norm": 0.3106233477592468, "learning_rate": 5.512703899140053e-06, "loss": 0.006, "step": 123300 }, { "epoch": 1.0412277553777627, "grad_norm": 0.5017881393432617, "learning_rate": 5.511970895963752e-06, "loss": 0.0099, "step": 123310 }, { "epoch": 1.0413121952249267, "grad_norm": 0.2693482041358948, "learning_rate": 5.5112378816675496e-06, "loss": 0.0097, "step": 123320 }, { "epoch": 1.0413966350720905, "grad_norm": 0.5566788911819458, "learning_rate": 5.510504856267365e-06, "loss": 0.0078, "step": 123330 }, { "epoch": 1.0414810749192545, "grad_norm": 0.9977454543113708, "learning_rate": 5.509771819779118e-06, "loss": 0.0135, "step": 123340 }, { "epoch": 1.0415655147664182, "grad_norm": 0.12776590883731842, "learning_rate": 5.509038772218733e-06, "loss": 0.0079, "step": 123350 }, { "epoch": 1.0416499546135822, "grad_norm": 0.20524567365646362, "learning_rate": 5.508305713602131e-06, "loss": 0.0079, "step": 123360 }, { "epoch": 1.041734394460746, "grad_norm": 0.3710850775241852, "learning_rate": 5.507572643945232e-06, "loss": 0.0063, "step": 123370 }, { "epoch": 1.04181883430791, "grad_norm": 0.026862788945436478, "learning_rate": 5.506839563263959e-06, "loss": 0.0116, "step": 123380 }, { "epoch": 1.0419032741550738, "grad_norm": 0.3720870018005371, "learning_rate": 5.506106471574235e-06, "loss": 0.0094, "step": 123390 }, { "epoch": 1.0419877140022376, "grad_norm": 0.20994892716407776, "learning_rate": 5.505373368891982e-06, "loss": 0.0052, "step": 123400 }, { "epoch": 1.0420721538494016, "grad_norm": 0.02623041719198227, "learning_rate": 5.504640255233124e-06, "loss": 0.0066, "step": 123410 }, { "epoch": 1.0421565936965653, "grad_norm": 0.23224815726280212, "learning_rate": 5.5039071306135815e-06, "loss": 0.0096, "step": 123420 }, { "epoch": 1.0422410335437293, "grad_norm": 0.21745215356349945, "learning_rate": 5.503173995049279e-06, "loss": 0.0112, "step": 123430 }, { "epoch": 1.042325473390893, "grad_norm": 0.2158960998058319, "learning_rate": 5.502440848556141e-06, "loss": 0.0039, "step": 123440 }, { "epoch": 1.042409913238057, "grad_norm": 0.2592749297618866, "learning_rate": 5.501707691150092e-06, "loss": 0.0085, "step": 123450 }, { "epoch": 1.0424943530852209, "grad_norm": 0.3609877824783325, "learning_rate": 5.500974522847053e-06, "loss": 0.0076, "step": 123460 }, { "epoch": 1.0425787929323849, "grad_norm": 0.28600943088531494, "learning_rate": 5.500241343662951e-06, "loss": 0.0038, "step": 123470 }, { "epoch": 1.0426632327795486, "grad_norm": 0.4554922580718994, "learning_rate": 5.499508153613711e-06, "loss": 0.0049, "step": 123480 }, { "epoch": 1.0427476726267126, "grad_norm": 0.30459079146385193, "learning_rate": 5.4987749527152545e-06, "loss": 0.0049, "step": 123490 }, { "epoch": 1.0428321124738764, "grad_norm": 0.36774131655693054, "learning_rate": 5.4980417409835085e-06, "loss": 0.006, "step": 123500 }, { "epoch": 1.0429165523210404, "grad_norm": 0.18773305416107178, "learning_rate": 5.497308518434398e-06, "loss": 0.0083, "step": 123510 }, { "epoch": 1.0430009921682042, "grad_norm": 0.2955644428730011, "learning_rate": 5.496575285083847e-06, "loss": 0.0038, "step": 123520 }, { "epoch": 1.043085432015368, "grad_norm": 0.2641494870185852, "learning_rate": 5.495842040947784e-06, "loss": 0.0078, "step": 123530 }, { "epoch": 1.043169871862532, "grad_norm": 0.04410334676504135, "learning_rate": 5.4951087860421336e-06, "loss": 0.0045, "step": 123540 }, { "epoch": 1.0432543117096957, "grad_norm": 0.309318482875824, "learning_rate": 5.494375520382821e-06, "loss": 0.0088, "step": 123550 }, { "epoch": 1.0433387515568597, "grad_norm": 0.36142846941947937, "learning_rate": 5.493642243985774e-06, "loss": 0.0095, "step": 123560 }, { "epoch": 1.0434231914040235, "grad_norm": 0.2326018512248993, "learning_rate": 5.492908956866919e-06, "loss": 0.0072, "step": 123570 }, { "epoch": 1.0435076312511875, "grad_norm": 0.06398294121026993, "learning_rate": 5.492175659042182e-06, "loss": 0.0066, "step": 123580 }, { "epoch": 1.0435920710983513, "grad_norm": 0.012549548409879208, "learning_rate": 5.491442350527489e-06, "loss": 0.0084, "step": 123590 }, { "epoch": 1.0436765109455153, "grad_norm": 0.27790093421936035, "learning_rate": 5.490709031338771e-06, "loss": 0.0141, "step": 123600 }, { "epoch": 1.043760950792679, "grad_norm": 0.03457944467663765, "learning_rate": 5.489975701491953e-06, "loss": 0.0048, "step": 123610 }, { "epoch": 1.0438453906398428, "grad_norm": 0.4772583544254303, "learning_rate": 5.4892423610029635e-06, "loss": 0.0102, "step": 123620 }, { "epoch": 1.0439298304870068, "grad_norm": 0.2662844657897949, "learning_rate": 5.4885090098877295e-06, "loss": 0.0086, "step": 123630 }, { "epoch": 1.0440142703341706, "grad_norm": 0.18696853518486023, "learning_rate": 5.487775648162179e-06, "loss": 0.0048, "step": 123640 }, { "epoch": 1.0440987101813346, "grad_norm": 0.21148888766765594, "learning_rate": 5.487042275842243e-06, "loss": 0.0069, "step": 123650 }, { "epoch": 1.0441831500284984, "grad_norm": 0.10631844401359558, "learning_rate": 5.486308892943848e-06, "loss": 0.004, "step": 123660 }, { "epoch": 1.0442675898756624, "grad_norm": 0.6310610175132751, "learning_rate": 5.485575499482924e-06, "loss": 0.0108, "step": 123670 }, { "epoch": 1.0443520297228261, "grad_norm": 0.05281950160861015, "learning_rate": 5.4848420954753976e-06, "loss": 0.0084, "step": 123680 }, { "epoch": 1.0444364695699901, "grad_norm": 0.2232089340686798, "learning_rate": 5.484108680937204e-06, "loss": 0.0063, "step": 123690 }, { "epoch": 1.044520909417154, "grad_norm": 0.5656384825706482, "learning_rate": 5.4833752558842656e-06, "loss": 0.0072, "step": 123700 }, { "epoch": 1.044605349264318, "grad_norm": 0.27007856965065, "learning_rate": 5.4826418203325175e-06, "loss": 0.0087, "step": 123710 }, { "epoch": 1.0446897891114817, "grad_norm": 0.9570157527923584, "learning_rate": 5.481908374297887e-06, "loss": 0.0114, "step": 123720 }, { "epoch": 1.0447742289586457, "grad_norm": 0.20386813580989838, "learning_rate": 5.4811749177963055e-06, "loss": 0.0089, "step": 123730 }, { "epoch": 1.0448586688058095, "grad_norm": 0.2912225127220154, "learning_rate": 5.480441450843704e-06, "loss": 0.0153, "step": 123740 }, { "epoch": 1.0449431086529732, "grad_norm": 0.2876770496368408, "learning_rate": 5.479707973456012e-06, "loss": 0.0038, "step": 123750 }, { "epoch": 1.0450275485001372, "grad_norm": 0.15917623043060303, "learning_rate": 5.478974485649161e-06, "loss": 0.0078, "step": 123760 }, { "epoch": 1.045111988347301, "grad_norm": 0.545594334602356, "learning_rate": 5.478240987439083e-06, "loss": 0.0095, "step": 123770 }, { "epoch": 1.045196428194465, "grad_norm": 0.31804636120796204, "learning_rate": 5.477507478841709e-06, "loss": 0.0075, "step": 123780 }, { "epoch": 1.0452808680416288, "grad_norm": 0.5586056113243103, "learning_rate": 5.47677395987297e-06, "loss": 0.006, "step": 123790 }, { "epoch": 1.0453653078887928, "grad_norm": 0.6171161532402039, "learning_rate": 5.476040430548797e-06, "loss": 0.0115, "step": 123800 }, { "epoch": 1.0454497477359566, "grad_norm": 0.3943711221218109, "learning_rate": 5.475306890885126e-06, "loss": 0.0042, "step": 123810 }, { "epoch": 1.0455341875831206, "grad_norm": 0.31898894906044006, "learning_rate": 5.474573340897884e-06, "loss": 0.0102, "step": 123820 }, { "epoch": 1.0456186274302843, "grad_norm": 0.12183146178722382, "learning_rate": 5.4738397806030086e-06, "loss": 0.0036, "step": 123830 }, { "epoch": 1.0457030672774483, "grad_norm": 0.6396414041519165, "learning_rate": 5.473106210016429e-06, "loss": 0.0088, "step": 123840 }, { "epoch": 1.045787507124612, "grad_norm": 0.239518404006958, "learning_rate": 5.472372629154079e-06, "loss": 0.0056, "step": 123850 }, { "epoch": 1.0458719469717759, "grad_norm": 0.30988389253616333, "learning_rate": 5.471639038031894e-06, "loss": 0.0076, "step": 123860 }, { "epoch": 1.0459563868189399, "grad_norm": 0.10047536343336105, "learning_rate": 5.4709054366658046e-06, "loss": 0.0138, "step": 123870 }, { "epoch": 1.0460408266661037, "grad_norm": 0.33432456851005554, "learning_rate": 5.470171825071745e-06, "loss": 0.0101, "step": 123880 }, { "epoch": 1.0461252665132676, "grad_norm": 0.30807918310165405, "learning_rate": 5.46943820326565e-06, "loss": 0.0093, "step": 123890 }, { "epoch": 1.0462097063604314, "grad_norm": 0.42678987979888916, "learning_rate": 5.468704571263455e-06, "loss": 0.0097, "step": 123900 }, { "epoch": 1.0462941462075954, "grad_norm": 0.27331608533859253, "learning_rate": 5.4679709290810914e-06, "loss": 0.012, "step": 123910 }, { "epoch": 1.0463785860547592, "grad_norm": 1.6208999156951904, "learning_rate": 5.467237276734495e-06, "loss": 0.0086, "step": 123920 }, { "epoch": 1.0464630259019232, "grad_norm": 0.15742918848991394, "learning_rate": 5.466503614239601e-06, "loss": 0.0075, "step": 123930 }, { "epoch": 1.046547465749087, "grad_norm": 0.14630445837974548, "learning_rate": 5.465769941612344e-06, "loss": 0.0159, "step": 123940 }, { "epoch": 1.046631905596251, "grad_norm": 0.2455417662858963, "learning_rate": 5.465036258868661e-06, "loss": 0.0082, "step": 123950 }, { "epoch": 1.0467163454434147, "grad_norm": 0.20155270397663116, "learning_rate": 5.464302566024483e-06, "loss": 0.0039, "step": 123960 }, { "epoch": 1.0468007852905785, "grad_norm": 0.22451132535934448, "learning_rate": 5.463568863095749e-06, "loss": 0.009, "step": 123970 }, { "epoch": 1.0468852251377425, "grad_norm": 0.09078578650951385, "learning_rate": 5.462835150098397e-06, "loss": 0.0102, "step": 123980 }, { "epoch": 1.0469696649849063, "grad_norm": 0.13675536215305328, "learning_rate": 5.462101427048357e-06, "loss": 0.0071, "step": 123990 }, { "epoch": 1.0470541048320703, "grad_norm": 0.31566423177719116, "learning_rate": 5.46136769396157e-06, "loss": 0.0056, "step": 124000 }, { "epoch": 1.047138544679234, "grad_norm": 0.15524204075336456, "learning_rate": 5.4606339508539706e-06, "loss": 0.0071, "step": 124010 }, { "epoch": 1.047222984526398, "grad_norm": 0.42873600125312805, "learning_rate": 5.4599001977414975e-06, "loss": 0.0141, "step": 124020 }, { "epoch": 1.0473074243735618, "grad_norm": 0.5131728053092957, "learning_rate": 5.459166434640085e-06, "loss": 0.0103, "step": 124030 }, { "epoch": 1.0473918642207258, "grad_norm": 0.3475855886936188, "learning_rate": 5.458432661565672e-06, "loss": 0.01, "step": 124040 }, { "epoch": 1.0474763040678896, "grad_norm": 0.472584068775177, "learning_rate": 5.4576988785341955e-06, "loss": 0.0126, "step": 124050 }, { "epoch": 1.0475607439150536, "grad_norm": 0.4170897901058197, "learning_rate": 5.4569650855615954e-06, "loss": 0.0082, "step": 124060 }, { "epoch": 1.0476451837622174, "grad_norm": 0.6503387689590454, "learning_rate": 5.4562312826638055e-06, "loss": 0.0142, "step": 124070 }, { "epoch": 1.0477296236093814, "grad_norm": 0.15188336372375488, "learning_rate": 5.455497469856765e-06, "loss": 0.0049, "step": 124080 }, { "epoch": 1.0478140634565452, "grad_norm": 0.16247044503688812, "learning_rate": 5.454763647156413e-06, "loss": 0.0064, "step": 124090 }, { "epoch": 1.047898503303709, "grad_norm": 0.4424838721752167, "learning_rate": 5.454029814578688e-06, "loss": 0.0064, "step": 124100 }, { "epoch": 1.047982943150873, "grad_norm": 0.13998429477214813, "learning_rate": 5.4532959721395285e-06, "loss": 0.005, "step": 124110 }, { "epoch": 1.0480673829980367, "grad_norm": 0.3904265761375427, "learning_rate": 5.4525621198548735e-06, "loss": 0.0055, "step": 124120 }, { "epoch": 1.0481518228452007, "grad_norm": 0.2549152374267578, "learning_rate": 5.451828257740661e-06, "loss": 0.0053, "step": 124130 }, { "epoch": 1.0482362626923645, "grad_norm": 0.5053877234458923, "learning_rate": 5.451094385812834e-06, "loss": 0.0099, "step": 124140 }, { "epoch": 1.0483207025395285, "grad_norm": 0.6760095357894897, "learning_rate": 5.450360504087327e-06, "loss": 0.0093, "step": 124150 }, { "epoch": 1.0484051423866922, "grad_norm": 0.12400055676698685, "learning_rate": 5.449626612580082e-06, "loss": 0.003, "step": 124160 }, { "epoch": 1.0484895822338562, "grad_norm": 0.28146830201148987, "learning_rate": 5.44889271130704e-06, "loss": 0.0067, "step": 124170 }, { "epoch": 1.04857402208102, "grad_norm": 0.21239745616912842, "learning_rate": 5.44815880028414e-06, "loss": 0.0136, "step": 124180 }, { "epoch": 1.048658461928184, "grad_norm": 0.2384330928325653, "learning_rate": 5.4474248795273245e-06, "loss": 0.0041, "step": 124190 }, { "epoch": 1.0487429017753478, "grad_norm": 0.34750688076019287, "learning_rate": 5.4466909490525296e-06, "loss": 0.0087, "step": 124200 }, { "epoch": 1.0488273416225116, "grad_norm": 0.39001286029815674, "learning_rate": 5.4459570088757e-06, "loss": 0.0086, "step": 124210 }, { "epoch": 1.0489117814696756, "grad_norm": 1.0635960102081299, "learning_rate": 5.445223059012774e-06, "loss": 0.0091, "step": 124220 }, { "epoch": 1.0489962213168393, "grad_norm": 0.23342259228229523, "learning_rate": 5.444489099479697e-06, "loss": 0.0113, "step": 124230 }, { "epoch": 1.0490806611640033, "grad_norm": 0.14338825643062592, "learning_rate": 5.443755130292407e-06, "loss": 0.0149, "step": 124240 }, { "epoch": 1.0491651010111671, "grad_norm": 0.021529868245124817, "learning_rate": 5.443021151466845e-06, "loss": 0.0066, "step": 124250 }, { "epoch": 1.049249540858331, "grad_norm": 0.0017705813515931368, "learning_rate": 5.442287163018956e-06, "loss": 0.008, "step": 124260 }, { "epoch": 1.0493339807054949, "grad_norm": 0.22845378518104553, "learning_rate": 5.44155316496468e-06, "loss": 0.007, "step": 124270 }, { "epoch": 1.0494184205526589, "grad_norm": 0.30801719427108765, "learning_rate": 5.4408191573199595e-06, "loss": 0.0067, "step": 124280 }, { "epoch": 1.0495028603998227, "grad_norm": 0.2925913631916046, "learning_rate": 5.440085140100737e-06, "loss": 0.0067, "step": 124290 }, { "epoch": 1.0495873002469867, "grad_norm": 0.14436380565166473, "learning_rate": 5.439351113322957e-06, "loss": 0.0105, "step": 124300 }, { "epoch": 1.0496717400941504, "grad_norm": 1.6610963344573975, "learning_rate": 5.4386170770025605e-06, "loss": 0.0136, "step": 124310 }, { "epoch": 1.0497561799413142, "grad_norm": 0.16853538155555725, "learning_rate": 5.43788303115549e-06, "loss": 0.0042, "step": 124320 }, { "epoch": 1.0498406197884782, "grad_norm": 0.6232098937034607, "learning_rate": 5.437148975797689e-06, "loss": 0.0098, "step": 124330 }, { "epoch": 1.049925059635642, "grad_norm": 0.37137526273727417, "learning_rate": 5.436414910945103e-06, "loss": 0.01, "step": 124340 }, { "epoch": 1.050009499482806, "grad_norm": 0.8075339198112488, "learning_rate": 5.435680836613675e-06, "loss": 0.0077, "step": 124350 }, { "epoch": 1.0500939393299697, "grad_norm": 0.001213240553624928, "learning_rate": 5.434946752819348e-06, "loss": 0.0069, "step": 124360 }, { "epoch": 1.0501783791771337, "grad_norm": 0.19618956744670868, "learning_rate": 5.434212659578065e-06, "loss": 0.0111, "step": 124370 }, { "epoch": 1.0502628190242975, "grad_norm": 0.18288873136043549, "learning_rate": 5.433478556905773e-06, "loss": 0.0086, "step": 124380 }, { "epoch": 1.0503472588714615, "grad_norm": 0.3035861551761627, "learning_rate": 5.432744444818417e-06, "loss": 0.0064, "step": 124390 }, { "epoch": 1.0504316987186253, "grad_norm": 0.038676436990499496, "learning_rate": 5.43201032333194e-06, "loss": 0.0064, "step": 124400 }, { "epoch": 1.0505161385657893, "grad_norm": 0.2166108936071396, "learning_rate": 5.431276192462286e-06, "loss": 0.0089, "step": 124410 }, { "epoch": 1.050600578412953, "grad_norm": 0.42021727561950684, "learning_rate": 5.4305420522254035e-06, "loss": 0.0055, "step": 124420 }, { "epoch": 1.0506850182601168, "grad_norm": 0.6529443860054016, "learning_rate": 5.429807902637235e-06, "loss": 0.0087, "step": 124430 }, { "epoch": 1.0507694581072808, "grad_norm": 0.3095797598361969, "learning_rate": 5.4290737437137265e-06, "loss": 0.0097, "step": 124440 }, { "epoch": 1.0508538979544446, "grad_norm": 0.2827061414718628, "learning_rate": 5.4283395754708235e-06, "loss": 0.005, "step": 124450 }, { "epoch": 1.0509383378016086, "grad_norm": 0.2906537652015686, "learning_rate": 5.427605397924473e-06, "loss": 0.0054, "step": 124460 }, { "epoch": 1.0510227776487724, "grad_norm": 1.0205591917037964, "learning_rate": 5.426871211090623e-06, "loss": 0.0123, "step": 124470 }, { "epoch": 1.0511072174959364, "grad_norm": 0.11329010874032974, "learning_rate": 5.426137014985216e-06, "loss": 0.0062, "step": 124480 }, { "epoch": 1.0511916573431002, "grad_norm": 0.5306150913238525, "learning_rate": 5.4254028096242e-06, "loss": 0.0096, "step": 124490 }, { "epoch": 1.0512760971902642, "grad_norm": 0.08447951823472977, "learning_rate": 5.424668595023521e-06, "loss": 0.0054, "step": 124500 }, { "epoch": 1.051360537037428, "grad_norm": 0.9482076168060303, "learning_rate": 5.42393437119913e-06, "loss": 0.0128, "step": 124510 }, { "epoch": 1.051444976884592, "grad_norm": 0.4336390197277069, "learning_rate": 5.42320013816697e-06, "loss": 0.0043, "step": 124520 }, { "epoch": 1.0515294167317557, "grad_norm": 1.1378337144851685, "learning_rate": 5.422465895942989e-06, "loss": 0.0057, "step": 124530 }, { "epoch": 1.0516138565789195, "grad_norm": 0.386235773563385, "learning_rate": 5.421731644543137e-06, "loss": 0.0094, "step": 124540 }, { "epoch": 1.0516982964260835, "grad_norm": 0.18786175549030304, "learning_rate": 5.420997383983359e-06, "loss": 0.0113, "step": 124550 }, { "epoch": 1.0517827362732473, "grad_norm": 0.20011048018932343, "learning_rate": 5.420263114279605e-06, "loss": 0.0064, "step": 124560 }, { "epoch": 1.0518671761204113, "grad_norm": 0.16752687096595764, "learning_rate": 5.419528835447821e-06, "loss": 0.0099, "step": 124570 }, { "epoch": 1.051951615967575, "grad_norm": 0.7923988103866577, "learning_rate": 5.418794547503957e-06, "loss": 0.0172, "step": 124580 }, { "epoch": 1.052036055814739, "grad_norm": 0.5504199266433716, "learning_rate": 5.418060250463963e-06, "loss": 0.0089, "step": 124590 }, { "epoch": 1.0521204956619028, "grad_norm": 0.0644976794719696, "learning_rate": 5.4173259443437834e-06, "loss": 0.0108, "step": 124600 }, { "epoch": 1.0522049355090668, "grad_norm": 0.030621258541941643, "learning_rate": 5.416591629159371e-06, "loss": 0.0062, "step": 124610 }, { "epoch": 1.0522893753562306, "grad_norm": 0.29862403869628906, "learning_rate": 5.415857304926673e-06, "loss": 0.0108, "step": 124620 }, { "epoch": 1.0523738152033946, "grad_norm": 0.3306201994419098, "learning_rate": 5.415122971661642e-06, "loss": 0.0093, "step": 124630 }, { "epoch": 1.0524582550505583, "grad_norm": 0.2359623908996582, "learning_rate": 5.414388629380222e-06, "loss": 0.0058, "step": 124640 }, { "epoch": 1.0525426948977223, "grad_norm": 0.20541012287139893, "learning_rate": 5.413654278098367e-06, "loss": 0.0096, "step": 124650 }, { "epoch": 1.0526271347448861, "grad_norm": 0.033316828310489655, "learning_rate": 5.4129199178320265e-06, "loss": 0.0069, "step": 124660 }, { "epoch": 1.05271157459205, "grad_norm": 0.11657007783651352, "learning_rate": 5.41218554859715e-06, "loss": 0.0094, "step": 124670 }, { "epoch": 1.052796014439214, "grad_norm": 0.22283032536506653, "learning_rate": 5.411451170409688e-06, "loss": 0.0115, "step": 124680 }, { "epoch": 1.0528804542863777, "grad_norm": 1.0036929845809937, "learning_rate": 5.41071678328559e-06, "loss": 0.0126, "step": 124690 }, { "epoch": 1.0529648941335417, "grad_norm": 0.31345683336257935, "learning_rate": 5.409982387240808e-06, "loss": 0.0102, "step": 124700 }, { "epoch": 1.0530493339807054, "grad_norm": 0.1458352655172348, "learning_rate": 5.4092479822912925e-06, "loss": 0.0088, "step": 124710 }, { "epoch": 1.0531337738278694, "grad_norm": 0.21655748784542084, "learning_rate": 5.408513568452994e-06, "loss": 0.0178, "step": 124720 }, { "epoch": 1.0532182136750332, "grad_norm": 0.7443088293075562, "learning_rate": 5.407779145741866e-06, "loss": 0.0093, "step": 124730 }, { "epoch": 1.0533026535221972, "grad_norm": 0.15716734528541565, "learning_rate": 5.407044714173857e-06, "loss": 0.0109, "step": 124740 }, { "epoch": 1.053387093369361, "grad_norm": 0.0984114333987236, "learning_rate": 5.406310273764922e-06, "loss": 0.0085, "step": 124750 }, { "epoch": 1.053471533216525, "grad_norm": 0.13606446981430054, "learning_rate": 5.40557582453101e-06, "loss": 0.0077, "step": 124760 }, { "epoch": 1.0535559730636888, "grad_norm": 0.1922970712184906, "learning_rate": 5.404841366488074e-06, "loss": 0.0039, "step": 124770 }, { "epoch": 1.0536404129108525, "grad_norm": 0.45286989212036133, "learning_rate": 5.404106899652067e-06, "loss": 0.0095, "step": 124780 }, { "epoch": 1.0537248527580165, "grad_norm": 0.783566415309906, "learning_rate": 5.40337242403894e-06, "loss": 0.01, "step": 124790 }, { "epoch": 1.0538092926051803, "grad_norm": 0.3016217350959778, "learning_rate": 5.402637939664649e-06, "loss": 0.0089, "step": 124800 }, { "epoch": 1.0538937324523443, "grad_norm": 0.1618906706571579, "learning_rate": 5.401903446545142e-06, "loss": 0.0078, "step": 124810 }, { "epoch": 1.053978172299508, "grad_norm": 0.48500168323516846, "learning_rate": 5.401168944696375e-06, "loss": 0.0103, "step": 124820 }, { "epoch": 1.054062612146672, "grad_norm": 0.35811349749565125, "learning_rate": 5.400434434134301e-06, "loss": 0.007, "step": 124830 }, { "epoch": 1.0541470519938358, "grad_norm": 0.10387787967920303, "learning_rate": 5.399699914874873e-06, "loss": 0.0078, "step": 124840 }, { "epoch": 1.0542314918409998, "grad_norm": 0.29378530383110046, "learning_rate": 5.398965386934045e-06, "loss": 0.012, "step": 124850 }, { "epoch": 1.0543159316881636, "grad_norm": 0.2178533375263214, "learning_rate": 5.3982308503277706e-06, "loss": 0.0074, "step": 124860 }, { "epoch": 1.0544003715353276, "grad_norm": 0.3891032636165619, "learning_rate": 5.397496305072004e-06, "loss": 0.0084, "step": 124870 }, { "epoch": 1.0544848113824914, "grad_norm": 0.08599410206079483, "learning_rate": 5.396761751182699e-06, "loss": 0.0084, "step": 124880 }, { "epoch": 1.0545692512296552, "grad_norm": 0.2162819653749466, "learning_rate": 5.396027188675808e-06, "loss": 0.0062, "step": 124890 }, { "epoch": 1.0546536910768192, "grad_norm": 0.19064901769161224, "learning_rate": 5.395292617567291e-06, "loss": 0.0065, "step": 124900 }, { "epoch": 1.054738130923983, "grad_norm": 0.7345873117446899, "learning_rate": 5.394558037873098e-06, "loss": 0.0067, "step": 124910 }, { "epoch": 1.054822570771147, "grad_norm": 0.21150976419448853, "learning_rate": 5.393823449609185e-06, "loss": 0.0158, "step": 124920 }, { "epoch": 1.0549070106183107, "grad_norm": 0.03541354462504387, "learning_rate": 5.393088852791507e-06, "loss": 0.0085, "step": 124930 }, { "epoch": 1.0549914504654747, "grad_norm": 0.12243052572011948, "learning_rate": 5.39235424743602e-06, "loss": 0.0049, "step": 124940 }, { "epoch": 1.0550758903126385, "grad_norm": 0.23176267743110657, "learning_rate": 5.391619633558679e-06, "loss": 0.0095, "step": 124950 }, { "epoch": 1.0551603301598025, "grad_norm": 0.21319930255413055, "learning_rate": 5.390885011175441e-06, "loss": 0.0103, "step": 124960 }, { "epoch": 1.0552447700069663, "grad_norm": 0.3692367374897003, "learning_rate": 5.390150380302259e-06, "loss": 0.0073, "step": 124970 }, { "epoch": 1.0553292098541303, "grad_norm": 0.6159309148788452, "learning_rate": 5.389415740955091e-06, "loss": 0.0071, "step": 124980 }, { "epoch": 1.055413649701294, "grad_norm": 0.2758677005767822, "learning_rate": 5.388681093149892e-06, "loss": 0.011, "step": 124990 }, { "epoch": 1.055498089548458, "grad_norm": 0.2030784636735916, "learning_rate": 5.387946436902622e-06, "loss": 0.0071, "step": 125000 }, { "epoch": 1.0555825293956218, "grad_norm": 0.2570057213306427, "learning_rate": 5.387211772229234e-06, "loss": 0.0058, "step": 125010 }, { "epoch": 1.0556669692427856, "grad_norm": 0.39609745144844055, "learning_rate": 5.386477099145685e-06, "loss": 0.0143, "step": 125020 }, { "epoch": 1.0557514090899496, "grad_norm": 0.17301452159881592, "learning_rate": 5.385742417667932e-06, "loss": 0.0062, "step": 125030 }, { "epoch": 1.0558358489371134, "grad_norm": 0.4175302982330322, "learning_rate": 5.385007727811935e-06, "loss": 0.0094, "step": 125040 }, { "epoch": 1.0559202887842773, "grad_norm": 0.3378206491470337, "learning_rate": 5.384273029593646e-06, "loss": 0.0063, "step": 125050 }, { "epoch": 1.0560047286314411, "grad_norm": 0.5194893479347229, "learning_rate": 5.383538323029027e-06, "loss": 0.0086, "step": 125060 }, { "epoch": 1.0560891684786051, "grad_norm": 0.26080676913261414, "learning_rate": 5.382803608134034e-06, "loss": 0.0102, "step": 125070 }, { "epoch": 1.056173608325769, "grad_norm": 0.023045077919960022, "learning_rate": 5.382068884924626e-06, "loss": 0.0054, "step": 125080 }, { "epoch": 1.056258048172933, "grad_norm": 0.1777762472629547, "learning_rate": 5.38133415341676e-06, "loss": 0.0109, "step": 125090 }, { "epoch": 1.0563424880200967, "grad_norm": 0.5650061964988708, "learning_rate": 5.380599413626393e-06, "loss": 0.0073, "step": 125100 }, { "epoch": 1.0564269278672607, "grad_norm": 0.2279907912015915, "learning_rate": 5.379864665569486e-06, "loss": 0.0089, "step": 125110 }, { "epoch": 1.0565113677144244, "grad_norm": 0.08956508338451385, "learning_rate": 5.379129909261996e-06, "loss": 0.0081, "step": 125120 }, { "epoch": 1.0565958075615882, "grad_norm": 0.2147578001022339, "learning_rate": 5.3783951447198825e-06, "loss": 0.0066, "step": 125130 }, { "epoch": 1.0566802474087522, "grad_norm": 0.4482603669166565, "learning_rate": 5.377660371959103e-06, "loss": 0.0079, "step": 125140 }, { "epoch": 1.056764687255916, "grad_norm": 0.08472343534231186, "learning_rate": 5.376925590995618e-06, "loss": 0.0075, "step": 125150 }, { "epoch": 1.05684912710308, "grad_norm": 0.42337191104888916, "learning_rate": 5.376190801845387e-06, "loss": 0.0075, "step": 125160 }, { "epoch": 1.0569335669502438, "grad_norm": 0.10163937509059906, "learning_rate": 5.375456004524367e-06, "loss": 0.0067, "step": 125170 }, { "epoch": 1.0570180067974078, "grad_norm": 0.16270434856414795, "learning_rate": 5.374721199048521e-06, "loss": 0.0133, "step": 125180 }, { "epoch": 1.0571024466445715, "grad_norm": 0.24005316197872162, "learning_rate": 5.373986385433807e-06, "loss": 0.0052, "step": 125190 }, { "epoch": 1.0571868864917355, "grad_norm": 0.6170262694358826, "learning_rate": 5.373251563696185e-06, "loss": 0.0138, "step": 125200 }, { "epoch": 1.0572713263388993, "grad_norm": 0.9106960892677307, "learning_rate": 5.372516733851617e-06, "loss": 0.0121, "step": 125210 }, { "epoch": 1.0573557661860633, "grad_norm": 0.19919359683990479, "learning_rate": 5.37178189591606e-06, "loss": 0.0049, "step": 125220 }, { "epoch": 1.057440206033227, "grad_norm": 0.5484331250190735, "learning_rate": 5.371047049905477e-06, "loss": 0.0083, "step": 125230 }, { "epoch": 1.0575246458803909, "grad_norm": 0.18960928916931152, "learning_rate": 5.37031219583583e-06, "loss": 0.0097, "step": 125240 }, { "epoch": 1.0576090857275549, "grad_norm": 0.25082966685295105, "learning_rate": 5.3695773337230765e-06, "loss": 0.0057, "step": 125250 }, { "epoch": 1.0576935255747186, "grad_norm": 0.27526673674583435, "learning_rate": 5.3688424635831785e-06, "loss": 0.0094, "step": 125260 }, { "epoch": 1.0577779654218826, "grad_norm": 0.37469998002052307, "learning_rate": 5.368107585432097e-06, "loss": 0.0063, "step": 125270 }, { "epoch": 1.0578624052690464, "grad_norm": 0.3393143117427826, "learning_rate": 5.3673726992857955e-06, "loss": 0.0089, "step": 125280 }, { "epoch": 1.0579468451162104, "grad_norm": 0.2532089352607727, "learning_rate": 5.366637805160235e-06, "loss": 0.0083, "step": 125290 }, { "epoch": 1.0580312849633742, "grad_norm": 0.165281280875206, "learning_rate": 5.365902903071376e-06, "loss": 0.0082, "step": 125300 }, { "epoch": 1.0581157248105382, "grad_norm": 0.13202686607837677, "learning_rate": 5.36516799303518e-06, "loss": 0.01, "step": 125310 }, { "epoch": 1.058200164657702, "grad_norm": 0.19420769810676575, "learning_rate": 5.364433075067611e-06, "loss": 0.0045, "step": 125320 }, { "epoch": 1.058284604504866, "grad_norm": 0.335644006729126, "learning_rate": 5.363698149184629e-06, "loss": 0.0059, "step": 125330 }, { "epoch": 1.0583690443520297, "grad_norm": 0.6548271775245667, "learning_rate": 5.3629632154021994e-06, "loss": 0.0127, "step": 125340 }, { "epoch": 1.0584534841991935, "grad_norm": 0.28199252486228943, "learning_rate": 5.362228273736282e-06, "loss": 0.0114, "step": 125350 }, { "epoch": 1.0585379240463575, "grad_norm": 0.35012948513031006, "learning_rate": 5.361493324202841e-06, "loss": 0.0093, "step": 125360 }, { "epoch": 1.0586223638935213, "grad_norm": 0.001497117686085403, "learning_rate": 5.3607583668178395e-06, "loss": 0.011, "step": 125370 }, { "epoch": 1.0587068037406853, "grad_norm": 1.167445182800293, "learning_rate": 5.36002340159724e-06, "loss": 0.0061, "step": 125380 }, { "epoch": 1.058791243587849, "grad_norm": 0.7423276901245117, "learning_rate": 5.359288428557006e-06, "loss": 0.0055, "step": 125390 }, { "epoch": 1.058875683435013, "grad_norm": 0.4822964072227478, "learning_rate": 5.358553447713099e-06, "loss": 0.0104, "step": 125400 }, { "epoch": 1.0589601232821768, "grad_norm": 0.0061292992904782295, "learning_rate": 5.357818459081489e-06, "loss": 0.0101, "step": 125410 }, { "epoch": 1.0590445631293408, "grad_norm": 0.4327634572982788, "learning_rate": 5.357083462678132e-06, "loss": 0.0076, "step": 125420 }, { "epoch": 1.0591290029765046, "grad_norm": 0.42416805028915405, "learning_rate": 5.356348458518995e-06, "loss": 0.0071, "step": 125430 }, { "epoch": 1.0592134428236686, "grad_norm": 0.14065435528755188, "learning_rate": 5.355613446620044e-06, "loss": 0.0058, "step": 125440 }, { "epoch": 1.0592978826708324, "grad_norm": 0.21736973524093628, "learning_rate": 5.354878426997242e-06, "loss": 0.0148, "step": 125450 }, { "epoch": 1.0593823225179961, "grad_norm": 0.3497792184352875, "learning_rate": 5.354143399666553e-06, "loss": 0.0109, "step": 125460 }, { "epoch": 1.0594667623651601, "grad_norm": 0.2288077026605606, "learning_rate": 5.35340836464394e-06, "loss": 0.0071, "step": 125470 }, { "epoch": 1.059551202212324, "grad_norm": 0.26613524556159973, "learning_rate": 5.352673321945373e-06, "loss": 0.0044, "step": 125480 }, { "epoch": 1.059635642059488, "grad_norm": 0.18439380824565887, "learning_rate": 5.3519382715868125e-06, "loss": 0.0099, "step": 125490 }, { "epoch": 1.0597200819066517, "grad_norm": 0.49087560176849365, "learning_rate": 5.351203213584224e-06, "loss": 0.0066, "step": 125500 }, { "epoch": 1.0598045217538157, "grad_norm": 0.12188009917736053, "learning_rate": 5.350468147953573e-06, "loss": 0.0098, "step": 125510 }, { "epoch": 1.0598889616009795, "grad_norm": 0.38043686747550964, "learning_rate": 5.349733074710828e-06, "loss": 0.0144, "step": 125520 }, { "epoch": 1.0599734014481434, "grad_norm": 0.1572372317314148, "learning_rate": 5.3489979938719516e-06, "loss": 0.0032, "step": 125530 }, { "epoch": 1.0600578412953072, "grad_norm": 0.21261267364025116, "learning_rate": 5.348262905452909e-06, "loss": 0.0103, "step": 125540 }, { "epoch": 1.0601422811424712, "grad_norm": 0.3240811824798584, "learning_rate": 5.347527809469668e-06, "loss": 0.0081, "step": 125550 }, { "epoch": 1.060226720989635, "grad_norm": 0.2544930577278137, "learning_rate": 5.346792705938194e-06, "loss": 0.0067, "step": 125560 }, { "epoch": 1.060311160836799, "grad_norm": 0.3589008152484894, "learning_rate": 5.346057594874455e-06, "loss": 0.0065, "step": 125570 }, { "epoch": 1.0603956006839628, "grad_norm": 0.10221447050571442, "learning_rate": 5.345322476294414e-06, "loss": 0.0105, "step": 125580 }, { "epoch": 1.0604800405311265, "grad_norm": 0.2016606479883194, "learning_rate": 5.34458735021404e-06, "loss": 0.0078, "step": 125590 }, { "epoch": 1.0605644803782905, "grad_norm": 0.26270124316215515, "learning_rate": 5.3438522166493e-06, "loss": 0.0079, "step": 125600 }, { "epoch": 1.0606489202254543, "grad_norm": 0.23354223370552063, "learning_rate": 5.343117075616162e-06, "loss": 0.0071, "step": 125610 }, { "epoch": 1.0607333600726183, "grad_norm": 0.273039847612381, "learning_rate": 5.342381927130589e-06, "loss": 0.0077, "step": 125620 }, { "epoch": 1.060817799919782, "grad_norm": 0.2992783188819885, "learning_rate": 5.341646771208551e-06, "loss": 0.0039, "step": 125630 }, { "epoch": 1.060902239766946, "grad_norm": 0.49040618538856506, "learning_rate": 5.340911607866015e-06, "loss": 0.0118, "step": 125640 }, { "epoch": 1.0609866796141099, "grad_norm": 0.16850923001766205, "learning_rate": 5.34017643711895e-06, "loss": 0.0041, "step": 125650 }, { "epoch": 1.0610711194612739, "grad_norm": 0.2387738823890686, "learning_rate": 5.339441258983321e-06, "loss": 0.014, "step": 125660 }, { "epoch": 1.0611555593084376, "grad_norm": 0.5460194945335388, "learning_rate": 5.338706073475099e-06, "loss": 0.0076, "step": 125670 }, { "epoch": 1.0612399991556016, "grad_norm": 0.2156769335269928, "learning_rate": 5.337970880610248e-06, "loss": 0.0076, "step": 125680 }, { "epoch": 1.0613244390027654, "grad_norm": 0.0743633583188057, "learning_rate": 5.337235680404741e-06, "loss": 0.0097, "step": 125690 }, { "epoch": 1.0614088788499292, "grad_norm": 0.2263997197151184, "learning_rate": 5.3365004728745425e-06, "loss": 0.0043, "step": 125700 }, { "epoch": 1.0614933186970932, "grad_norm": 0.03252115845680237, "learning_rate": 5.3357652580356234e-06, "loss": 0.0064, "step": 125710 }, { "epoch": 1.061577758544257, "grad_norm": 0.3468990921974182, "learning_rate": 5.335030035903953e-06, "loss": 0.0055, "step": 125720 }, { "epoch": 1.061662198391421, "grad_norm": 0.345550000667572, "learning_rate": 5.334294806495498e-06, "loss": 0.0093, "step": 125730 }, { "epoch": 1.0617466382385847, "grad_norm": 0.5287927389144897, "learning_rate": 5.333559569826229e-06, "loss": 0.0117, "step": 125740 }, { "epoch": 1.0618310780857487, "grad_norm": 0.2860032021999359, "learning_rate": 5.332824325912112e-06, "loss": 0.0094, "step": 125750 }, { "epoch": 1.0619155179329125, "grad_norm": 0.22221466898918152, "learning_rate": 5.3320890747691204e-06, "loss": 0.0089, "step": 125760 }, { "epoch": 1.0619999577800765, "grad_norm": 0.20177528262138367, "learning_rate": 5.331353816413223e-06, "loss": 0.009, "step": 125770 }, { "epoch": 1.0620843976272403, "grad_norm": 0.06487520784139633, "learning_rate": 5.330618550860388e-06, "loss": 0.0048, "step": 125780 }, { "epoch": 1.0621688374744043, "grad_norm": 0.1837437003850937, "learning_rate": 5.329883278126585e-06, "loss": 0.0071, "step": 125790 }, { "epoch": 1.062253277321568, "grad_norm": 0.325231671333313, "learning_rate": 5.329147998227785e-06, "loss": 0.0066, "step": 125800 }, { "epoch": 1.0623377171687318, "grad_norm": 0.19372862577438354, "learning_rate": 5.328412711179959e-06, "loss": 0.0118, "step": 125810 }, { "epoch": 1.0624221570158958, "grad_norm": 0.22090181708335876, "learning_rate": 5.3276774169990754e-06, "loss": 0.0071, "step": 125820 }, { "epoch": 1.0625065968630596, "grad_norm": 0.058700453490018845, "learning_rate": 5.326942115701105e-06, "loss": 0.0121, "step": 125830 }, { "epoch": 1.0625910367102236, "grad_norm": 0.10739181935787201, "learning_rate": 5.32620680730202e-06, "loss": 0.0076, "step": 125840 }, { "epoch": 1.0626754765573874, "grad_norm": 0.1938665807247162, "learning_rate": 5.325471491817788e-06, "loss": 0.0056, "step": 125850 }, { "epoch": 1.0627599164045514, "grad_norm": 0.039247892796993256, "learning_rate": 5.3247361692643855e-06, "loss": 0.0082, "step": 125860 }, { "epoch": 1.0628443562517151, "grad_norm": 0.030876409262418747, "learning_rate": 5.3240008396577765e-06, "loss": 0.0076, "step": 125870 }, { "epoch": 1.0629287960988791, "grad_norm": 0.02065609022974968, "learning_rate": 5.3232655030139365e-06, "loss": 0.0078, "step": 125880 }, { "epoch": 1.063013235946043, "grad_norm": 0.4791720509529114, "learning_rate": 5.322530159348836e-06, "loss": 0.0121, "step": 125890 }, { "epoch": 1.063097675793207, "grad_norm": 0.10744661837816238, "learning_rate": 5.321794808678449e-06, "loss": 0.004, "step": 125900 }, { "epoch": 1.0631821156403707, "grad_norm": 0.31612083315849304, "learning_rate": 5.321059451018742e-06, "loss": 0.0101, "step": 125910 }, { "epoch": 1.0632665554875347, "grad_norm": 0.20169170200824738, "learning_rate": 5.32032408638569e-06, "loss": 0.0073, "step": 125920 }, { "epoch": 1.0633509953346985, "grad_norm": 0.7925587296485901, "learning_rate": 5.319588714795265e-06, "loss": 0.0119, "step": 125930 }, { "epoch": 1.0634354351818622, "grad_norm": 0.2113182693719864, "learning_rate": 5.3188533362634376e-06, "loss": 0.0074, "step": 125940 }, { "epoch": 1.0635198750290262, "grad_norm": 0.06277358531951904, "learning_rate": 5.318117950806182e-06, "loss": 0.0072, "step": 125950 }, { "epoch": 1.06360431487619, "grad_norm": 0.6569404602050781, "learning_rate": 5.31738255843947e-06, "loss": 0.0139, "step": 125960 }, { "epoch": 1.063688754723354, "grad_norm": 0.2842097580432892, "learning_rate": 5.3166471591792725e-06, "loss": 0.0119, "step": 125970 }, { "epoch": 1.0637731945705178, "grad_norm": 0.18312761187553406, "learning_rate": 5.315911753041565e-06, "loss": 0.0099, "step": 125980 }, { "epoch": 1.0638576344176818, "grad_norm": 0.21330617368221283, "learning_rate": 5.315176340042318e-06, "loss": 0.0073, "step": 125990 }, { "epoch": 1.0639420742648455, "grad_norm": 0.3740156590938568, "learning_rate": 5.314440920197506e-06, "loss": 0.0092, "step": 126000 }, { "epoch": 1.0640265141120095, "grad_norm": 0.1355341672897339, "learning_rate": 5.313705493523101e-06, "loss": 0.0072, "step": 126010 }, { "epoch": 1.0641109539591733, "grad_norm": 0.39514368772506714, "learning_rate": 5.312970060035079e-06, "loss": 0.0074, "step": 126020 }, { "epoch": 1.064195393806337, "grad_norm": 0.6158833503723145, "learning_rate": 5.312234619749409e-06, "loss": 0.0121, "step": 126030 }, { "epoch": 1.064279833653501, "grad_norm": 0.13410989940166473, "learning_rate": 5.311499172682067e-06, "loss": 0.0149, "step": 126040 }, { "epoch": 1.0643642735006649, "grad_norm": 0.24519124627113342, "learning_rate": 5.310763718849028e-06, "loss": 0.0081, "step": 126050 }, { "epoch": 1.0644487133478289, "grad_norm": 0.4266355335712433, "learning_rate": 5.310028258266265e-06, "loss": 0.0077, "step": 126060 }, { "epoch": 1.0645331531949926, "grad_norm": 0.3816389739513397, "learning_rate": 5.3092927909497505e-06, "loss": 0.0087, "step": 126070 }, { "epoch": 1.0646175930421566, "grad_norm": 0.11546322703361511, "learning_rate": 5.308557316915461e-06, "loss": 0.0083, "step": 126080 }, { "epoch": 1.0647020328893204, "grad_norm": 0.21870438754558563, "learning_rate": 5.30782183617937e-06, "loss": 0.0074, "step": 126090 }, { "epoch": 1.0647864727364844, "grad_norm": 0.11732622981071472, "learning_rate": 5.307086348757452e-06, "loss": 0.0074, "step": 126100 }, { "epoch": 1.0648709125836482, "grad_norm": 0.20375269651412964, "learning_rate": 5.306350854665679e-06, "loss": 0.0076, "step": 126110 }, { "epoch": 1.0649553524308122, "grad_norm": 0.14740228652954102, "learning_rate": 5.305615353920031e-06, "loss": 0.0076, "step": 126120 }, { "epoch": 1.065039792277976, "grad_norm": 0.3838978707790375, "learning_rate": 5.304879846536478e-06, "loss": 0.0087, "step": 126130 }, { "epoch": 1.06512423212514, "grad_norm": 0.16874311864376068, "learning_rate": 5.304144332530999e-06, "loss": 0.0124, "step": 126140 }, { "epoch": 1.0652086719723037, "grad_norm": 0.05548752099275589, "learning_rate": 5.303408811919566e-06, "loss": 0.0092, "step": 126150 }, { "epoch": 1.0652931118194675, "grad_norm": 0.4188258647918701, "learning_rate": 5.302673284718156e-06, "loss": 0.0123, "step": 126160 }, { "epoch": 1.0653775516666315, "grad_norm": 0.05231890454888344, "learning_rate": 5.301937750942745e-06, "loss": 0.0136, "step": 126170 }, { "epoch": 1.0654619915137953, "grad_norm": 0.3732181489467621, "learning_rate": 5.301202210609308e-06, "loss": 0.0105, "step": 126180 }, { "epoch": 1.0655464313609593, "grad_norm": 0.14787907898426056, "learning_rate": 5.3004666637338196e-06, "loss": 0.0066, "step": 126190 }, { "epoch": 1.065630871208123, "grad_norm": 0.816761314868927, "learning_rate": 5.299731110332259e-06, "loss": 0.0095, "step": 126200 }, { "epoch": 1.065715311055287, "grad_norm": 0.33915528655052185, "learning_rate": 5.298995550420598e-06, "loss": 0.0059, "step": 126210 }, { "epoch": 1.0657997509024508, "grad_norm": 0.4263230264186859, "learning_rate": 5.2982599840148166e-06, "loss": 0.0096, "step": 126220 }, { "epoch": 1.0658841907496148, "grad_norm": 0.03918982669711113, "learning_rate": 5.297524411130888e-06, "loss": 0.0054, "step": 126230 }, { "epoch": 1.0659686305967786, "grad_norm": 0.23187585175037384, "learning_rate": 5.29678883178479e-06, "loss": 0.0063, "step": 126240 }, { "epoch": 1.0660530704439426, "grad_norm": 0.4119589626789093, "learning_rate": 5.2960532459925e-06, "loss": 0.0077, "step": 126250 }, { "epoch": 1.0661375102911064, "grad_norm": 0.401093989610672, "learning_rate": 5.295317653769995e-06, "loss": 0.0066, "step": 126260 }, { "epoch": 1.0662219501382701, "grad_norm": 0.2671140134334564, "learning_rate": 5.294582055133251e-06, "loss": 0.011, "step": 126270 }, { "epoch": 1.0663063899854341, "grad_norm": 0.19703032076358795, "learning_rate": 5.293846450098244e-06, "loss": 0.0092, "step": 126280 }, { "epoch": 1.066390829832598, "grad_norm": 0.49534502625465393, "learning_rate": 5.293110838680952e-06, "loss": 0.0132, "step": 126290 }, { "epoch": 1.066475269679762, "grad_norm": 0.35979965329170227, "learning_rate": 5.292375220897353e-06, "loss": 0.0057, "step": 126300 }, { "epoch": 1.0665597095269257, "grad_norm": 0.11305107176303864, "learning_rate": 5.291639596763426e-06, "loss": 0.006, "step": 126310 }, { "epoch": 1.0666441493740897, "grad_norm": 0.3749064803123474, "learning_rate": 5.290903966295144e-06, "loss": 0.0082, "step": 126320 }, { "epoch": 1.0667285892212535, "grad_norm": 0.36810070276260376, "learning_rate": 5.2901683295084895e-06, "loss": 0.0071, "step": 126330 }, { "epoch": 1.0668130290684175, "grad_norm": 0.2934487462043762, "learning_rate": 5.289432686419437e-06, "loss": 0.0139, "step": 126340 }, { "epoch": 1.0668974689155812, "grad_norm": 0.15140071511268616, "learning_rate": 5.288697037043967e-06, "loss": 0.0151, "step": 126350 }, { "epoch": 1.0669819087627452, "grad_norm": 0.38056716322898865, "learning_rate": 5.287961381398056e-06, "loss": 0.0046, "step": 126360 }, { "epoch": 1.067066348609909, "grad_norm": 0.2935745418071747, "learning_rate": 5.287225719497682e-06, "loss": 0.0081, "step": 126370 }, { "epoch": 1.0671507884570728, "grad_norm": 0.216764435172081, "learning_rate": 5.286490051358826e-06, "loss": 0.0069, "step": 126380 }, { "epoch": 1.0672352283042368, "grad_norm": 0.2796928584575653, "learning_rate": 5.2857543769974626e-06, "loss": 0.0044, "step": 126390 }, { "epoch": 1.0673196681514006, "grad_norm": 0.1510712206363678, "learning_rate": 5.285018696429574e-06, "loss": 0.0056, "step": 126400 }, { "epoch": 1.0674041079985646, "grad_norm": 0.104182168841362, "learning_rate": 5.2842830096711364e-06, "loss": 0.008, "step": 126410 }, { "epoch": 1.0674885478457283, "grad_norm": 0.17864349484443665, "learning_rate": 5.283547316738132e-06, "loss": 0.0048, "step": 126420 }, { "epoch": 1.0675729876928923, "grad_norm": 0.15282072126865387, "learning_rate": 5.282811617646538e-06, "loss": 0.0083, "step": 126430 }, { "epoch": 1.067657427540056, "grad_norm": 0.015166349709033966, "learning_rate": 5.282075912412332e-06, "loss": 0.0069, "step": 126440 }, { "epoch": 1.06774186738722, "grad_norm": 0.3276069164276123, "learning_rate": 5.2813402010514956e-06, "loss": 0.0113, "step": 126450 }, { "epoch": 1.0678263072343839, "grad_norm": 0.36970534920692444, "learning_rate": 5.280604483580008e-06, "loss": 0.0038, "step": 126460 }, { "epoch": 1.0679107470815479, "grad_norm": 0.3263583183288574, "learning_rate": 5.279868760013848e-06, "loss": 0.0111, "step": 126470 }, { "epoch": 1.0679951869287116, "grad_norm": 0.09869538247585297, "learning_rate": 5.279133030368997e-06, "loss": 0.0098, "step": 126480 }, { "epoch": 1.0680796267758756, "grad_norm": 0.2438792735338211, "learning_rate": 5.278397294661433e-06, "loss": 0.0087, "step": 126490 }, { "epoch": 1.0681640666230394, "grad_norm": 0.1916191428899765, "learning_rate": 5.2776615529071365e-06, "loss": 0.0082, "step": 126500 }, { "epoch": 1.0682485064702032, "grad_norm": 0.4559755325317383, "learning_rate": 5.2769258051220885e-06, "loss": 0.0075, "step": 126510 }, { "epoch": 1.0683329463173672, "grad_norm": 0.13121172785758972, "learning_rate": 5.276190051322269e-06, "loss": 0.0048, "step": 126520 }, { "epoch": 1.068417386164531, "grad_norm": 0.40499067306518555, "learning_rate": 5.275454291523657e-06, "loss": 0.0096, "step": 126530 }, { "epoch": 1.068501826011695, "grad_norm": 0.24179334938526154, "learning_rate": 5.274718525742236e-06, "loss": 0.0048, "step": 126540 }, { "epoch": 1.0685862658588587, "grad_norm": 0.2558806836605072, "learning_rate": 5.273982753993985e-06, "loss": 0.0061, "step": 126550 }, { "epoch": 1.0686707057060227, "grad_norm": 0.6320770382881165, "learning_rate": 5.2732469762948826e-06, "loss": 0.0134, "step": 126560 }, { "epoch": 1.0687551455531865, "grad_norm": 0.29832372069358826, "learning_rate": 5.2725111926609126e-06, "loss": 0.011, "step": 126570 }, { "epoch": 1.0688395854003505, "grad_norm": 0.39004001021385193, "learning_rate": 5.271775403108055e-06, "loss": 0.0126, "step": 126580 }, { "epoch": 1.0689240252475143, "grad_norm": 0.07180260121822357, "learning_rate": 5.271039607652293e-06, "loss": 0.007, "step": 126590 }, { "epoch": 1.069008465094678, "grad_norm": 0.12804466485977173, "learning_rate": 5.270303806309605e-06, "loss": 0.0174, "step": 126600 }, { "epoch": 1.069092904941842, "grad_norm": 0.18445374071598053, "learning_rate": 5.269567999095974e-06, "loss": 0.0071, "step": 126610 }, { "epoch": 1.0691773447890058, "grad_norm": 0.19652515649795532, "learning_rate": 5.26883218602738e-06, "loss": 0.0098, "step": 126620 }, { "epoch": 1.0692617846361698, "grad_norm": 0.3056824207305908, "learning_rate": 5.268096367119808e-06, "loss": 0.0076, "step": 126630 }, { "epoch": 1.0693462244833336, "grad_norm": 0.4262535870075226, "learning_rate": 5.267360542389237e-06, "loss": 0.0095, "step": 126640 }, { "epoch": 1.0694306643304976, "grad_norm": 0.2994915544986725, "learning_rate": 5.26662471185165e-06, "loss": 0.0097, "step": 126650 }, { "epoch": 1.0695151041776614, "grad_norm": 0.1198340654373169, "learning_rate": 5.265888875523029e-06, "loss": 0.0039, "step": 126660 }, { "epoch": 1.0695995440248254, "grad_norm": 0.21666833758354187, "learning_rate": 5.265153033419357e-06, "loss": 0.0084, "step": 126670 }, { "epoch": 1.0696839838719892, "grad_norm": 0.3061729073524475, "learning_rate": 5.264417185556614e-06, "loss": 0.0121, "step": 126680 }, { "epoch": 1.0697684237191531, "grad_norm": 0.5079744458198547, "learning_rate": 5.263681331950784e-06, "loss": 0.0052, "step": 126690 }, { "epoch": 1.069852863566317, "grad_norm": 0.609523594379425, "learning_rate": 5.262945472617849e-06, "loss": 0.0045, "step": 126700 }, { "epoch": 1.069937303413481, "grad_norm": 0.2333095520734787, "learning_rate": 5.262209607573795e-06, "loss": 0.0056, "step": 126710 }, { "epoch": 1.0700217432606447, "grad_norm": 0.45359548926353455, "learning_rate": 5.261473736834599e-06, "loss": 0.0114, "step": 126720 }, { "epoch": 1.0701061831078085, "grad_norm": 0.25777116417884827, "learning_rate": 5.260737860416248e-06, "loss": 0.0074, "step": 126730 }, { "epoch": 1.0701906229549725, "grad_norm": 0.34808149933815, "learning_rate": 5.2600019783347235e-06, "loss": 0.0097, "step": 126740 }, { "epoch": 1.0702750628021362, "grad_norm": 0.18491138517856598, "learning_rate": 5.259266090606011e-06, "loss": 0.0097, "step": 126750 }, { "epoch": 1.0703595026493002, "grad_norm": 0.5201746821403503, "learning_rate": 5.258530197246091e-06, "loss": 0.0081, "step": 126760 }, { "epoch": 1.070443942496464, "grad_norm": 0.3640165627002716, "learning_rate": 5.257794298270947e-06, "loss": 0.0065, "step": 126770 }, { "epoch": 1.070528382343628, "grad_norm": 0.2575225234031677, "learning_rate": 5.257058393696566e-06, "loss": 0.0121, "step": 126780 }, { "epoch": 1.0706128221907918, "grad_norm": 0.2248019278049469, "learning_rate": 5.256322483538928e-06, "loss": 0.0089, "step": 126790 }, { "epoch": 1.0706972620379558, "grad_norm": 0.4318895637989044, "learning_rate": 5.255586567814019e-06, "loss": 0.0066, "step": 126800 }, { "epoch": 1.0707817018851196, "grad_norm": 0.5283620357513428, "learning_rate": 5.254850646537821e-06, "loss": 0.0106, "step": 126810 }, { "epoch": 1.0708661417322836, "grad_norm": 0.6938744783401489, "learning_rate": 5.25411471972632e-06, "loss": 0.0151, "step": 126820 }, { "epoch": 1.0709505815794473, "grad_norm": 0.33192139863967896, "learning_rate": 5.2533787873955e-06, "loss": 0.0075, "step": 126830 }, { "epoch": 1.0710350214266113, "grad_norm": 0.6835845112800598, "learning_rate": 5.252642849561343e-06, "loss": 0.0084, "step": 126840 }, { "epoch": 1.071119461273775, "grad_norm": 0.15046513080596924, "learning_rate": 5.2519069062398364e-06, "loss": 0.0093, "step": 126850 }, { "epoch": 1.0712039011209389, "grad_norm": 0.29169803857803345, "learning_rate": 5.251170957446962e-06, "loss": 0.0094, "step": 126860 }, { "epoch": 1.0712883409681029, "grad_norm": 0.10757753998041153, "learning_rate": 5.250435003198707e-06, "loss": 0.0085, "step": 126870 }, { "epoch": 1.0713727808152667, "grad_norm": 0.21628119051456451, "learning_rate": 5.249699043511055e-06, "loss": 0.0077, "step": 126880 }, { "epoch": 1.0714572206624307, "grad_norm": 0.26516056060791016, "learning_rate": 5.24896307839999e-06, "loss": 0.0153, "step": 126890 }, { "epoch": 1.0715416605095944, "grad_norm": 0.418528288602829, "learning_rate": 5.248227107881498e-06, "loss": 0.0179, "step": 126900 }, { "epoch": 1.0716261003567584, "grad_norm": 0.1405918449163437, "learning_rate": 5.247491131971566e-06, "loss": 0.0101, "step": 126910 }, { "epoch": 1.0717105402039222, "grad_norm": 0.1672086864709854, "learning_rate": 5.246755150686177e-06, "loss": 0.008, "step": 126920 }, { "epoch": 1.0717949800510862, "grad_norm": 0.1026756763458252, "learning_rate": 5.2460191640413165e-06, "loss": 0.0038, "step": 126930 }, { "epoch": 1.07187941989825, "grad_norm": 0.5619480013847351, "learning_rate": 5.245283172052968e-06, "loss": 0.0066, "step": 126940 }, { "epoch": 1.0719638597454137, "grad_norm": 0.31995153427124023, "learning_rate": 5.24454717473712e-06, "loss": 0.0104, "step": 126950 }, { "epoch": 1.0720482995925777, "grad_norm": 0.4296863377094269, "learning_rate": 5.243811172109759e-06, "loss": 0.0052, "step": 126960 }, { "epoch": 1.0721327394397415, "grad_norm": 0.16243098676204681, "learning_rate": 5.243075164186869e-06, "loss": 0.0142, "step": 126970 }, { "epoch": 1.0722171792869055, "grad_norm": 0.10052942484617233, "learning_rate": 5.242339150984436e-06, "loss": 0.009, "step": 126980 }, { "epoch": 1.0723016191340693, "grad_norm": 0.16439993679523468, "learning_rate": 5.241603132518446e-06, "loss": 0.0056, "step": 126990 }, { "epoch": 1.0723860589812333, "grad_norm": 0.2861888110637665, "learning_rate": 5.240867108804886e-06, "loss": 0.0052, "step": 127000 }, { "epoch": 1.072470498828397, "grad_norm": 0.07378265261650085, "learning_rate": 5.240131079859741e-06, "loss": 0.0053, "step": 127010 }, { "epoch": 1.072554938675561, "grad_norm": 0.06203993782401085, "learning_rate": 5.239395045698999e-06, "loss": 0.0071, "step": 127020 }, { "epoch": 1.0726393785227248, "grad_norm": 0.17143286764621735, "learning_rate": 5.238659006338645e-06, "loss": 0.0044, "step": 127030 }, { "epoch": 1.0727238183698888, "grad_norm": 0.3553009033203125, "learning_rate": 5.237922961794666e-06, "loss": 0.0058, "step": 127040 }, { "epoch": 1.0728082582170526, "grad_norm": 0.15303143858909607, "learning_rate": 5.2371869120830494e-06, "loss": 0.0028, "step": 127050 }, { "epoch": 1.0728926980642166, "grad_norm": 0.5473106503486633, "learning_rate": 5.236450857219781e-06, "loss": 0.0116, "step": 127060 }, { "epoch": 1.0729771379113804, "grad_norm": 0.22776281833648682, "learning_rate": 5.2357147972208485e-06, "loss": 0.0096, "step": 127070 }, { "epoch": 1.0730615777585442, "grad_norm": 0.4407344460487366, "learning_rate": 5.2349787321022405e-06, "loss": 0.0077, "step": 127080 }, { "epoch": 1.0731460176057082, "grad_norm": 0.271597683429718, "learning_rate": 5.234242661879941e-06, "loss": 0.0122, "step": 127090 }, { "epoch": 1.073230457452872, "grad_norm": 0.5016777515411377, "learning_rate": 5.23350658656994e-06, "loss": 0.0104, "step": 127100 }, { "epoch": 1.073314897300036, "grad_norm": 0.07599420100450516, "learning_rate": 5.232770506188223e-06, "loss": 0.0065, "step": 127110 }, { "epoch": 1.0733993371471997, "grad_norm": 0.27078139781951904, "learning_rate": 5.232034420750779e-06, "loss": 0.0051, "step": 127120 }, { "epoch": 1.0734837769943637, "grad_norm": 0.05445903539657593, "learning_rate": 5.231298330273594e-06, "loss": 0.0065, "step": 127130 }, { "epoch": 1.0735682168415275, "grad_norm": 0.16464637219905853, "learning_rate": 5.230562234772658e-06, "loss": 0.0072, "step": 127140 }, { "epoch": 1.0736526566886915, "grad_norm": 0.2495768964290619, "learning_rate": 5.229826134263957e-06, "loss": 0.0136, "step": 127150 }, { "epoch": 1.0737370965358553, "grad_norm": 0.4624119997024536, "learning_rate": 5.229090028763479e-06, "loss": 0.0083, "step": 127160 }, { "epoch": 1.0738215363830192, "grad_norm": 0.31316956877708435, "learning_rate": 5.228353918287213e-06, "loss": 0.0054, "step": 127170 }, { "epoch": 1.073905976230183, "grad_norm": 0.48297789692878723, "learning_rate": 5.227617802851146e-06, "loss": 0.0071, "step": 127180 }, { "epoch": 1.0739904160773468, "grad_norm": 0.27179771661758423, "learning_rate": 5.226881682471266e-06, "loss": 0.0083, "step": 127190 }, { "epoch": 1.0740748559245108, "grad_norm": 0.5043314695358276, "learning_rate": 5.226145557163565e-06, "loss": 0.0108, "step": 127200 }, { "epoch": 1.0741592957716746, "grad_norm": 0.1655651032924652, "learning_rate": 5.2254094269440285e-06, "loss": 0.0067, "step": 127210 }, { "epoch": 1.0742437356188386, "grad_norm": 0.1921079009771347, "learning_rate": 5.224673291828644e-06, "loss": 0.0031, "step": 127220 }, { "epoch": 1.0743281754660023, "grad_norm": 0.2334672510623932, "learning_rate": 5.2239371518334015e-06, "loss": 0.0093, "step": 127230 }, { "epoch": 1.0744126153131663, "grad_norm": 0.013443177565932274, "learning_rate": 5.223201006974291e-06, "loss": 0.0118, "step": 127240 }, { "epoch": 1.0744970551603301, "grad_norm": 0.40460750460624695, "learning_rate": 5.2224648572673e-06, "loss": 0.0064, "step": 127250 }, { "epoch": 1.0745814950074941, "grad_norm": 0.2852310836315155, "learning_rate": 5.221728702728418e-06, "loss": 0.0051, "step": 127260 }, { "epoch": 1.074665934854658, "grad_norm": 0.1259039342403412, "learning_rate": 5.220992543373633e-06, "loss": 0.0071, "step": 127270 }, { "epoch": 1.0747503747018219, "grad_norm": 0.5398706197738647, "learning_rate": 5.220256379218938e-06, "loss": 0.01, "step": 127280 }, { "epoch": 1.0748348145489857, "grad_norm": 0.39165255427360535, "learning_rate": 5.219520210280316e-06, "loss": 0.0057, "step": 127290 }, { "epoch": 1.0749192543961494, "grad_norm": 0.34523072838783264, "learning_rate": 5.2187840365737615e-06, "loss": 0.0097, "step": 127300 }, { "epoch": 1.0750036942433134, "grad_norm": 0.3143366575241089, "learning_rate": 5.218047858115263e-06, "loss": 0.0123, "step": 127310 }, { "epoch": 1.0750881340904772, "grad_norm": 0.3893834054470062, "learning_rate": 5.217311674920809e-06, "loss": 0.005, "step": 127320 }, { "epoch": 1.0751725739376412, "grad_norm": 0.8054980039596558, "learning_rate": 5.2165754870063916e-06, "loss": 0.0094, "step": 127330 }, { "epoch": 1.075257013784805, "grad_norm": 0.7159196734428406, "learning_rate": 5.215839294387997e-06, "loss": 0.0075, "step": 127340 }, { "epoch": 1.075341453631969, "grad_norm": 0.3206014633178711, "learning_rate": 5.2151030970816175e-06, "loss": 0.0222, "step": 127350 }, { "epoch": 1.0754258934791328, "grad_norm": 0.184356689453125, "learning_rate": 5.214366895103244e-06, "loss": 0.0098, "step": 127360 }, { "epoch": 1.0755103333262968, "grad_norm": 0.2276214361190796, "learning_rate": 5.213630688468865e-06, "loss": 0.0151, "step": 127370 }, { "epoch": 1.0755947731734605, "grad_norm": 0.3248308002948761, "learning_rate": 5.212894477194471e-06, "loss": 0.0049, "step": 127380 }, { "epoch": 1.0756792130206245, "grad_norm": 0.3846907615661621, "learning_rate": 5.212158261296052e-06, "loss": 0.0067, "step": 127390 }, { "epoch": 1.0757636528677883, "grad_norm": 0.3984543979167938, "learning_rate": 5.2114220407896e-06, "loss": 0.0067, "step": 127400 }, { "epoch": 1.0758480927149523, "grad_norm": 0.25965604186058044, "learning_rate": 5.210685815691104e-06, "loss": 0.0058, "step": 127410 }, { "epoch": 1.075932532562116, "grad_norm": 0.1352541297674179, "learning_rate": 5.209949586016555e-06, "loss": 0.0047, "step": 127420 }, { "epoch": 1.0760169724092798, "grad_norm": 0.1303548961877823, "learning_rate": 5.209213351781944e-06, "loss": 0.0099, "step": 127430 }, { "epoch": 1.0761014122564438, "grad_norm": 0.1327579766511917, "learning_rate": 5.208477113003263e-06, "loss": 0.0082, "step": 127440 }, { "epoch": 1.0761858521036076, "grad_norm": 0.4215180575847626, "learning_rate": 5.207740869696502e-06, "loss": 0.0072, "step": 127450 }, { "epoch": 1.0762702919507716, "grad_norm": 0.3610762357711792, "learning_rate": 5.207004621877651e-06, "loss": 0.0136, "step": 127460 }, { "epoch": 1.0763547317979354, "grad_norm": 0.39887723326683044, "learning_rate": 5.2062683695627005e-06, "loss": 0.0077, "step": 127470 }, { "epoch": 1.0764391716450994, "grad_norm": 0.2989296019077301, "learning_rate": 5.2055321127676465e-06, "loss": 0.0054, "step": 127480 }, { "epoch": 1.0765236114922632, "grad_norm": 0.2529523968696594, "learning_rate": 5.204795851508474e-06, "loss": 0.0059, "step": 127490 }, { "epoch": 1.0766080513394272, "grad_norm": 0.2265818566083908, "learning_rate": 5.20405958580118e-06, "loss": 0.0114, "step": 127500 }, { "epoch": 1.076692491186591, "grad_norm": 0.35035791993141174, "learning_rate": 5.203323315661753e-06, "loss": 0.0128, "step": 127510 }, { "epoch": 1.0767769310337547, "grad_norm": 0.006355029530823231, "learning_rate": 5.202587041106184e-06, "loss": 0.0108, "step": 127520 }, { "epoch": 1.0768613708809187, "grad_norm": 0.18507590889930725, "learning_rate": 5.201850762150468e-06, "loss": 0.0065, "step": 127530 }, { "epoch": 1.0769458107280825, "grad_norm": 0.4137211740016937, "learning_rate": 5.201114478810593e-06, "loss": 0.0046, "step": 127540 }, { "epoch": 1.0770302505752465, "grad_norm": 0.25325340032577515, "learning_rate": 5.200378191102553e-06, "loss": 0.0091, "step": 127550 }, { "epoch": 1.0771146904224103, "grad_norm": 0.09075555950403214, "learning_rate": 5.199641899042341e-06, "loss": 0.0132, "step": 127560 }, { "epoch": 1.0771991302695743, "grad_norm": 1.0985207557678223, "learning_rate": 5.198905602645946e-06, "loss": 0.0079, "step": 127570 }, { "epoch": 1.077283570116738, "grad_norm": 0.07311984151601791, "learning_rate": 5.198169301929364e-06, "loss": 0.0069, "step": 127580 }, { "epoch": 1.077368009963902, "grad_norm": 0.3763098418712616, "learning_rate": 5.197432996908584e-06, "loss": 0.0097, "step": 127590 }, { "epoch": 1.0774524498110658, "grad_norm": 0.47300177812576294, "learning_rate": 5.1966966875996e-06, "loss": 0.0033, "step": 127600 }, { "epoch": 1.0775368896582298, "grad_norm": 0.1846773326396942, "learning_rate": 5.195960374018405e-06, "loss": 0.011, "step": 127610 }, { "epoch": 1.0776213295053936, "grad_norm": 0.28380855917930603, "learning_rate": 5.1952240561809905e-06, "loss": 0.0079, "step": 127620 }, { "epoch": 1.0777057693525576, "grad_norm": 0.4233703911304474, "learning_rate": 5.19448773410335e-06, "loss": 0.0095, "step": 127630 }, { "epoch": 1.0777902091997213, "grad_norm": 0.36555251479148865, "learning_rate": 5.193751407801474e-06, "loss": 0.007, "step": 127640 }, { "epoch": 1.0778746490468851, "grad_norm": 0.19439324736595154, "learning_rate": 5.193015077291359e-06, "loss": 0.008, "step": 127650 }, { "epoch": 1.0779590888940491, "grad_norm": 0.10536433756351471, "learning_rate": 5.192278742588994e-06, "loss": 0.004, "step": 127660 }, { "epoch": 1.078043528741213, "grad_norm": 0.2796688675880432, "learning_rate": 5.191542403710376e-06, "loss": 0.0113, "step": 127670 }, { "epoch": 1.078127968588377, "grad_norm": 0.5592665076255798, "learning_rate": 5.1908060606714946e-06, "loss": 0.0124, "step": 127680 }, { "epoch": 1.0782124084355407, "grad_norm": 0.06744001805782318, "learning_rate": 5.190069713488347e-06, "loss": 0.0071, "step": 127690 }, { "epoch": 1.0782968482827047, "grad_norm": 0.2642166018486023, "learning_rate": 5.1893333621769224e-06, "loss": 0.0052, "step": 127700 }, { "epoch": 1.0783812881298684, "grad_norm": 0.4983058273792267, "learning_rate": 5.1885970067532165e-06, "loss": 0.0075, "step": 127710 }, { "epoch": 1.0784657279770324, "grad_norm": 0.3589322566986084, "learning_rate": 5.187860647233223e-06, "loss": 0.0075, "step": 127720 }, { "epoch": 1.0785501678241962, "grad_norm": 0.1852547973394394, "learning_rate": 5.1871242836329335e-06, "loss": 0.005, "step": 127730 }, { "epoch": 1.0786346076713602, "grad_norm": 1.0721808671951294, "learning_rate": 5.186387915968344e-06, "loss": 0.0181, "step": 127740 }, { "epoch": 1.078719047518524, "grad_norm": 0.44650763273239136, "learning_rate": 5.185651544255445e-06, "loss": 0.0109, "step": 127750 }, { "epoch": 1.078803487365688, "grad_norm": 0.3297818899154663, "learning_rate": 5.184915168510235e-06, "loss": 0.0081, "step": 127760 }, { "epoch": 1.0788879272128518, "grad_norm": 0.47368624806404114, "learning_rate": 5.184178788748705e-06, "loss": 0.0063, "step": 127770 }, { "epoch": 1.0789723670600155, "grad_norm": 0.28134581446647644, "learning_rate": 5.183442404986848e-06, "loss": 0.0081, "step": 127780 }, { "epoch": 1.0790568069071795, "grad_norm": 0.2977412939071655, "learning_rate": 5.18270601724066e-06, "loss": 0.0105, "step": 127790 }, { "epoch": 1.0791412467543433, "grad_norm": 0.5637330412864685, "learning_rate": 5.181969625526135e-06, "loss": 0.0084, "step": 127800 }, { "epoch": 1.0792256866015073, "grad_norm": 0.2842867076396942, "learning_rate": 5.181233229859268e-06, "loss": 0.0079, "step": 127810 }, { "epoch": 1.079310126448671, "grad_norm": 0.24561910331249237, "learning_rate": 5.1804968302560525e-06, "loss": 0.0064, "step": 127820 }, { "epoch": 1.079394566295835, "grad_norm": 0.15863189101219177, "learning_rate": 5.179760426732481e-06, "loss": 0.008, "step": 127830 }, { "epoch": 1.0794790061429989, "grad_norm": 0.1749935895204544, "learning_rate": 5.1790240193045506e-06, "loss": 0.0086, "step": 127840 }, { "epoch": 1.0795634459901629, "grad_norm": 0.2237180471420288, "learning_rate": 5.178287607988257e-06, "loss": 0.0072, "step": 127850 }, { "epoch": 1.0796478858373266, "grad_norm": 0.0923093780875206, "learning_rate": 5.177551192799592e-06, "loss": 0.0065, "step": 127860 }, { "epoch": 1.0797323256844904, "grad_norm": 0.08027594536542892, "learning_rate": 5.176814773754549e-06, "loss": 0.006, "step": 127870 }, { "epoch": 1.0798167655316544, "grad_norm": 0.12555408477783203, "learning_rate": 5.176078350869127e-06, "loss": 0.0088, "step": 127880 }, { "epoch": 1.0799012053788182, "grad_norm": 0.14131683111190796, "learning_rate": 5.17534192415932e-06, "loss": 0.0036, "step": 127890 }, { "epoch": 1.0799856452259822, "grad_norm": 0.1840013563632965, "learning_rate": 5.1746054936411225e-06, "loss": 0.0116, "step": 127900 }, { "epoch": 1.080070085073146, "grad_norm": 0.16821280121803284, "learning_rate": 5.173869059330529e-06, "loss": 0.0053, "step": 127910 }, { "epoch": 1.08015452492031, "grad_norm": 0.2578239142894745, "learning_rate": 5.173132621243534e-06, "loss": 0.0055, "step": 127920 }, { "epoch": 1.0802389647674737, "grad_norm": 0.3308115005493164, "learning_rate": 5.172396179396137e-06, "loss": 0.0116, "step": 127930 }, { "epoch": 1.0803234046146377, "grad_norm": 0.12678568065166473, "learning_rate": 5.1716597338043276e-06, "loss": 0.0069, "step": 127940 }, { "epoch": 1.0804078444618015, "grad_norm": 0.27152785658836365, "learning_rate": 5.170923284484104e-06, "loss": 0.0079, "step": 127950 }, { "epoch": 1.0804922843089655, "grad_norm": 0.2753114104270935, "learning_rate": 5.170186831451461e-06, "loss": 0.0112, "step": 127960 }, { "epoch": 1.0805767241561293, "grad_norm": 0.2762686014175415, "learning_rate": 5.169450374722398e-06, "loss": 0.0118, "step": 127970 }, { "epoch": 1.0806611640032933, "grad_norm": 0.12616050243377686, "learning_rate": 5.168713914312906e-06, "loss": 0.0102, "step": 127980 }, { "epoch": 1.080745603850457, "grad_norm": 0.30870234966278076, "learning_rate": 5.167977450238982e-06, "loss": 0.0102, "step": 127990 }, { "epoch": 1.0808300436976208, "grad_norm": 0.16958755254745483, "learning_rate": 5.167240982516622e-06, "loss": 0.0055, "step": 128000 }, { "epoch": 1.0809144835447848, "grad_norm": 0.33436620235443115, "learning_rate": 5.166504511161824e-06, "loss": 0.0062, "step": 128010 }, { "epoch": 1.0809989233919486, "grad_norm": 0.18991617858409882, "learning_rate": 5.165768036190579e-06, "loss": 0.0056, "step": 128020 }, { "epoch": 1.0810833632391126, "grad_norm": 0.48708680272102356, "learning_rate": 5.165031557618888e-06, "loss": 0.0115, "step": 128030 }, { "epoch": 1.0811678030862764, "grad_norm": 0.10131119936704636, "learning_rate": 5.164295075462745e-06, "loss": 0.0064, "step": 128040 }, { "epoch": 1.0812522429334404, "grad_norm": 0.379794180393219, "learning_rate": 5.163558589738148e-06, "loss": 0.0066, "step": 128050 }, { "epoch": 1.0813366827806041, "grad_norm": 0.10196898877620697, "learning_rate": 5.16282210046109e-06, "loss": 0.0071, "step": 128060 }, { "epoch": 1.0814211226277681, "grad_norm": 0.26236993074417114, "learning_rate": 5.16208560764757e-06, "loss": 0.0198, "step": 128070 }, { "epoch": 1.081505562474932, "grad_norm": 0.4140731990337372, "learning_rate": 5.1613491113135834e-06, "loss": 0.0042, "step": 128080 }, { "epoch": 1.081590002322096, "grad_norm": 0.24258564412593842, "learning_rate": 5.160612611475129e-06, "loss": 0.0064, "step": 128090 }, { "epoch": 1.0816744421692597, "grad_norm": 0.13884064555168152, "learning_rate": 5.159876108148201e-06, "loss": 0.0065, "step": 128100 }, { "epoch": 1.0817588820164235, "grad_norm": 0.152103990316391, "learning_rate": 5.159139601348796e-06, "loss": 0.0039, "step": 128110 }, { "epoch": 1.0818433218635874, "grad_norm": 0.12196781486272812, "learning_rate": 5.158403091092911e-06, "loss": 0.0039, "step": 128120 }, { "epoch": 1.0819277617107512, "grad_norm": 0.14619845151901245, "learning_rate": 5.157666577396544e-06, "loss": 0.0073, "step": 128130 }, { "epoch": 1.0820122015579152, "grad_norm": 0.1863761693239212, "learning_rate": 5.156930060275691e-06, "loss": 0.0086, "step": 128140 }, { "epoch": 1.082096641405079, "grad_norm": 0.26566728949546814, "learning_rate": 5.156193539746349e-06, "loss": 0.0073, "step": 128150 }, { "epoch": 1.082181081252243, "grad_norm": 0.5877792835235596, "learning_rate": 5.155457015824515e-06, "loss": 0.0122, "step": 128160 }, { "epoch": 1.0822655210994068, "grad_norm": 0.17343206703662872, "learning_rate": 5.154720488526189e-06, "loss": 0.0121, "step": 128170 }, { "epoch": 1.0823499609465708, "grad_norm": 0.36769983172416687, "learning_rate": 5.153983957867364e-06, "loss": 0.0051, "step": 128180 }, { "epoch": 1.0824344007937345, "grad_norm": 0.389763742685318, "learning_rate": 5.153247423864037e-06, "loss": 0.0048, "step": 128190 }, { "epoch": 1.0825188406408985, "grad_norm": 0.572396993637085, "learning_rate": 5.1525108865322095e-06, "loss": 0.005, "step": 128200 }, { "epoch": 1.0826032804880623, "grad_norm": 0.30924493074417114, "learning_rate": 5.151774345887878e-06, "loss": 0.0042, "step": 128210 }, { "epoch": 1.082687720335226, "grad_norm": 0.20804063975811005, "learning_rate": 5.151037801947038e-06, "loss": 0.008, "step": 128220 }, { "epoch": 1.08277216018239, "grad_norm": 0.03525227680802345, "learning_rate": 5.1503012547256855e-06, "loss": 0.0089, "step": 128230 }, { "epoch": 1.0828566000295539, "grad_norm": 0.07392214238643646, "learning_rate": 5.149564704239823e-06, "loss": 0.0097, "step": 128240 }, { "epoch": 1.0829410398767179, "grad_norm": 0.6082401275634766, "learning_rate": 5.148828150505444e-06, "loss": 0.0056, "step": 128250 }, { "epoch": 1.0830254797238816, "grad_norm": 0.1959741860628128, "learning_rate": 5.14809159353855e-06, "loss": 0.0058, "step": 128260 }, { "epoch": 1.0831099195710456, "grad_norm": 0.0422636978328228, "learning_rate": 5.147355033355136e-06, "loss": 0.004, "step": 128270 }, { "epoch": 1.0831943594182094, "grad_norm": 0.4340185225009918, "learning_rate": 5.1466184699712e-06, "loss": 0.012, "step": 128280 }, { "epoch": 1.0832787992653734, "grad_norm": 0.14030419290065765, "learning_rate": 5.1458819034027415e-06, "loss": 0.007, "step": 128290 }, { "epoch": 1.0833632391125372, "grad_norm": 0.25719618797302246, "learning_rate": 5.145145333665759e-06, "loss": 0.0067, "step": 128300 }, { "epoch": 1.0834476789597012, "grad_norm": 0.021014027297496796, "learning_rate": 5.144408760776248e-06, "loss": 0.0062, "step": 128310 }, { "epoch": 1.083532118806865, "grad_norm": 0.25384053587913513, "learning_rate": 5.143672184750211e-06, "loss": 0.0138, "step": 128320 }, { "epoch": 1.083616558654029, "grad_norm": 0.3621293008327484, "learning_rate": 5.14293560560364e-06, "loss": 0.0132, "step": 128330 }, { "epoch": 1.0837009985011927, "grad_norm": 0.3524792790412903, "learning_rate": 5.14219902335254e-06, "loss": 0.0084, "step": 128340 }, { "epoch": 1.0837854383483565, "grad_norm": 0.23900331556797028, "learning_rate": 5.1414624380129055e-06, "loss": 0.007, "step": 128350 }, { "epoch": 1.0838698781955205, "grad_norm": 0.31787094473838806, "learning_rate": 5.140725849600734e-06, "loss": 0.0069, "step": 128360 }, { "epoch": 1.0839543180426843, "grad_norm": 0.1968695968389511, "learning_rate": 5.1399892581320276e-06, "loss": 0.0073, "step": 128370 }, { "epoch": 1.0840387578898483, "grad_norm": 0.20069049298763275, "learning_rate": 5.1392526636227835e-06, "loss": 0.0055, "step": 128380 }, { "epoch": 1.084123197737012, "grad_norm": 0.09916574507951736, "learning_rate": 5.138516066088999e-06, "loss": 0.0041, "step": 128390 }, { "epoch": 1.084207637584176, "grad_norm": 0.5485280752182007, "learning_rate": 5.137779465546674e-06, "loss": 0.0102, "step": 128400 }, { "epoch": 1.0842920774313398, "grad_norm": 0.25919073820114136, "learning_rate": 5.137042862011808e-06, "loss": 0.0092, "step": 128410 }, { "epoch": 1.0843765172785038, "grad_norm": 0.5121563076972961, "learning_rate": 5.136306255500398e-06, "loss": 0.007, "step": 128420 }, { "epoch": 1.0844609571256676, "grad_norm": 0.6961989998817444, "learning_rate": 5.135569646028444e-06, "loss": 0.0125, "step": 128430 }, { "epoch": 1.0845453969728314, "grad_norm": 0.42558953166007996, "learning_rate": 5.134833033611948e-06, "loss": 0.0067, "step": 128440 }, { "epoch": 1.0846298368199954, "grad_norm": 0.5705264806747437, "learning_rate": 5.134096418266903e-06, "loss": 0.0062, "step": 128450 }, { "epoch": 1.0847142766671591, "grad_norm": 0.19480641186237335, "learning_rate": 5.133359800009313e-06, "loss": 0.008, "step": 128460 }, { "epoch": 1.0847987165143231, "grad_norm": 0.43055641651153564, "learning_rate": 5.1326231788551736e-06, "loss": 0.011, "step": 128470 }, { "epoch": 1.084883156361487, "grad_norm": 0.5089156031608582, "learning_rate": 5.131886554820488e-06, "loss": 0.0082, "step": 128480 }, { "epoch": 1.084967596208651, "grad_norm": 0.2849996089935303, "learning_rate": 5.131149927921252e-06, "loss": 0.0074, "step": 128490 }, { "epoch": 1.0850520360558147, "grad_norm": 0.29133862257003784, "learning_rate": 5.130413298173467e-06, "loss": 0.0108, "step": 128500 }, { "epoch": 1.0851364759029787, "grad_norm": 0.17159567773342133, "learning_rate": 5.129676665593132e-06, "loss": 0.0042, "step": 128510 }, { "epoch": 1.0852209157501425, "grad_norm": 0.04156051203608513, "learning_rate": 5.1289400301962455e-06, "loss": 0.0108, "step": 128520 }, { "epoch": 1.0853053555973065, "grad_norm": 0.5550218820571899, "learning_rate": 5.128203391998808e-06, "loss": 0.0077, "step": 128530 }, { "epoch": 1.0853897954444702, "grad_norm": 0.35364073514938354, "learning_rate": 5.127466751016821e-06, "loss": 0.0097, "step": 128540 }, { "epoch": 1.0854742352916342, "grad_norm": 0.07785867154598236, "learning_rate": 5.12673010726628e-06, "loss": 0.0077, "step": 128550 }, { "epoch": 1.085558675138798, "grad_norm": 0.34608495235443115, "learning_rate": 5.125993460763189e-06, "loss": 0.0126, "step": 128560 }, { "epoch": 1.0856431149859618, "grad_norm": 0.09372451901435852, "learning_rate": 5.125256811523544e-06, "loss": 0.0084, "step": 128570 }, { "epoch": 1.0857275548331258, "grad_norm": 0.21779987215995789, "learning_rate": 5.124520159563349e-06, "loss": 0.0057, "step": 128580 }, { "epoch": 1.0858119946802895, "grad_norm": 0.008771440014243126, "learning_rate": 5.123783504898601e-06, "loss": 0.0036, "step": 128590 }, { "epoch": 1.0858964345274535, "grad_norm": 0.5118667483329773, "learning_rate": 5.123046847545299e-06, "loss": 0.01, "step": 128600 }, { "epoch": 1.0859808743746173, "grad_norm": 0.32651859521865845, "learning_rate": 5.122310187519446e-06, "loss": 0.0072, "step": 128610 }, { "epoch": 1.0860653142217813, "grad_norm": 0.2857527434825897, "learning_rate": 5.12157352483704e-06, "loss": 0.0036, "step": 128620 }, { "epoch": 1.086149754068945, "grad_norm": 0.4201042354106903, "learning_rate": 5.120836859514082e-06, "loss": 0.0065, "step": 128630 }, { "epoch": 1.086234193916109, "grad_norm": 0.15275853872299194, "learning_rate": 5.1201001915665726e-06, "loss": 0.0054, "step": 128640 }, { "epoch": 1.0863186337632729, "grad_norm": 0.2502474784851074, "learning_rate": 5.119363521010512e-06, "loss": 0.008, "step": 128650 }, { "epoch": 1.0864030736104369, "grad_norm": 0.3130776286125183, "learning_rate": 5.1186268478619e-06, "loss": 0.0051, "step": 128660 }, { "epoch": 1.0864875134576006, "grad_norm": 0.1691315472126007, "learning_rate": 5.117890172136737e-06, "loss": 0.0062, "step": 128670 }, { "epoch": 1.0865719533047644, "grad_norm": 0.0053212596103549, "learning_rate": 5.117153493851024e-06, "loss": 0.0047, "step": 128680 }, { "epoch": 1.0866563931519284, "grad_norm": 0.25657397508621216, "learning_rate": 5.1164168130207605e-06, "loss": 0.009, "step": 128690 }, { "epoch": 1.0867408329990922, "grad_norm": 0.23477429151535034, "learning_rate": 5.115680129661947e-06, "loss": 0.0052, "step": 128700 }, { "epoch": 1.0868252728462562, "grad_norm": 0.05326471105217934, "learning_rate": 5.114943443790587e-06, "loss": 0.0063, "step": 128710 }, { "epoch": 1.08690971269342, "grad_norm": 0.3527718484401703, "learning_rate": 5.114206755422677e-06, "loss": 0.0085, "step": 128720 }, { "epoch": 1.086994152540584, "grad_norm": 0.29241272807121277, "learning_rate": 5.11347006457422e-06, "loss": 0.0084, "step": 128730 }, { "epoch": 1.0870785923877477, "grad_norm": 0.24361076951026917, "learning_rate": 5.112733371261217e-06, "loss": 0.0069, "step": 128740 }, { "epoch": 1.0871630322349117, "grad_norm": 0.47507259249687195, "learning_rate": 5.111996675499668e-06, "loss": 0.012, "step": 128750 }, { "epoch": 1.0872474720820755, "grad_norm": 1.0335438251495361, "learning_rate": 5.1112599773055736e-06, "loss": 0.0117, "step": 128760 }, { "epoch": 1.0873319119292395, "grad_norm": 0.3054843246936798, "learning_rate": 5.110523276694936e-06, "loss": 0.0127, "step": 128770 }, { "epoch": 1.0874163517764033, "grad_norm": 0.7300265431404114, "learning_rate": 5.1097865736837556e-06, "loss": 0.0103, "step": 128780 }, { "epoch": 1.087500791623567, "grad_norm": 0.3685157895088196, "learning_rate": 5.109049868288033e-06, "loss": 0.0063, "step": 128790 }, { "epoch": 1.087585231470731, "grad_norm": 0.2067112773656845, "learning_rate": 5.1083131605237715e-06, "loss": 0.0061, "step": 128800 }, { "epoch": 1.0876696713178948, "grad_norm": 0.1585688441991806, "learning_rate": 5.107576450406968e-06, "loss": 0.0081, "step": 128810 }, { "epoch": 1.0877541111650588, "grad_norm": 0.273044228553772, "learning_rate": 5.106839737953626e-06, "loss": 0.0058, "step": 128820 }, { "epoch": 1.0878385510122226, "grad_norm": 0.3841927945613861, "learning_rate": 5.106103023179748e-06, "loss": 0.0079, "step": 128830 }, { "epoch": 1.0879229908593866, "grad_norm": 0.06656518578529358, "learning_rate": 5.105366306101333e-06, "loss": 0.0096, "step": 128840 }, { "epoch": 1.0880074307065504, "grad_norm": 0.18809771537780762, "learning_rate": 5.104629586734383e-06, "loss": 0.0042, "step": 128850 }, { "epoch": 1.0880918705537144, "grad_norm": 0.2978881895542145, "learning_rate": 5.1038928650949e-06, "loss": 0.0037, "step": 128860 }, { "epoch": 1.0881763104008781, "grad_norm": 0.5939686298370361, "learning_rate": 5.103156141198887e-06, "loss": 0.0078, "step": 128870 }, { "epoch": 1.0882607502480421, "grad_norm": 0.22296282649040222, "learning_rate": 5.102419415062343e-06, "loss": 0.0097, "step": 128880 }, { "epoch": 1.088345190095206, "grad_norm": 0.06701623648405075, "learning_rate": 5.1016826867012695e-06, "loss": 0.0095, "step": 128890 }, { "epoch": 1.08842962994237, "grad_norm": 0.37012723088264465, "learning_rate": 5.100945956131669e-06, "loss": 0.0124, "step": 128900 }, { "epoch": 1.0885140697895337, "grad_norm": 0.15407392382621765, "learning_rate": 5.100209223369544e-06, "loss": 0.0083, "step": 128910 }, { "epoch": 1.0885985096366975, "grad_norm": 0.24784258008003235, "learning_rate": 5.099472488430895e-06, "loss": 0.0055, "step": 128920 }, { "epoch": 1.0886829494838615, "grad_norm": 0.19956344366073608, "learning_rate": 5.098735751331724e-06, "loss": 0.0091, "step": 128930 }, { "epoch": 1.0887673893310252, "grad_norm": 0.1740753948688507, "learning_rate": 5.097999012088032e-06, "loss": 0.0066, "step": 128940 }, { "epoch": 1.0888518291781892, "grad_norm": 0.3337039649486542, "learning_rate": 5.0972622707158225e-06, "loss": 0.0098, "step": 128950 }, { "epoch": 1.088936269025353, "grad_norm": 0.38645049929618835, "learning_rate": 5.096525527231095e-06, "loss": 0.0077, "step": 128960 }, { "epoch": 1.089020708872517, "grad_norm": 0.17702899873256683, "learning_rate": 5.095788781649853e-06, "loss": 0.0078, "step": 128970 }, { "epoch": 1.0891051487196808, "grad_norm": 0.19204623997211456, "learning_rate": 5.095052033988099e-06, "loss": 0.0072, "step": 128980 }, { "epoch": 1.0891895885668448, "grad_norm": 0.18173901736736298, "learning_rate": 5.0943152842618344e-06, "loss": 0.0071, "step": 128990 }, { "epoch": 1.0892740284140086, "grad_norm": 0.23693512380123138, "learning_rate": 5.093578532487061e-06, "loss": 0.0068, "step": 129000 }, { "epoch": 1.0893584682611723, "grad_norm": 0.30711114406585693, "learning_rate": 5.09284177867978e-06, "loss": 0.0119, "step": 129010 }, { "epoch": 1.0894429081083363, "grad_norm": 0.09535738080739975, "learning_rate": 5.092105022855995e-06, "loss": 0.0059, "step": 129020 }, { "epoch": 1.0895273479555, "grad_norm": 0.2863468527793884, "learning_rate": 5.09136826503171e-06, "loss": 0.0068, "step": 129030 }, { "epoch": 1.089611787802664, "grad_norm": 0.11108958721160889, "learning_rate": 5.090631505222924e-06, "loss": 0.0146, "step": 129040 }, { "epoch": 1.0896962276498279, "grad_norm": 0.17374540865421295, "learning_rate": 5.0898947434456395e-06, "loss": 0.0064, "step": 129050 }, { "epoch": 1.0897806674969919, "grad_norm": 0.1371060013771057, "learning_rate": 5.0891579797158585e-06, "loss": 0.0101, "step": 129060 }, { "epoch": 1.0898651073441556, "grad_norm": 0.9395676255226135, "learning_rate": 5.088421214049587e-06, "loss": 0.0088, "step": 129070 }, { "epoch": 1.0899495471913196, "grad_norm": 0.8372749090194702, "learning_rate": 5.087684446462823e-06, "loss": 0.0122, "step": 129080 }, { "epoch": 1.0900339870384834, "grad_norm": 0.2949647307395935, "learning_rate": 5.086947676971571e-06, "loss": 0.0151, "step": 129090 }, { "epoch": 1.0901184268856474, "grad_norm": 0.24796348810195923, "learning_rate": 5.086210905591833e-06, "loss": 0.0136, "step": 129100 }, { "epoch": 1.0902028667328112, "grad_norm": 0.24016264081001282, "learning_rate": 5.085474132339613e-06, "loss": 0.0083, "step": 129110 }, { "epoch": 1.0902873065799752, "grad_norm": 0.15896442532539368, "learning_rate": 5.084737357230911e-06, "loss": 0.0189, "step": 129120 }, { "epoch": 1.090371746427139, "grad_norm": 0.1385863721370697, "learning_rate": 5.0840005802817316e-06, "loss": 0.0046, "step": 129130 }, { "epoch": 1.0904561862743027, "grad_norm": 1.0474810600280762, "learning_rate": 5.083263801508076e-06, "loss": 0.0154, "step": 129140 }, { "epoch": 1.0905406261214667, "grad_norm": 0.31000930070877075, "learning_rate": 5.0825270209259495e-06, "loss": 0.0071, "step": 129150 }, { "epoch": 1.0906250659686305, "grad_norm": 0.4700790047645569, "learning_rate": 5.081790238551353e-06, "loss": 0.0104, "step": 129160 }, { "epoch": 1.0907095058157945, "grad_norm": 0.0065483697690069675, "learning_rate": 5.081053454400288e-06, "loss": 0.0082, "step": 129170 }, { "epoch": 1.0907939456629583, "grad_norm": 0.1894398182630539, "learning_rate": 5.0803166684887585e-06, "loss": 0.0046, "step": 129180 }, { "epoch": 1.0908783855101223, "grad_norm": 0.4193631112575531, "learning_rate": 5.0795798808327675e-06, "loss": 0.0084, "step": 129190 }, { "epoch": 1.090962825357286, "grad_norm": 0.34833675622940063, "learning_rate": 5.0788430914483186e-06, "loss": 0.0076, "step": 129200 }, { "epoch": 1.09104726520445, "grad_norm": 0.2494286447763443, "learning_rate": 5.078106300351413e-06, "loss": 0.0061, "step": 129210 }, { "epoch": 1.0911317050516138, "grad_norm": 0.09742332249879837, "learning_rate": 5.077369507558055e-06, "loss": 0.0034, "step": 129220 }, { "epoch": 1.0912161448987778, "grad_norm": 0.1581214815378189, "learning_rate": 5.076632713084249e-06, "loss": 0.014, "step": 129230 }, { "epoch": 1.0913005847459416, "grad_norm": 0.3425734043121338, "learning_rate": 5.0758959169459935e-06, "loss": 0.0074, "step": 129240 }, { "epoch": 1.0913850245931056, "grad_norm": 0.3377591371536255, "learning_rate": 5.0751591191592945e-06, "loss": 0.0095, "step": 129250 }, { "epoch": 1.0914694644402694, "grad_norm": 0.4923078119754791, "learning_rate": 5.074422319740155e-06, "loss": 0.0104, "step": 129260 }, { "epoch": 1.0915539042874332, "grad_norm": 0.2545364797115326, "learning_rate": 5.0736855187045785e-06, "loss": 0.0056, "step": 129270 }, { "epoch": 1.0916383441345971, "grad_norm": 0.35522890090942383, "learning_rate": 5.0729487160685684e-06, "loss": 0.0066, "step": 129280 }, { "epoch": 1.091722783981761, "grad_norm": 0.12046940624713898, "learning_rate": 5.0722119118481265e-06, "loss": 0.0047, "step": 129290 }, { "epoch": 1.091807223828925, "grad_norm": 0.3178129196166992, "learning_rate": 5.071475106059256e-06, "loss": 0.0057, "step": 129300 }, { "epoch": 1.0918916636760887, "grad_norm": 0.27693215012550354, "learning_rate": 5.070738298717961e-06, "loss": 0.0058, "step": 129310 }, { "epoch": 1.0919761035232527, "grad_norm": 0.01987803541123867, "learning_rate": 5.070001489840246e-06, "loss": 0.0056, "step": 129320 }, { "epoch": 1.0920605433704165, "grad_norm": 0.24082660675048828, "learning_rate": 5.069264679442111e-06, "loss": 0.0082, "step": 129330 }, { "epoch": 1.0921449832175805, "grad_norm": 0.10576719790697098, "learning_rate": 5.068527867539562e-06, "loss": 0.011, "step": 129340 }, { "epoch": 1.0922294230647442, "grad_norm": 0.3344956338405609, "learning_rate": 5.067791054148602e-06, "loss": 0.013, "step": 129350 }, { "epoch": 1.092313862911908, "grad_norm": 0.1759638786315918, "learning_rate": 5.067054239285235e-06, "loss": 0.0094, "step": 129360 }, { "epoch": 1.092398302759072, "grad_norm": 0.3925757110118866, "learning_rate": 5.0663174229654625e-06, "loss": 0.0065, "step": 129370 }, { "epoch": 1.0924827426062358, "grad_norm": 0.2136458307504654, "learning_rate": 5.0655806052052895e-06, "loss": 0.0056, "step": 129380 }, { "epoch": 1.0925671824533998, "grad_norm": 0.28042739629745483, "learning_rate": 5.064843786020719e-06, "loss": 0.0066, "step": 129390 }, { "epoch": 1.0926516223005636, "grad_norm": 0.6993939876556396, "learning_rate": 5.064106965427756e-06, "loss": 0.0097, "step": 129400 }, { "epoch": 1.0927360621477276, "grad_norm": 0.2373412400484085, "learning_rate": 5.063370143442402e-06, "loss": 0.0147, "step": 129410 }, { "epoch": 1.0928205019948913, "grad_norm": 0.3400644063949585, "learning_rate": 5.062633320080661e-06, "loss": 0.0114, "step": 129420 }, { "epoch": 1.0929049418420553, "grad_norm": 0.07066371291875839, "learning_rate": 5.061896495358537e-06, "loss": 0.011, "step": 129430 }, { "epoch": 1.092989381689219, "grad_norm": 0.11587559431791306, "learning_rate": 5.061159669292035e-06, "loss": 0.0058, "step": 129440 }, { "epoch": 1.093073821536383, "grad_norm": 0.1991853564977646, "learning_rate": 5.060422841897155e-06, "loss": 0.0086, "step": 129450 }, { "epoch": 1.0931582613835469, "grad_norm": 0.2262830287218094, "learning_rate": 5.0596860131899065e-06, "loss": 0.0075, "step": 129460 }, { "epoch": 1.0932427012307109, "grad_norm": 0.5448622703552246, "learning_rate": 5.058949183186287e-06, "loss": 0.0065, "step": 129470 }, { "epoch": 1.0933271410778747, "grad_norm": 0.3529983162879944, "learning_rate": 5.058212351902307e-06, "loss": 0.011, "step": 129480 }, { "epoch": 1.0934115809250384, "grad_norm": 1.2853271961212158, "learning_rate": 5.0574755193539625e-06, "loss": 0.008, "step": 129490 }, { "epoch": 1.0934960207722024, "grad_norm": 0.26711902022361755, "learning_rate": 5.0567386855572645e-06, "loss": 0.0075, "step": 129500 }, { "epoch": 1.0935804606193662, "grad_norm": 0.293811559677124, "learning_rate": 5.056001850528212e-06, "loss": 0.0132, "step": 129510 }, { "epoch": 1.0936649004665302, "grad_norm": 0.22402989864349365, "learning_rate": 5.055265014282813e-06, "loss": 0.0044, "step": 129520 }, { "epoch": 1.093749340313694, "grad_norm": 0.2266267091035843, "learning_rate": 5.054528176837068e-06, "loss": 0.0109, "step": 129530 }, { "epoch": 1.093833780160858, "grad_norm": 0.7904018759727478, "learning_rate": 5.053791338206981e-06, "loss": 0.0064, "step": 129540 }, { "epoch": 1.0939182200080217, "grad_norm": 0.3049246072769165, "learning_rate": 5.0530544984085586e-06, "loss": 0.0101, "step": 129550 }, { "epoch": 1.0940026598551857, "grad_norm": 0.2164561152458191, "learning_rate": 5.052317657457803e-06, "loss": 0.0075, "step": 129560 }, { "epoch": 1.0940870997023495, "grad_norm": 0.7460897564888, "learning_rate": 5.051580815370719e-06, "loss": 0.0148, "step": 129570 }, { "epoch": 1.0941715395495135, "grad_norm": 0.20545236766338348, "learning_rate": 5.0508439721633095e-06, "loss": 0.005, "step": 129580 }, { "epoch": 1.0942559793966773, "grad_norm": 0.17542147636413574, "learning_rate": 5.050107127851579e-06, "loss": 0.0048, "step": 129590 }, { "epoch": 1.094340419243841, "grad_norm": 0.10691875219345093, "learning_rate": 5.049370282451534e-06, "loss": 0.0096, "step": 129600 }, { "epoch": 1.094424859091005, "grad_norm": 0.4602692127227783, "learning_rate": 5.048633435979175e-06, "loss": 0.0122, "step": 129610 }, { "epoch": 1.0945092989381688, "grad_norm": 0.04921601340174675, "learning_rate": 5.047896588450508e-06, "loss": 0.0102, "step": 129620 }, { "epoch": 1.0945937387853328, "grad_norm": 0.5447950959205627, "learning_rate": 5.047159739881536e-06, "loss": 0.0085, "step": 129630 }, { "epoch": 1.0946781786324966, "grad_norm": 0.2379317283630371, "learning_rate": 5.046422890288265e-06, "loss": 0.008, "step": 129640 }, { "epoch": 1.0947626184796606, "grad_norm": 0.40355244278907776, "learning_rate": 5.0456860396866995e-06, "loss": 0.0058, "step": 129650 }, { "epoch": 1.0948470583268244, "grad_norm": 0.605311393737793, "learning_rate": 5.044949188092841e-06, "loss": 0.0081, "step": 129660 }, { "epoch": 1.0949314981739884, "grad_norm": 0.36310359835624695, "learning_rate": 5.044212335522695e-06, "loss": 0.0082, "step": 129670 }, { "epoch": 1.0950159380211522, "grad_norm": 0.2789471745491028, "learning_rate": 5.043475481992267e-06, "loss": 0.0053, "step": 129680 }, { "epoch": 1.0951003778683162, "grad_norm": 0.13046300411224365, "learning_rate": 5.0427386275175605e-06, "loss": 0.008, "step": 129690 }, { "epoch": 1.09518481771548, "grad_norm": 0.46423855423927307, "learning_rate": 5.04200177211458e-06, "loss": 0.0068, "step": 129700 }, { "epoch": 1.0952692575626437, "grad_norm": 0.03024335205554962, "learning_rate": 5.0412649157993274e-06, "loss": 0.007, "step": 129710 }, { "epoch": 1.0953536974098077, "grad_norm": 0.19697928428649902, "learning_rate": 5.040528058587812e-06, "loss": 0.0117, "step": 129720 }, { "epoch": 1.0954381372569715, "grad_norm": 0.31699883937835693, "learning_rate": 5.039791200496035e-06, "loss": 0.0058, "step": 129730 }, { "epoch": 1.0955225771041355, "grad_norm": 0.4854966104030609, "learning_rate": 5.039054341540001e-06, "loss": 0.0059, "step": 129740 }, { "epoch": 1.0956070169512993, "grad_norm": 0.14015895128250122, "learning_rate": 5.038317481735715e-06, "loss": 0.0102, "step": 129750 }, { "epoch": 1.0956914567984632, "grad_norm": 0.31511035561561584, "learning_rate": 5.03758062109918e-06, "loss": 0.0071, "step": 129760 }, { "epoch": 1.095775896645627, "grad_norm": 0.1039421483874321, "learning_rate": 5.036843759646403e-06, "loss": 0.0126, "step": 129770 }, { "epoch": 1.095860336492791, "grad_norm": 0.17427407205104828, "learning_rate": 5.036106897393386e-06, "loss": 0.0095, "step": 129780 }, { "epoch": 1.0959447763399548, "grad_norm": 0.20082788169384003, "learning_rate": 5.035370034356134e-06, "loss": 0.0071, "step": 129790 }, { "epoch": 1.0960292161871188, "grad_norm": 0.4964544475078583, "learning_rate": 5.0346331705506535e-06, "loss": 0.0115, "step": 129800 }, { "epoch": 1.0961136560342826, "grad_norm": 0.1098269447684288, "learning_rate": 5.033896305992948e-06, "loss": 0.0108, "step": 129810 }, { "epoch": 1.0961980958814466, "grad_norm": 0.3354928195476532, "learning_rate": 5.0331594406990205e-06, "loss": 0.0058, "step": 129820 }, { "epoch": 1.0962825357286103, "grad_norm": 0.24754847586154938, "learning_rate": 5.032422574684876e-06, "loss": 0.0101, "step": 129830 }, { "epoch": 1.0963669755757741, "grad_norm": 0.0942123681306839, "learning_rate": 5.031685707966523e-06, "loss": 0.0071, "step": 129840 }, { "epoch": 1.0964514154229381, "grad_norm": 0.26011016964912415, "learning_rate": 5.03094884055996e-06, "loss": 0.0128, "step": 129850 }, { "epoch": 1.096535855270102, "grad_norm": 0.29553312063217163, "learning_rate": 5.0302119724811955e-06, "loss": 0.0103, "step": 129860 }, { "epoch": 1.0966202951172659, "grad_norm": 0.2310062199831009, "learning_rate": 5.029475103746233e-06, "loss": 0.0089, "step": 129870 }, { "epoch": 1.0967047349644297, "grad_norm": 0.22843840718269348, "learning_rate": 5.028738234371077e-06, "loss": 0.0081, "step": 129880 }, { "epoch": 1.0967891748115937, "grad_norm": 0.2071606069803238, "learning_rate": 5.028001364371734e-06, "loss": 0.0102, "step": 129890 }, { "epoch": 1.0968736146587574, "grad_norm": 0.8776291012763977, "learning_rate": 5.027264493764205e-06, "loss": 0.0111, "step": 129900 }, { "epoch": 1.0969580545059214, "grad_norm": 0.11192486435174942, "learning_rate": 5.026527622564498e-06, "loss": 0.0106, "step": 129910 }, { "epoch": 1.0970424943530852, "grad_norm": 0.1242142990231514, "learning_rate": 5.025790750788615e-06, "loss": 0.0071, "step": 129920 }, { "epoch": 1.097126934200249, "grad_norm": 0.05530970171093941, "learning_rate": 5.025053878452564e-06, "loss": 0.0093, "step": 129930 }, { "epoch": 1.097211374047413, "grad_norm": 0.26345276832580566, "learning_rate": 5.024317005572347e-06, "loss": 0.0063, "step": 129940 }, { "epoch": 1.0972958138945768, "grad_norm": 0.26998665928840637, "learning_rate": 5.0235801321639695e-06, "loss": 0.0035, "step": 129950 }, { "epoch": 1.0973802537417408, "grad_norm": 0.07372342795133591, "learning_rate": 5.022843258243436e-06, "loss": 0.0071, "step": 129960 }, { "epoch": 1.0974646935889045, "grad_norm": 0.16403992474079132, "learning_rate": 5.022106383826754e-06, "loss": 0.0067, "step": 129970 }, { "epoch": 1.0975491334360685, "grad_norm": 0.014436335302889347, "learning_rate": 5.021369508929924e-06, "loss": 0.0173, "step": 129980 }, { "epoch": 1.0976335732832323, "grad_norm": 0.2552914023399353, "learning_rate": 5.020632633568953e-06, "loss": 0.0063, "step": 129990 }, { "epoch": 1.0977180131303963, "grad_norm": 0.37872251868247986, "learning_rate": 5.019895757759845e-06, "loss": 0.0091, "step": 130000 }, { "epoch": 1.09780245297756, "grad_norm": 0.5382055044174194, "learning_rate": 5.019158881518606e-06, "loss": 0.0126, "step": 130010 }, { "epoch": 1.097886892824724, "grad_norm": 0.26268506050109863, "learning_rate": 5.018422004861238e-06, "loss": 0.0065, "step": 130020 }, { "epoch": 1.0979713326718878, "grad_norm": 0.30298393964767456, "learning_rate": 5.01768512780375e-06, "loss": 0.0079, "step": 130030 }, { "epoch": 1.0980557725190518, "grad_norm": 0.18549887835979462, "learning_rate": 5.0169482503621435e-06, "loss": 0.0072, "step": 130040 }, { "epoch": 1.0981402123662156, "grad_norm": 0.1572703719139099, "learning_rate": 5.016211372552425e-06, "loss": 0.0124, "step": 130050 }, { "epoch": 1.0982246522133794, "grad_norm": 0.5783292055130005, "learning_rate": 5.015474494390599e-06, "loss": 0.0093, "step": 130060 }, { "epoch": 1.0983090920605434, "grad_norm": 0.20164616405963898, "learning_rate": 5.01473761589267e-06, "loss": 0.0106, "step": 130070 }, { "epoch": 1.0983935319077072, "grad_norm": 0.2639312446117401, "learning_rate": 5.0140007370746425e-06, "loss": 0.0106, "step": 130080 }, { "epoch": 1.0984779717548712, "grad_norm": 0.1974884569644928, "learning_rate": 5.013263857952523e-06, "loss": 0.0052, "step": 130090 }, { "epoch": 1.098562411602035, "grad_norm": 0.3801555633544922, "learning_rate": 5.012526978542315e-06, "loss": 0.0075, "step": 130100 }, { "epoch": 1.098646851449199, "grad_norm": 0.6668351292610168, "learning_rate": 5.0117900988600225e-06, "loss": 0.0102, "step": 130110 }, { "epoch": 1.0987312912963627, "grad_norm": 0.34912362694740295, "learning_rate": 5.011053218921652e-06, "loss": 0.0059, "step": 130120 }, { "epoch": 1.0988157311435267, "grad_norm": 0.15523386001586914, "learning_rate": 5.010316338743209e-06, "loss": 0.0097, "step": 130130 }, { "epoch": 1.0989001709906905, "grad_norm": 0.25371649861335754, "learning_rate": 5.009579458340695e-06, "loss": 0.0075, "step": 130140 }, { "epoch": 1.0989846108378545, "grad_norm": 0.09595739841461182, "learning_rate": 5.008842577730118e-06, "loss": 0.0046, "step": 130150 }, { "epoch": 1.0990690506850183, "grad_norm": 0.1157575473189354, "learning_rate": 5.008105696927482e-06, "loss": 0.0046, "step": 130160 }, { "epoch": 1.0991534905321823, "grad_norm": 0.4913727641105652, "learning_rate": 5.0073688159487935e-06, "loss": 0.0176, "step": 130170 }, { "epoch": 1.099237930379346, "grad_norm": 0.24254634976387024, "learning_rate": 5.006631934810054e-06, "loss": 0.0128, "step": 130180 }, { "epoch": 1.0993223702265098, "grad_norm": 0.169822096824646, "learning_rate": 5.00589505352727e-06, "loss": 0.0125, "step": 130190 }, { "epoch": 1.0994068100736738, "grad_norm": 0.914704442024231, "learning_rate": 5.005158172116447e-06, "loss": 0.008, "step": 130200 }, { "epoch": 1.0994912499208376, "grad_norm": 0.22400493919849396, "learning_rate": 5.00442129059359e-06, "loss": 0.0054, "step": 130210 }, { "epoch": 1.0995756897680016, "grad_norm": 0.2524343729019165, "learning_rate": 5.003684408974705e-06, "loss": 0.0065, "step": 130220 }, { "epoch": 1.0996601296151653, "grad_norm": 0.20699256658554077, "learning_rate": 5.002947527275793e-06, "loss": 0.0077, "step": 130230 }, { "epoch": 1.0997445694623293, "grad_norm": 0.4422054588794708, "learning_rate": 5.002210645512861e-06, "loss": 0.0066, "step": 130240 }, { "epoch": 1.0998290093094931, "grad_norm": 0.1709468811750412, "learning_rate": 5.001473763701916e-06, "loss": 0.0079, "step": 130250 }, { "epoch": 1.0999134491566571, "grad_norm": 0.2550484538078308, "learning_rate": 5.000736881858962e-06, "loss": 0.0047, "step": 130260 }, { "epoch": 1.099997889003821, "grad_norm": 0.1315620243549347, "learning_rate": 5e-06, "loss": 0.0092, "step": 130270 }, { "epoch": 1.1000823288509847, "grad_norm": 0.0789211094379425, "learning_rate": 4.999263118141041e-06, "loss": 0.007, "step": 130280 }, { "epoch": 1.1001667686981487, "grad_norm": 0.33725664019584656, "learning_rate": 4.998526236298085e-06, "loss": 0.0082, "step": 130290 }, { "epoch": 1.1002512085453124, "grad_norm": 0.03699590638279915, "learning_rate": 4.9977893544871405e-06, "loss": 0.0062, "step": 130300 }, { "epoch": 1.1003356483924764, "grad_norm": 0.09664225578308105, "learning_rate": 4.9970524727242085e-06, "loss": 0.0116, "step": 130310 }, { "epoch": 1.1004200882396402, "grad_norm": 0.5953763723373413, "learning_rate": 4.996315591025298e-06, "loss": 0.0116, "step": 130320 }, { "epoch": 1.1005045280868042, "grad_norm": 0.14447557926177979, "learning_rate": 4.9955787094064105e-06, "loss": 0.0085, "step": 130330 }, { "epoch": 1.100588967933968, "grad_norm": 0.5443602204322815, "learning_rate": 4.994841827883554e-06, "loss": 0.0152, "step": 130340 }, { "epoch": 1.100673407781132, "grad_norm": 0.5122836828231812, "learning_rate": 4.99410494647273e-06, "loss": 0.0148, "step": 130350 }, { "epoch": 1.1007578476282958, "grad_norm": 0.3961171507835388, "learning_rate": 4.993368065189948e-06, "loss": 0.007, "step": 130360 }, { "epoch": 1.1008422874754598, "grad_norm": 0.13030019402503967, "learning_rate": 4.992631184051209e-06, "loss": 0.0061, "step": 130370 }, { "epoch": 1.1009267273226235, "grad_norm": 0.5788965225219727, "learning_rate": 4.991894303072519e-06, "loss": 0.0084, "step": 130380 }, { "epoch": 1.1010111671697875, "grad_norm": 0.4323064684867859, "learning_rate": 4.991157422269885e-06, "loss": 0.0106, "step": 130390 }, { "epoch": 1.1010956070169513, "grad_norm": 0.21156679093837738, "learning_rate": 4.990420541659306e-06, "loss": 0.0051, "step": 130400 }, { "epoch": 1.101180046864115, "grad_norm": 0.25101637840270996, "learning_rate": 4.989683661256795e-06, "loss": 0.0063, "step": 130410 }, { "epoch": 1.101264486711279, "grad_norm": 0.07170533388853073, "learning_rate": 4.98894678107835e-06, "loss": 0.0038, "step": 130420 }, { "epoch": 1.1013489265584429, "grad_norm": 0.18616808950901031, "learning_rate": 4.98820990113998e-06, "loss": 0.0061, "step": 130430 }, { "epoch": 1.1014333664056069, "grad_norm": 0.22985787689685822, "learning_rate": 4.987473021457687e-06, "loss": 0.0072, "step": 130440 }, { "epoch": 1.1015178062527706, "grad_norm": 0.15908189117908478, "learning_rate": 4.986736142047479e-06, "loss": 0.0066, "step": 130450 }, { "epoch": 1.1016022460999346, "grad_norm": 1.2631473541259766, "learning_rate": 4.985999262925358e-06, "loss": 0.0071, "step": 130460 }, { "epoch": 1.1016866859470984, "grad_norm": 0.25836271047592163, "learning_rate": 4.985262384107332e-06, "loss": 0.006, "step": 130470 }, { "epoch": 1.1017711257942624, "grad_norm": 0.21222907304763794, "learning_rate": 4.984525505609401e-06, "loss": 0.0053, "step": 130480 }, { "epoch": 1.1018555656414262, "grad_norm": 0.3476930856704712, "learning_rate": 4.983788627447576e-06, "loss": 0.0107, "step": 130490 }, { "epoch": 1.1019400054885902, "grad_norm": 0.42927730083465576, "learning_rate": 4.9830517496378564e-06, "loss": 0.0098, "step": 130500 }, { "epoch": 1.102024445335754, "grad_norm": 0.18440580368041992, "learning_rate": 4.982314872196252e-06, "loss": 0.0068, "step": 130510 }, { "epoch": 1.1021088851829177, "grad_norm": 0.2991913855075836, "learning_rate": 4.981577995138764e-06, "loss": 0.0055, "step": 130520 }, { "epoch": 1.1021933250300817, "grad_norm": 0.32120347023010254, "learning_rate": 4.980841118481395e-06, "loss": 0.0146, "step": 130530 }, { "epoch": 1.1022777648772455, "grad_norm": 0.23299026489257812, "learning_rate": 4.980104242240157e-06, "loss": 0.0102, "step": 130540 }, { "epoch": 1.1023622047244095, "grad_norm": 0.4348089098930359, "learning_rate": 4.979367366431049e-06, "loss": 0.0105, "step": 130550 }, { "epoch": 1.1024466445715733, "grad_norm": 0.3058704733848572, "learning_rate": 4.978630491070078e-06, "loss": 0.014, "step": 130560 }, { "epoch": 1.1025310844187373, "grad_norm": 0.07375774532556534, "learning_rate": 4.977893616173247e-06, "loss": 0.0128, "step": 130570 }, { "epoch": 1.102615524265901, "grad_norm": 0.4170321822166443, "learning_rate": 4.977156741756566e-06, "loss": 0.0115, "step": 130580 }, { "epoch": 1.102699964113065, "grad_norm": 0.49866995215415955, "learning_rate": 4.976419867836031e-06, "loss": 0.0094, "step": 130590 }, { "epoch": 1.1027844039602288, "grad_norm": 0.6139862537384033, "learning_rate": 4.975682994427655e-06, "loss": 0.0098, "step": 130600 }, { "epoch": 1.1028688438073928, "grad_norm": 0.35723450779914856, "learning_rate": 4.974946121547437e-06, "loss": 0.0102, "step": 130610 }, { "epoch": 1.1029532836545566, "grad_norm": 0.10955584794282913, "learning_rate": 4.974209249211386e-06, "loss": 0.0093, "step": 130620 }, { "epoch": 1.1030377235017204, "grad_norm": 0.0895589292049408, "learning_rate": 4.973472377435505e-06, "loss": 0.0062, "step": 130630 }, { "epoch": 1.1031221633488844, "grad_norm": 0.40993380546569824, "learning_rate": 4.972735506235796e-06, "loss": 0.0088, "step": 130640 }, { "epoch": 1.1032066031960481, "grad_norm": 0.06929556280374527, "learning_rate": 4.971998635628269e-06, "loss": 0.0046, "step": 130650 }, { "epoch": 1.1032910430432121, "grad_norm": 0.5396120548248291, "learning_rate": 4.971261765628925e-06, "loss": 0.0164, "step": 130660 }, { "epoch": 1.103375482890376, "grad_norm": 0.5386308431625366, "learning_rate": 4.970524896253769e-06, "loss": 0.0068, "step": 130670 }, { "epoch": 1.10345992273754, "grad_norm": 0.1476498246192932, "learning_rate": 4.969788027518805e-06, "loss": 0.0066, "step": 130680 }, { "epoch": 1.1035443625847037, "grad_norm": 0.15460869669914246, "learning_rate": 4.969051159440041e-06, "loss": 0.0057, "step": 130690 }, { "epoch": 1.1036288024318677, "grad_norm": 0.1046423614025116, "learning_rate": 4.968314292033479e-06, "loss": 0.009, "step": 130700 }, { "epoch": 1.1037132422790314, "grad_norm": 0.18778423964977264, "learning_rate": 4.9675774253151245e-06, "loss": 0.0071, "step": 130710 }, { "epoch": 1.1037976821261954, "grad_norm": 0.2872878611087799, "learning_rate": 4.96684055930098e-06, "loss": 0.0095, "step": 130720 }, { "epoch": 1.1038821219733592, "grad_norm": 0.33234140276908875, "learning_rate": 4.966103694007054e-06, "loss": 0.0128, "step": 130730 }, { "epoch": 1.1039665618205232, "grad_norm": 0.10496164858341217, "learning_rate": 4.965366829449347e-06, "loss": 0.0152, "step": 130740 }, { "epoch": 1.104051001667687, "grad_norm": 0.31693536043167114, "learning_rate": 4.964629965643867e-06, "loss": 0.008, "step": 130750 }, { "epoch": 1.1041354415148508, "grad_norm": 0.19488804042339325, "learning_rate": 4.963893102606617e-06, "loss": 0.0082, "step": 130760 }, { "epoch": 1.1042198813620148, "grad_norm": 0.5347636938095093, "learning_rate": 4.963156240353599e-06, "loss": 0.0062, "step": 130770 }, { "epoch": 1.1043043212091785, "grad_norm": 0.43340158462524414, "learning_rate": 4.962419378900822e-06, "loss": 0.0089, "step": 130780 }, { "epoch": 1.1043887610563425, "grad_norm": 0.418270081281662, "learning_rate": 4.961682518264287e-06, "loss": 0.0149, "step": 130790 }, { "epoch": 1.1044732009035063, "grad_norm": 0.07572785019874573, "learning_rate": 4.960945658460001e-06, "loss": 0.0058, "step": 130800 }, { "epoch": 1.1045576407506703, "grad_norm": 0.19622808694839478, "learning_rate": 4.960208799503966e-06, "loss": 0.0056, "step": 130810 }, { "epoch": 1.104642080597834, "grad_norm": 0.33139750361442566, "learning_rate": 4.9594719414121886e-06, "loss": 0.0082, "step": 130820 }, { "epoch": 1.104726520444998, "grad_norm": 0.13477838039398193, "learning_rate": 4.958735084200672e-06, "loss": 0.0047, "step": 130830 }, { "epoch": 1.1048109602921619, "grad_norm": 0.21234944462776184, "learning_rate": 4.957998227885422e-06, "loss": 0.0089, "step": 130840 }, { "epoch": 1.1048954001393256, "grad_norm": 0.36589863896369934, "learning_rate": 4.9572613724824395e-06, "loss": 0.007, "step": 130850 }, { "epoch": 1.1049798399864896, "grad_norm": 0.3129345774650574, "learning_rate": 4.9565245180077335e-06, "loss": 0.0075, "step": 130860 }, { "epoch": 1.1050642798336534, "grad_norm": 0.3917787969112396, "learning_rate": 4.9557876644773075e-06, "loss": 0.0084, "step": 130870 }, { "epoch": 1.1051487196808174, "grad_norm": 1.148081660270691, "learning_rate": 4.95505081190716e-06, "loss": 0.0081, "step": 130880 }, { "epoch": 1.1052331595279812, "grad_norm": 0.03352438285946846, "learning_rate": 4.954313960313303e-06, "loss": 0.0112, "step": 130890 }, { "epoch": 1.1053175993751452, "grad_norm": 0.16251450777053833, "learning_rate": 4.9535771097117366e-06, "loss": 0.008, "step": 130900 }, { "epoch": 1.105402039222309, "grad_norm": 0.6035736203193665, "learning_rate": 4.952840260118466e-06, "loss": 0.0064, "step": 130910 }, { "epoch": 1.105486479069473, "grad_norm": 0.6316762566566467, "learning_rate": 4.952103411549493e-06, "loss": 0.0049, "step": 130920 }, { "epoch": 1.1055709189166367, "grad_norm": 0.4288620352745056, "learning_rate": 4.951366564020827e-06, "loss": 0.0123, "step": 130930 }, { "epoch": 1.1056553587638007, "grad_norm": 0.18362465500831604, "learning_rate": 4.950629717548468e-06, "loss": 0.0094, "step": 130940 }, { "epoch": 1.1057397986109645, "grad_norm": 1.2917298078536987, "learning_rate": 4.949892872148422e-06, "loss": 0.0084, "step": 130950 }, { "epoch": 1.1058242384581285, "grad_norm": 0.24382150173187256, "learning_rate": 4.949156027836691e-06, "loss": 0.0071, "step": 130960 }, { "epoch": 1.1059086783052923, "grad_norm": 0.29223668575286865, "learning_rate": 4.948419184629283e-06, "loss": 0.0064, "step": 130970 }, { "epoch": 1.105993118152456, "grad_norm": 0.6720105409622192, "learning_rate": 4.947682342542199e-06, "loss": 0.0056, "step": 130980 }, { "epoch": 1.10607755799962, "grad_norm": 0.041297174990177155, "learning_rate": 4.946945501591443e-06, "loss": 0.0045, "step": 130990 }, { "epoch": 1.1061619978467838, "grad_norm": 0.2556946873664856, "learning_rate": 4.946208661793021e-06, "loss": 0.005, "step": 131000 }, { "epoch": 1.1062464376939478, "grad_norm": 0.40171539783477783, "learning_rate": 4.945471823162933e-06, "loss": 0.0062, "step": 131010 }, { "epoch": 1.1063308775411116, "grad_norm": 0.2853942811489105, "learning_rate": 4.944734985717189e-06, "loss": 0.0057, "step": 131020 }, { "epoch": 1.1064153173882756, "grad_norm": 0.03312554582953453, "learning_rate": 4.9439981494717884e-06, "loss": 0.0057, "step": 131030 }, { "epoch": 1.1064997572354394, "grad_norm": 0.12444009631872177, "learning_rate": 4.943261314442738e-06, "loss": 0.0149, "step": 131040 }, { "epoch": 1.1065841970826034, "grad_norm": 0.11435376107692719, "learning_rate": 4.942524480646037e-06, "loss": 0.007, "step": 131050 }, { "epoch": 1.1066686369297671, "grad_norm": 0.9249273538589478, "learning_rate": 4.941787648097696e-06, "loss": 0.0171, "step": 131060 }, { "epoch": 1.1067530767769311, "grad_norm": 0.5802093148231506, "learning_rate": 4.941050816813713e-06, "loss": 0.009, "step": 131070 }, { "epoch": 1.106837516624095, "grad_norm": 0.22501029074192047, "learning_rate": 4.940313986810096e-06, "loss": 0.0069, "step": 131080 }, { "epoch": 1.1069219564712587, "grad_norm": 0.05803486704826355, "learning_rate": 4.939577158102844e-06, "loss": 0.0056, "step": 131090 }, { "epoch": 1.1070063963184227, "grad_norm": 0.11707675457000732, "learning_rate": 4.938840330707967e-06, "loss": 0.0112, "step": 131100 }, { "epoch": 1.1070908361655865, "grad_norm": 0.14734023809432983, "learning_rate": 4.938103504641465e-06, "loss": 0.0066, "step": 131110 }, { "epoch": 1.1071752760127505, "grad_norm": 0.5417980551719666, "learning_rate": 4.937366679919341e-06, "loss": 0.0047, "step": 131120 }, { "epoch": 1.1072597158599142, "grad_norm": 0.13608239591121674, "learning_rate": 4.936629856557601e-06, "loss": 0.0044, "step": 131130 }, { "epoch": 1.1073441557070782, "grad_norm": 0.2688097059726715, "learning_rate": 4.935893034572246e-06, "loss": 0.01, "step": 131140 }, { "epoch": 1.107428595554242, "grad_norm": 0.2780236601829529, "learning_rate": 4.935156213979282e-06, "loss": 0.0062, "step": 131150 }, { "epoch": 1.107513035401406, "grad_norm": 0.19276219606399536, "learning_rate": 4.934419394794711e-06, "loss": 0.005, "step": 131160 }, { "epoch": 1.1075974752485698, "grad_norm": 0.005248685367405415, "learning_rate": 4.933682577034539e-06, "loss": 0.0044, "step": 131170 }, { "epoch": 1.1076819150957338, "grad_norm": 0.25562557578086853, "learning_rate": 4.932945760714766e-06, "loss": 0.0126, "step": 131180 }, { "epoch": 1.1077663549428975, "grad_norm": 0.21003100275993347, "learning_rate": 4.9322089458514e-06, "loss": 0.012, "step": 131190 }, { "epoch": 1.1078507947900613, "grad_norm": 0.010701003484427929, "learning_rate": 4.931472132460438e-06, "loss": 0.0111, "step": 131200 }, { "epoch": 1.1079352346372253, "grad_norm": 0.21201878786087036, "learning_rate": 4.93073532055789e-06, "loss": 0.0088, "step": 131210 }, { "epoch": 1.108019674484389, "grad_norm": 0.08305256068706512, "learning_rate": 4.929998510159757e-06, "loss": 0.0061, "step": 131220 }, { "epoch": 1.108104114331553, "grad_norm": 0.24608294665813446, "learning_rate": 4.92926170128204e-06, "loss": 0.0066, "step": 131230 }, { "epoch": 1.1081885541787169, "grad_norm": 0.5575785636901855, "learning_rate": 4.928524893940747e-06, "loss": 0.0084, "step": 131240 }, { "epoch": 1.1082729940258809, "grad_norm": 0.11920837312936783, "learning_rate": 4.927788088151875e-06, "loss": 0.0041, "step": 131250 }, { "epoch": 1.1083574338730446, "grad_norm": 0.15164655447006226, "learning_rate": 4.927051283931434e-06, "loss": 0.005, "step": 131260 }, { "epoch": 1.1084418737202086, "grad_norm": 0.6850920915603638, "learning_rate": 4.926314481295422e-06, "loss": 0.012, "step": 131270 }, { "epoch": 1.1085263135673724, "grad_norm": 0.1809762418270111, "learning_rate": 4.925577680259846e-06, "loss": 0.0093, "step": 131280 }, { "epoch": 1.1086107534145364, "grad_norm": 0.2094930112361908, "learning_rate": 4.924840880840706e-06, "loss": 0.0037, "step": 131290 }, { "epoch": 1.1086951932617002, "grad_norm": 0.2724420130252838, "learning_rate": 4.924104083054008e-06, "loss": 0.0048, "step": 131300 }, { "epoch": 1.1087796331088642, "grad_norm": 0.3690789043903351, "learning_rate": 4.923367286915753e-06, "loss": 0.0059, "step": 131310 }, { "epoch": 1.108864072956028, "grad_norm": 0.3582022488117218, "learning_rate": 4.922630492441946e-06, "loss": 0.0066, "step": 131320 }, { "epoch": 1.1089485128031917, "grad_norm": 0.3067670166492462, "learning_rate": 4.921893699648587e-06, "loss": 0.0067, "step": 131330 }, { "epoch": 1.1090329526503557, "grad_norm": 0.20629991590976715, "learning_rate": 4.921156908551682e-06, "loss": 0.005, "step": 131340 }, { "epoch": 1.1091173924975195, "grad_norm": 0.04203841835260391, "learning_rate": 4.920420119167234e-06, "loss": 0.0048, "step": 131350 }, { "epoch": 1.1092018323446835, "grad_norm": 0.2109525501728058, "learning_rate": 4.919683331511243e-06, "loss": 0.0099, "step": 131360 }, { "epoch": 1.1092862721918473, "grad_norm": 0.13536158204078674, "learning_rate": 4.918946545599714e-06, "loss": 0.011, "step": 131370 }, { "epoch": 1.1093707120390113, "grad_norm": 0.7781888246536255, "learning_rate": 4.918209761448649e-06, "loss": 0.0113, "step": 131380 }, { "epoch": 1.109455151886175, "grad_norm": 0.13043753802776337, "learning_rate": 4.917472979074051e-06, "loss": 0.0079, "step": 131390 }, { "epoch": 1.109539591733339, "grad_norm": 0.2967422306537628, "learning_rate": 4.916736198491924e-06, "loss": 0.0067, "step": 131400 }, { "epoch": 1.1096240315805028, "grad_norm": 0.848471999168396, "learning_rate": 4.91599941971827e-06, "loss": 0.0104, "step": 131410 }, { "epoch": 1.1097084714276666, "grad_norm": 0.11420122534036636, "learning_rate": 4.915262642769089e-06, "loss": 0.0104, "step": 131420 }, { "epoch": 1.1097929112748306, "grad_norm": 0.2764261066913605, "learning_rate": 4.9145258676603884e-06, "loss": 0.0112, "step": 131430 }, { "epoch": 1.1098773511219944, "grad_norm": 0.255809485912323, "learning_rate": 4.913789094408167e-06, "loss": 0.0083, "step": 131440 }, { "epoch": 1.1099617909691584, "grad_norm": 0.003535608761012554, "learning_rate": 4.913052323028431e-06, "loss": 0.0064, "step": 131450 }, { "epoch": 1.1100462308163221, "grad_norm": 0.4651848077774048, "learning_rate": 4.912315553537179e-06, "loss": 0.0076, "step": 131460 }, { "epoch": 1.1101306706634861, "grad_norm": 0.10559873282909393, "learning_rate": 4.911578785950415e-06, "loss": 0.0047, "step": 131470 }, { "epoch": 1.11021511051065, "grad_norm": 0.050100333988666534, "learning_rate": 4.910842020284144e-06, "loss": 0.0056, "step": 131480 }, { "epoch": 1.110299550357814, "grad_norm": 0.49538499116897583, "learning_rate": 4.910105256554363e-06, "loss": 0.0074, "step": 131490 }, { "epoch": 1.1103839902049777, "grad_norm": 0.25296029448509216, "learning_rate": 4.909368494777078e-06, "loss": 0.0096, "step": 131500 }, { "epoch": 1.1104684300521417, "grad_norm": 0.8582233786582947, "learning_rate": 4.9086317349682925e-06, "loss": 0.0108, "step": 131510 }, { "epoch": 1.1105528698993055, "grad_norm": 0.3830145001411438, "learning_rate": 4.907894977144006e-06, "loss": 0.0077, "step": 131520 }, { "epoch": 1.1106373097464695, "grad_norm": 0.12175925821065903, "learning_rate": 4.90715822132022e-06, "loss": 0.0063, "step": 131530 }, { "epoch": 1.1107217495936332, "grad_norm": 0.24138496816158295, "learning_rate": 4.9064214675129405e-06, "loss": 0.0141, "step": 131540 }, { "epoch": 1.110806189440797, "grad_norm": 0.45965561270713806, "learning_rate": 4.905684715738166e-06, "loss": 0.0083, "step": 131550 }, { "epoch": 1.110890629287961, "grad_norm": 0.09754671156406403, "learning_rate": 4.9049479660119025e-06, "loss": 0.0049, "step": 131560 }, { "epoch": 1.1109750691351248, "grad_norm": 0.3078976273536682, "learning_rate": 4.9042112183501476e-06, "loss": 0.0079, "step": 131570 }, { "epoch": 1.1110595089822888, "grad_norm": 0.2477305680513382, "learning_rate": 4.903474472768906e-06, "loss": 0.0104, "step": 131580 }, { "epoch": 1.1111439488294526, "grad_norm": 0.3209000527858734, "learning_rate": 4.902737729284181e-06, "loss": 0.008, "step": 131590 }, { "epoch": 1.1112283886766166, "grad_norm": 0.09239310771226883, "learning_rate": 4.902000987911969e-06, "loss": 0.0076, "step": 131600 }, { "epoch": 1.1113128285237803, "grad_norm": 0.24421890079975128, "learning_rate": 4.901264248668278e-06, "loss": 0.0095, "step": 131610 }, { "epoch": 1.1113972683709443, "grad_norm": 0.2931644320487976, "learning_rate": 4.900527511569106e-06, "loss": 0.0092, "step": 131620 }, { "epoch": 1.111481708218108, "grad_norm": 0.14584749937057495, "learning_rate": 4.899790776630457e-06, "loss": 0.0037, "step": 131630 }, { "epoch": 1.111566148065272, "grad_norm": 0.11677467077970505, "learning_rate": 4.899054043868331e-06, "loss": 0.0066, "step": 131640 }, { "epoch": 1.1116505879124359, "grad_norm": 0.17499980330467224, "learning_rate": 4.898317313298731e-06, "loss": 0.0068, "step": 131650 }, { "epoch": 1.1117350277595999, "grad_norm": 0.11323131620883942, "learning_rate": 4.897580584937657e-06, "loss": 0.0068, "step": 131660 }, { "epoch": 1.1118194676067636, "grad_norm": 0.007383840624243021, "learning_rate": 4.8968438588011135e-06, "loss": 0.0057, "step": 131670 }, { "epoch": 1.1119039074539274, "grad_norm": 0.14358772337436676, "learning_rate": 4.896107134905099e-06, "loss": 0.0114, "step": 131680 }, { "epoch": 1.1119883473010914, "grad_norm": 0.8305366635322571, "learning_rate": 4.8953704132656176e-06, "loss": 0.0117, "step": 131690 }, { "epoch": 1.1120727871482552, "grad_norm": 0.20578375458717346, "learning_rate": 4.89463369389867e-06, "loss": 0.0109, "step": 131700 }, { "epoch": 1.1121572269954192, "grad_norm": 0.0013514702441170812, "learning_rate": 4.893896976820253e-06, "loss": 0.004, "step": 131710 }, { "epoch": 1.112241666842583, "grad_norm": 0.2152397781610489, "learning_rate": 4.893160262046376e-06, "loss": 0.0083, "step": 131720 }, { "epoch": 1.112326106689747, "grad_norm": 0.4196276366710663, "learning_rate": 4.892423549593034e-06, "loss": 0.0057, "step": 131730 }, { "epoch": 1.1124105465369107, "grad_norm": 0.10741925984621048, "learning_rate": 4.891686839476232e-06, "loss": 0.0128, "step": 131740 }, { "epoch": 1.1124949863840747, "grad_norm": 0.16679224371910095, "learning_rate": 4.890950131711967e-06, "loss": 0.0058, "step": 131750 }, { "epoch": 1.1125794262312385, "grad_norm": 0.3771507143974304, "learning_rate": 4.890213426316245e-06, "loss": 0.0082, "step": 131760 }, { "epoch": 1.1126638660784023, "grad_norm": 0.06777998059988022, "learning_rate": 4.889476723305065e-06, "loss": 0.0055, "step": 131770 }, { "epoch": 1.1127483059255663, "grad_norm": 0.12305816262960434, "learning_rate": 4.888740022694427e-06, "loss": 0.0049, "step": 131780 }, { "epoch": 1.11283274577273, "grad_norm": 0.08047470450401306, "learning_rate": 4.888003324500332e-06, "loss": 0.0055, "step": 131790 }, { "epoch": 1.112917185619894, "grad_norm": 0.08692987263202667, "learning_rate": 4.887266628738785e-06, "loss": 0.0066, "step": 131800 }, { "epoch": 1.1130016254670578, "grad_norm": 0.5986935496330261, "learning_rate": 4.886529935425782e-06, "loss": 0.0062, "step": 131810 }, { "epoch": 1.1130860653142218, "grad_norm": 0.08285550028085709, "learning_rate": 4.8857932445773245e-06, "loss": 0.0071, "step": 131820 }, { "epoch": 1.1131705051613856, "grad_norm": 0.4031023383140564, "learning_rate": 4.885056556209416e-06, "loss": 0.0128, "step": 131830 }, { "epoch": 1.1132549450085496, "grad_norm": 0.26979026198387146, "learning_rate": 4.884319870338054e-06, "loss": 0.0064, "step": 131840 }, { "epoch": 1.1133393848557134, "grad_norm": 0.3619478940963745, "learning_rate": 4.883583186979242e-06, "loss": 0.0156, "step": 131850 }, { "epoch": 1.1134238247028774, "grad_norm": 0.5376424193382263, "learning_rate": 4.882846506148977e-06, "loss": 0.007, "step": 131860 }, { "epoch": 1.1135082645500411, "grad_norm": 0.07706302404403687, "learning_rate": 4.882109827863264e-06, "loss": 0.0077, "step": 131870 }, { "epoch": 1.1135927043972051, "grad_norm": 0.28651610016822815, "learning_rate": 4.8813731521381015e-06, "loss": 0.0151, "step": 131880 }, { "epoch": 1.113677144244369, "grad_norm": 0.12003887444734573, "learning_rate": 4.88063647898949e-06, "loss": 0.0123, "step": 131890 }, { "epoch": 1.1137615840915327, "grad_norm": 0.43947693705558777, "learning_rate": 4.879899808433427e-06, "loss": 0.0126, "step": 131900 }, { "epoch": 1.1138460239386967, "grad_norm": 0.34313738346099854, "learning_rate": 4.879163140485919e-06, "loss": 0.0092, "step": 131910 }, { "epoch": 1.1139304637858605, "grad_norm": 0.16837595403194427, "learning_rate": 4.878426475162961e-06, "loss": 0.0035, "step": 131920 }, { "epoch": 1.1140149036330245, "grad_norm": 0.15876679122447968, "learning_rate": 4.877689812480556e-06, "loss": 0.0067, "step": 131930 }, { "epoch": 1.1140993434801882, "grad_norm": 0.4465365409851074, "learning_rate": 4.876953152454704e-06, "loss": 0.0082, "step": 131940 }, { "epoch": 1.1141837833273522, "grad_norm": 0.03897537663578987, "learning_rate": 4.876216495101401e-06, "loss": 0.0066, "step": 131950 }, { "epoch": 1.114268223174516, "grad_norm": 0.3582502007484436, "learning_rate": 4.875479840436653e-06, "loss": 0.0074, "step": 131960 }, { "epoch": 1.11435266302168, "grad_norm": 0.5269973278045654, "learning_rate": 4.8747431884764565e-06, "loss": 0.0102, "step": 131970 }, { "epoch": 1.1144371028688438, "grad_norm": 0.59017014503479, "learning_rate": 4.874006539236813e-06, "loss": 0.0093, "step": 131980 }, { "epoch": 1.1145215427160078, "grad_norm": 0.04862402752041817, "learning_rate": 4.8732698927337195e-06, "loss": 0.0057, "step": 131990 }, { "epoch": 1.1146059825631716, "grad_norm": 0.1932535469532013, "learning_rate": 4.872533248983181e-06, "loss": 0.0045, "step": 132000 }, { "epoch": 1.1146904224103353, "grad_norm": 0.12961360812187195, "learning_rate": 4.871796608001192e-06, "loss": 0.0055, "step": 132010 }, { "epoch": 1.1147748622574993, "grad_norm": 0.07935023307800293, "learning_rate": 4.871059969803756e-06, "loss": 0.007, "step": 132020 }, { "epoch": 1.114859302104663, "grad_norm": 0.6477738618850708, "learning_rate": 4.870323334406869e-06, "loss": 0.0136, "step": 132030 }, { "epoch": 1.114943741951827, "grad_norm": 0.32048702239990234, "learning_rate": 4.869586701826534e-06, "loss": 0.0061, "step": 132040 }, { "epoch": 1.1150281817989909, "grad_norm": 0.09591903537511826, "learning_rate": 4.86885007207875e-06, "loss": 0.0062, "step": 132050 }, { "epoch": 1.1151126216461549, "grad_norm": 0.31084582209587097, "learning_rate": 4.868113445179515e-06, "loss": 0.0055, "step": 132060 }, { "epoch": 1.1151970614933187, "grad_norm": 0.3120983839035034, "learning_rate": 4.867376821144828e-06, "loss": 0.0087, "step": 132070 }, { "epoch": 1.1152815013404827, "grad_norm": 0.18782585859298706, "learning_rate": 4.866640199990689e-06, "loss": 0.0116, "step": 132080 }, { "epoch": 1.1153659411876464, "grad_norm": 0.10482650250196457, "learning_rate": 4.8659035817331e-06, "loss": 0.0083, "step": 132090 }, { "epoch": 1.1154503810348104, "grad_norm": 0.7774084210395813, "learning_rate": 4.865166966388054e-06, "loss": 0.0096, "step": 132100 }, { "epoch": 1.1155348208819742, "grad_norm": 0.3651745319366455, "learning_rate": 4.864430353971557e-06, "loss": 0.0081, "step": 132110 }, { "epoch": 1.115619260729138, "grad_norm": 0.3118373453617096, "learning_rate": 4.863693744499603e-06, "loss": 0.0122, "step": 132120 }, { "epoch": 1.115703700576302, "grad_norm": 0.15283958613872528, "learning_rate": 4.862957137988195e-06, "loss": 0.0044, "step": 132130 }, { "epoch": 1.1157881404234657, "grad_norm": 0.10894530266523361, "learning_rate": 4.862220534453327e-06, "loss": 0.0064, "step": 132140 }, { "epoch": 1.1158725802706297, "grad_norm": 0.06906571239233017, "learning_rate": 4.861483933911003e-06, "loss": 0.0074, "step": 132150 }, { "epoch": 1.1159570201177935, "grad_norm": 0.11389194428920746, "learning_rate": 4.860747336377218e-06, "loss": 0.0042, "step": 132160 }, { "epoch": 1.1160414599649575, "grad_norm": 0.29515740275382996, "learning_rate": 4.860010741867974e-06, "loss": 0.0058, "step": 132170 }, { "epoch": 1.1161258998121213, "grad_norm": 0.32163360714912415, "learning_rate": 4.859274150399268e-06, "loss": 0.0071, "step": 132180 }, { "epoch": 1.1162103396592853, "grad_norm": 0.08137384802103043, "learning_rate": 4.858537561987097e-06, "loss": 0.0068, "step": 132190 }, { "epoch": 1.116294779506449, "grad_norm": 0.824123740196228, "learning_rate": 4.857800976647462e-06, "loss": 0.0122, "step": 132200 }, { "epoch": 1.116379219353613, "grad_norm": 0.09491055458784103, "learning_rate": 4.8570643943963604e-06, "loss": 0.0053, "step": 132210 }, { "epoch": 1.1164636592007768, "grad_norm": 0.21786735951900482, "learning_rate": 4.856327815249792e-06, "loss": 0.0081, "step": 132220 }, { "epoch": 1.1165480990479408, "grad_norm": 0.3995867073535919, "learning_rate": 4.855591239223752e-06, "loss": 0.0059, "step": 132230 }, { "epoch": 1.1166325388951046, "grad_norm": 0.20308327674865723, "learning_rate": 4.854854666334242e-06, "loss": 0.005, "step": 132240 }, { "epoch": 1.1167169787422684, "grad_norm": 0.3088153302669525, "learning_rate": 4.854118096597258e-06, "loss": 0.0108, "step": 132250 }, { "epoch": 1.1168014185894324, "grad_norm": 0.11703402549028397, "learning_rate": 4.853381530028801e-06, "loss": 0.0037, "step": 132260 }, { "epoch": 1.1168858584365962, "grad_norm": 0.5534727573394775, "learning_rate": 4.852644966644865e-06, "loss": 0.0092, "step": 132270 }, { "epoch": 1.1169702982837602, "grad_norm": 0.3600008189678192, "learning_rate": 4.851908406461451e-06, "loss": 0.0076, "step": 132280 }, { "epoch": 1.117054738130924, "grad_norm": 0.1431540846824646, "learning_rate": 4.851171849494557e-06, "loss": 0.0076, "step": 132290 }, { "epoch": 1.117139177978088, "grad_norm": 0.21712729334831238, "learning_rate": 4.850435295760179e-06, "loss": 0.0069, "step": 132300 }, { "epoch": 1.1172236178252517, "grad_norm": 0.5755143761634827, "learning_rate": 4.849698745274316e-06, "loss": 0.0081, "step": 132310 }, { "epoch": 1.1173080576724157, "grad_norm": 0.3391813635826111, "learning_rate": 4.848962198052964e-06, "loss": 0.0076, "step": 132320 }, { "epoch": 1.1173924975195795, "grad_norm": 0.735318660736084, "learning_rate": 4.848225654112124e-06, "loss": 0.0089, "step": 132330 }, { "epoch": 1.1174769373667433, "grad_norm": 0.018408887088298798, "learning_rate": 4.847489113467791e-06, "loss": 0.0073, "step": 132340 }, { "epoch": 1.1175613772139072, "grad_norm": 0.09726765751838684, "learning_rate": 4.846752576135964e-06, "loss": 0.0047, "step": 132350 }, { "epoch": 1.117645817061071, "grad_norm": 0.6256645917892456, "learning_rate": 4.846016042132637e-06, "loss": 0.0096, "step": 132360 }, { "epoch": 1.117730256908235, "grad_norm": 0.3205007314682007, "learning_rate": 4.845279511473812e-06, "loss": 0.005, "step": 132370 }, { "epoch": 1.1178146967553988, "grad_norm": 0.5763229727745056, "learning_rate": 4.844542984175484e-06, "loss": 0.0119, "step": 132380 }, { "epoch": 1.1178991366025628, "grad_norm": 0.22244760394096375, "learning_rate": 4.843806460253652e-06, "loss": 0.0053, "step": 132390 }, { "epoch": 1.1179835764497266, "grad_norm": 1.129504919052124, "learning_rate": 4.843069939724311e-06, "loss": 0.0165, "step": 132400 }, { "epoch": 1.1180680162968906, "grad_norm": 0.06477469950914383, "learning_rate": 4.842333422603458e-06, "loss": 0.0054, "step": 132410 }, { "epoch": 1.1181524561440543, "grad_norm": 0.19133278727531433, "learning_rate": 4.8415969089070916e-06, "loss": 0.0106, "step": 132420 }, { "epoch": 1.1182368959912183, "grad_norm": 0.17123781144618988, "learning_rate": 4.840860398651205e-06, "loss": 0.0042, "step": 132430 }, { "epoch": 1.1183213358383821, "grad_norm": 0.38705605268478394, "learning_rate": 4.840123891851801e-06, "loss": 0.013, "step": 132440 }, { "epoch": 1.118405775685546, "grad_norm": 0.5115252733230591, "learning_rate": 4.839387388524872e-06, "loss": 0.0083, "step": 132450 }, { "epoch": 1.1184902155327099, "grad_norm": 0.1461341232061386, "learning_rate": 4.838650888686417e-06, "loss": 0.0045, "step": 132460 }, { "epoch": 1.1185746553798737, "grad_norm": 0.5226513743400574, "learning_rate": 4.8379143923524305e-06, "loss": 0.0058, "step": 132470 }, { "epoch": 1.1186590952270377, "grad_norm": 0.13280385732650757, "learning_rate": 4.837177899538911e-06, "loss": 0.0053, "step": 132480 }, { "epoch": 1.1187435350742014, "grad_norm": 0.005459169391542673, "learning_rate": 4.8364414102618535e-06, "loss": 0.0117, "step": 132490 }, { "epoch": 1.1188279749213654, "grad_norm": 0.06806319952011108, "learning_rate": 4.835704924537256e-06, "loss": 0.0124, "step": 132500 }, { "epoch": 1.1189124147685292, "grad_norm": 0.22019372880458832, "learning_rate": 4.834968442381112e-06, "loss": 0.0059, "step": 132510 }, { "epoch": 1.1189968546156932, "grad_norm": 0.5549381375312805, "learning_rate": 4.8342319638094215e-06, "loss": 0.0109, "step": 132520 }, { "epoch": 1.119081294462857, "grad_norm": 0.3926544189453125, "learning_rate": 4.833495488838179e-06, "loss": 0.0048, "step": 132530 }, { "epoch": 1.119165734310021, "grad_norm": 0.12729085981845856, "learning_rate": 4.832759017483379e-06, "loss": 0.0034, "step": 132540 }, { "epoch": 1.1192501741571848, "grad_norm": 0.25372380018234253, "learning_rate": 4.83202254976102e-06, "loss": 0.0062, "step": 132550 }, { "epoch": 1.1193346140043487, "grad_norm": 0.43720173835754395, "learning_rate": 4.831286085687095e-06, "loss": 0.015, "step": 132560 }, { "epoch": 1.1194190538515125, "grad_norm": 0.2871326804161072, "learning_rate": 4.830549625277604e-06, "loss": 0.0095, "step": 132570 }, { "epoch": 1.1195034936986765, "grad_norm": 0.8125539422035217, "learning_rate": 4.829813168548539e-06, "loss": 0.007, "step": 132580 }, { "epoch": 1.1195879335458403, "grad_norm": 0.29654306173324585, "learning_rate": 4.829076715515898e-06, "loss": 0.0057, "step": 132590 }, { "epoch": 1.119672373393004, "grad_norm": 0.5591232776641846, "learning_rate": 4.828340266195673e-06, "loss": 0.0109, "step": 132600 }, { "epoch": 1.119756813240168, "grad_norm": 0.19097396731376648, "learning_rate": 4.827603820603866e-06, "loss": 0.0071, "step": 132610 }, { "epoch": 1.1198412530873318, "grad_norm": 0.24687987565994263, "learning_rate": 4.826867378756466e-06, "loss": 0.009, "step": 132620 }, { "epoch": 1.1199256929344958, "grad_norm": 0.056751132011413574, "learning_rate": 4.826130940669473e-06, "loss": 0.0097, "step": 132630 }, { "epoch": 1.1200101327816596, "grad_norm": 0.1699039191007614, "learning_rate": 4.82539450635888e-06, "loss": 0.0101, "step": 132640 }, { "epoch": 1.1200945726288236, "grad_norm": 0.0008901582914404571, "learning_rate": 4.82465807584068e-06, "loss": 0.0122, "step": 132650 }, { "epoch": 1.1201790124759874, "grad_norm": 0.0031627609860152006, "learning_rate": 4.823921649130874e-06, "loss": 0.0054, "step": 132660 }, { "epoch": 1.1202634523231514, "grad_norm": 0.22688494622707367, "learning_rate": 4.823185226245452e-06, "loss": 0.0111, "step": 132670 }, { "epoch": 1.1203478921703152, "grad_norm": 0.2964399755001068, "learning_rate": 4.822448807200412e-06, "loss": 0.0054, "step": 132680 }, { "epoch": 1.120432332017479, "grad_norm": 0.6268802285194397, "learning_rate": 4.821712392011745e-06, "loss": 0.0153, "step": 132690 }, { "epoch": 1.120516771864643, "grad_norm": 0.27120441198349, "learning_rate": 4.820975980695451e-06, "loss": 0.0094, "step": 132700 }, { "epoch": 1.1206012117118067, "grad_norm": 0.16512510180473328, "learning_rate": 4.82023957326752e-06, "loss": 0.003, "step": 132710 }, { "epoch": 1.1206856515589707, "grad_norm": 0.2679412364959717, "learning_rate": 4.81950316974395e-06, "loss": 0.0071, "step": 132720 }, { "epoch": 1.1207700914061345, "grad_norm": 0.006165445316582918, "learning_rate": 4.8187667701407336e-06, "loss": 0.0053, "step": 132730 }, { "epoch": 1.1208545312532985, "grad_norm": 0.024325134232640266, "learning_rate": 4.818030374473866e-06, "loss": 0.0072, "step": 132740 }, { "epoch": 1.1209389711004623, "grad_norm": 0.24122977256774902, "learning_rate": 4.81729398275934e-06, "loss": 0.0081, "step": 132750 }, { "epoch": 1.1210234109476263, "grad_norm": 0.20546551048755646, "learning_rate": 4.816557595013153e-06, "loss": 0.0063, "step": 132760 }, { "epoch": 1.12110785079479, "grad_norm": 0.23006072640419006, "learning_rate": 4.815821211251297e-06, "loss": 0.0066, "step": 132770 }, { "epoch": 1.121192290641954, "grad_norm": 0.3283006250858307, "learning_rate": 4.815084831489767e-06, "loss": 0.0118, "step": 132780 }, { "epoch": 1.1212767304891178, "grad_norm": 0.29325389862060547, "learning_rate": 4.814348455744556e-06, "loss": 0.0069, "step": 132790 }, { "epoch": 1.1213611703362818, "grad_norm": 0.2601945996284485, "learning_rate": 4.813612084031657e-06, "loss": 0.0072, "step": 132800 }, { "epoch": 1.1214456101834456, "grad_norm": 0.48858150839805603, "learning_rate": 4.812875716367068e-06, "loss": 0.0085, "step": 132810 }, { "epoch": 1.1215300500306093, "grad_norm": 0.3081377148628235, "learning_rate": 4.812139352766779e-06, "loss": 0.0073, "step": 132820 }, { "epoch": 1.1216144898777733, "grad_norm": 0.2703154683113098, "learning_rate": 4.811402993246785e-06, "loss": 0.009, "step": 132830 }, { "epoch": 1.1216989297249371, "grad_norm": 0.2788057029247284, "learning_rate": 4.8106666378230775e-06, "loss": 0.0082, "step": 132840 }, { "epoch": 1.1217833695721011, "grad_norm": 0.9163666367530823, "learning_rate": 4.809930286511654e-06, "loss": 0.012, "step": 132850 }, { "epoch": 1.121867809419265, "grad_norm": 0.20733067393302917, "learning_rate": 4.809193939328505e-06, "loss": 0.0138, "step": 132860 }, { "epoch": 1.121952249266429, "grad_norm": 0.3911164402961731, "learning_rate": 4.808457596289626e-06, "loss": 0.0083, "step": 132870 }, { "epoch": 1.1220366891135927, "grad_norm": 0.5073097348213196, "learning_rate": 4.8077212574110076e-06, "loss": 0.0078, "step": 132880 }, { "epoch": 1.1221211289607567, "grad_norm": 0.030994227156043053, "learning_rate": 4.806984922708643e-06, "loss": 0.0047, "step": 132890 }, { "epoch": 1.1222055688079204, "grad_norm": 0.37084704637527466, "learning_rate": 4.8062485921985275e-06, "loss": 0.0075, "step": 132900 }, { "epoch": 1.1222900086550844, "grad_norm": 0.4319356381893158, "learning_rate": 4.805512265896652e-06, "loss": 0.0122, "step": 132910 }, { "epoch": 1.1223744485022482, "grad_norm": 0.5552420616149902, "learning_rate": 4.804775943819012e-06, "loss": 0.0107, "step": 132920 }, { "epoch": 1.122458888349412, "grad_norm": 0.05244719982147217, "learning_rate": 4.804039625981596e-06, "loss": 0.0076, "step": 132930 }, { "epoch": 1.122543328196576, "grad_norm": 0.21984565258026123, "learning_rate": 4.8033033124004005e-06, "loss": 0.0083, "step": 132940 }, { "epoch": 1.1226277680437398, "grad_norm": 0.24286313354969025, "learning_rate": 4.802567003091417e-06, "loss": 0.0111, "step": 132950 }, { "epoch": 1.1227122078909038, "grad_norm": 0.0659983679652214, "learning_rate": 4.801830698070638e-06, "loss": 0.0039, "step": 132960 }, { "epoch": 1.1227966477380675, "grad_norm": 0.5790195465087891, "learning_rate": 4.801094397354053e-06, "loss": 0.0066, "step": 132970 }, { "epoch": 1.1228810875852315, "grad_norm": 1.0751163959503174, "learning_rate": 4.80035810095766e-06, "loss": 0.0217, "step": 132980 }, { "epoch": 1.1229655274323953, "grad_norm": 0.04528263956308365, "learning_rate": 4.799621808897449e-06, "loss": 0.0062, "step": 132990 }, { "epoch": 1.1230499672795593, "grad_norm": 0.2361881285905838, "learning_rate": 4.798885521189409e-06, "loss": 0.0088, "step": 133000 }, { "epoch": 1.123134407126723, "grad_norm": 0.3886810839176178, "learning_rate": 4.798149237849535e-06, "loss": 0.0057, "step": 133010 }, { "epoch": 1.123218846973887, "grad_norm": 0.2599865794181824, "learning_rate": 4.797412958893817e-06, "loss": 0.0071, "step": 133020 }, { "epoch": 1.1233032868210509, "grad_norm": 0.561347246170044, "learning_rate": 4.79667668433825e-06, "loss": 0.0079, "step": 133030 }, { "epoch": 1.1233877266682146, "grad_norm": 0.11484397947788239, "learning_rate": 4.795940414198821e-06, "loss": 0.0103, "step": 133040 }, { "epoch": 1.1234721665153786, "grad_norm": 0.1472187340259552, "learning_rate": 4.795204148491527e-06, "loss": 0.0044, "step": 133050 }, { "epoch": 1.1235566063625424, "grad_norm": 0.11304077506065369, "learning_rate": 4.794467887232356e-06, "loss": 0.0087, "step": 133060 }, { "epoch": 1.1236410462097064, "grad_norm": 0.22436891496181488, "learning_rate": 4.7937316304373e-06, "loss": 0.0072, "step": 133070 }, { "epoch": 1.1237254860568702, "grad_norm": 0.2174444943666458, "learning_rate": 4.79299537812235e-06, "loss": 0.0028, "step": 133080 }, { "epoch": 1.1238099259040342, "grad_norm": 0.07726448774337769, "learning_rate": 4.7922591303035e-06, "loss": 0.0063, "step": 133090 }, { "epoch": 1.123894365751198, "grad_norm": 0.31550928950309753, "learning_rate": 4.7915228869967376e-06, "loss": 0.0087, "step": 133100 }, { "epoch": 1.123978805598362, "grad_norm": 0.15831057727336884, "learning_rate": 4.790786648218057e-06, "loss": 0.0054, "step": 133110 }, { "epoch": 1.1240632454455257, "grad_norm": 0.39665839076042175, "learning_rate": 4.790050413983447e-06, "loss": 0.0068, "step": 133120 }, { "epoch": 1.1241476852926897, "grad_norm": 0.27737367153167725, "learning_rate": 4.789314184308897e-06, "loss": 0.0073, "step": 133130 }, { "epoch": 1.1242321251398535, "grad_norm": 0.1957150399684906, "learning_rate": 4.788577959210402e-06, "loss": 0.0042, "step": 133140 }, { "epoch": 1.1243165649870175, "grad_norm": 0.18920177221298218, "learning_rate": 4.787841738703949e-06, "loss": 0.0133, "step": 133150 }, { "epoch": 1.1244010048341813, "grad_norm": 0.16160151362419128, "learning_rate": 4.7871055228055315e-06, "loss": 0.0078, "step": 133160 }, { "epoch": 1.124485444681345, "grad_norm": 0.004554967395961285, "learning_rate": 4.786369311531136e-06, "loss": 0.0081, "step": 133170 }, { "epoch": 1.124569884528509, "grad_norm": 0.48385089635849, "learning_rate": 4.785633104896757e-06, "loss": 0.011, "step": 133180 }, { "epoch": 1.1246543243756728, "grad_norm": 0.4235077500343323, "learning_rate": 4.7848969029183825e-06, "loss": 0.0046, "step": 133190 }, { "epoch": 1.1247387642228368, "grad_norm": 0.00024471990764141083, "learning_rate": 4.784160705612005e-06, "loss": 0.0056, "step": 133200 }, { "epoch": 1.1248232040700006, "grad_norm": 0.13803109526634216, "learning_rate": 4.783424512993609e-06, "loss": 0.0073, "step": 133210 }, { "epoch": 1.1249076439171646, "grad_norm": 0.2509193420410156, "learning_rate": 4.7826883250791915e-06, "loss": 0.0066, "step": 133220 }, { "epoch": 1.1249920837643284, "grad_norm": 0.13865990936756134, "learning_rate": 4.781952141884739e-06, "loss": 0.0038, "step": 133230 }, { "epoch": 1.1250765236114924, "grad_norm": 0.5197531580924988, "learning_rate": 4.781215963426239e-06, "loss": 0.0102, "step": 133240 }, { "epoch": 1.1251609634586561, "grad_norm": 0.15212275087833405, "learning_rate": 4.7804797897196854e-06, "loss": 0.0133, "step": 133250 }, { "epoch": 1.12524540330582, "grad_norm": 0.12547601759433746, "learning_rate": 4.779743620781065e-06, "loss": 0.0067, "step": 133260 }, { "epoch": 1.125329843152984, "grad_norm": 0.1938885599374771, "learning_rate": 4.7790074566263676e-06, "loss": 0.0072, "step": 133270 }, { "epoch": 1.1254142830001477, "grad_norm": 0.2034706324338913, "learning_rate": 4.778271297271583e-06, "loss": 0.0091, "step": 133280 }, { "epoch": 1.1254987228473117, "grad_norm": 0.054829876869916916, "learning_rate": 4.777535142732702e-06, "loss": 0.0127, "step": 133290 }, { "epoch": 1.1255831626944754, "grad_norm": 0.27164584398269653, "learning_rate": 4.776798993025711e-06, "loss": 0.0096, "step": 133300 }, { "epoch": 1.1256676025416394, "grad_norm": 0.1261480748653412, "learning_rate": 4.7760628481666e-06, "loss": 0.0122, "step": 133310 }, { "epoch": 1.1257520423888032, "grad_norm": 0.30975618958473206, "learning_rate": 4.775326708171357e-06, "loss": 0.0058, "step": 133320 }, { "epoch": 1.1258364822359672, "grad_norm": 0.19651250541210175, "learning_rate": 4.774590573055974e-06, "loss": 0.0035, "step": 133330 }, { "epoch": 1.125920922083131, "grad_norm": 0.3527523875236511, "learning_rate": 4.7738544428364356e-06, "loss": 0.0152, "step": 133340 }, { "epoch": 1.126005361930295, "grad_norm": 0.18823648989200592, "learning_rate": 4.773118317528734e-06, "loss": 0.0116, "step": 133350 }, { "epoch": 1.1260898017774588, "grad_norm": 0.23624826967716217, "learning_rate": 4.772382197148857e-06, "loss": 0.0053, "step": 133360 }, { "epoch": 1.1261742416246228, "grad_norm": 0.1418115645647049, "learning_rate": 4.771646081712788e-06, "loss": 0.0127, "step": 133370 }, { "epoch": 1.1262586814717865, "grad_norm": 0.27939969301223755, "learning_rate": 4.770909971236523e-06, "loss": 0.012, "step": 133380 }, { "epoch": 1.1263431213189503, "grad_norm": 0.34472107887268066, "learning_rate": 4.7701738657360446e-06, "loss": 0.0072, "step": 133390 }, { "epoch": 1.1264275611661143, "grad_norm": 0.2072933316230774, "learning_rate": 4.769437765227345e-06, "loss": 0.0109, "step": 133400 }, { "epoch": 1.126512001013278, "grad_norm": 0.3124650716781616, "learning_rate": 4.768701669726406e-06, "loss": 0.0097, "step": 133410 }, { "epoch": 1.126596440860442, "grad_norm": 0.17019686102867126, "learning_rate": 4.767965579249222e-06, "loss": 0.007, "step": 133420 }, { "epoch": 1.1266808807076059, "grad_norm": 0.2708638608455658, "learning_rate": 4.767229493811778e-06, "loss": 0.0094, "step": 133430 }, { "epoch": 1.1267653205547699, "grad_norm": 0.2582176923751831, "learning_rate": 4.766493413430062e-06, "loss": 0.0088, "step": 133440 }, { "epoch": 1.1268497604019336, "grad_norm": 0.29374027252197266, "learning_rate": 4.765757338120059e-06, "loss": 0.0041, "step": 133450 }, { "epoch": 1.1269342002490976, "grad_norm": 0.1029253900051117, "learning_rate": 4.76502126789776e-06, "loss": 0.0154, "step": 133460 }, { "epoch": 1.1270186400962614, "grad_norm": 0.2997533679008484, "learning_rate": 4.764285202779152e-06, "loss": 0.0053, "step": 133470 }, { "epoch": 1.1271030799434252, "grad_norm": 0.4840116798877716, "learning_rate": 4.76354914278022e-06, "loss": 0.0144, "step": 133480 }, { "epoch": 1.1271875197905892, "grad_norm": 0.04956885799765587, "learning_rate": 4.762813087916953e-06, "loss": 0.0089, "step": 133490 }, { "epoch": 1.1272719596377532, "grad_norm": 0.558235764503479, "learning_rate": 4.762077038205335e-06, "loss": 0.0161, "step": 133500 }, { "epoch": 1.127356399484917, "grad_norm": 0.1006733626127243, "learning_rate": 4.761340993661358e-06, "loss": 0.0081, "step": 133510 }, { "epoch": 1.1274408393320807, "grad_norm": 0.2488868683576584, "learning_rate": 4.760604954301003e-06, "loss": 0.0152, "step": 133520 }, { "epoch": 1.1275252791792447, "grad_norm": 0.2090185582637787, "learning_rate": 4.759868920140262e-06, "loss": 0.0095, "step": 133530 }, { "epoch": 1.1276097190264085, "grad_norm": 0.0028334641829133034, "learning_rate": 4.759132891195115e-06, "loss": 0.0061, "step": 133540 }, { "epoch": 1.1276941588735725, "grad_norm": 0.1483418345451355, "learning_rate": 4.758396867481556e-06, "loss": 0.0102, "step": 133550 }, { "epoch": 1.1277785987207363, "grad_norm": 0.7787567973136902, "learning_rate": 4.757660849015565e-06, "loss": 0.0096, "step": 133560 }, { "epoch": 1.1278630385679003, "grad_norm": 0.013973349705338478, "learning_rate": 4.756924835813133e-06, "loss": 0.0113, "step": 133570 }, { "epoch": 1.127947478415064, "grad_norm": 0.2621527910232544, "learning_rate": 4.756188827890241e-06, "loss": 0.0122, "step": 133580 }, { "epoch": 1.128031918262228, "grad_norm": 0.39045774936676025, "learning_rate": 4.755452825262881e-06, "loss": 0.0086, "step": 133590 }, { "epoch": 1.1281163581093918, "grad_norm": 0.27668699622154236, "learning_rate": 4.754716827947034e-06, "loss": 0.0077, "step": 133600 }, { "epoch": 1.1282007979565556, "grad_norm": 0.1701994240283966, "learning_rate": 4.753980835958686e-06, "loss": 0.0062, "step": 133610 }, { "epoch": 1.1282852378037196, "grad_norm": 0.053434569388628006, "learning_rate": 4.753244849313826e-06, "loss": 0.0065, "step": 133620 }, { "epoch": 1.1283696776508834, "grad_norm": 0.14598587155342102, "learning_rate": 4.752508868028435e-06, "loss": 0.0046, "step": 133630 }, { "epoch": 1.1284541174980474, "grad_norm": 0.8039562702178955, "learning_rate": 4.751772892118503e-06, "loss": 0.0135, "step": 133640 }, { "epoch": 1.1285385573452111, "grad_norm": 0.3300303816795349, "learning_rate": 4.75103692160001e-06, "loss": 0.006, "step": 133650 }, { "epoch": 1.1286229971923751, "grad_norm": 0.2960682213306427, "learning_rate": 4.750300956488946e-06, "loss": 0.0052, "step": 133660 }, { "epoch": 1.128707437039539, "grad_norm": 0.37610042095184326, "learning_rate": 4.749564996801294e-06, "loss": 0.0095, "step": 133670 }, { "epoch": 1.128791876886703, "grad_norm": 0.11639063060283661, "learning_rate": 4.748829042553039e-06, "loss": 0.0099, "step": 133680 }, { "epoch": 1.1288763167338667, "grad_norm": 0.0009007594780996442, "learning_rate": 4.748093093760164e-06, "loss": 0.0103, "step": 133690 }, { "epoch": 1.1289607565810307, "grad_norm": 0.15665844082832336, "learning_rate": 4.7473571504386575e-06, "loss": 0.0068, "step": 133700 }, { "epoch": 1.1290451964281945, "grad_norm": 0.48938092589378357, "learning_rate": 4.746621212604503e-06, "loss": 0.0122, "step": 133710 }, { "epoch": 1.1291296362753585, "grad_norm": 0.28085386753082275, "learning_rate": 4.745885280273681e-06, "loss": 0.0059, "step": 133720 }, { "epoch": 1.1292140761225222, "grad_norm": 0.49147146940231323, "learning_rate": 4.7451493534621804e-06, "loss": 0.0112, "step": 133730 }, { "epoch": 1.129298515969686, "grad_norm": 0.6276857256889343, "learning_rate": 4.744413432185982e-06, "loss": 0.0089, "step": 133740 }, { "epoch": 1.12938295581685, "grad_norm": 0.30901455879211426, "learning_rate": 4.743677516461073e-06, "loss": 0.0068, "step": 133750 }, { "epoch": 1.1294673956640138, "grad_norm": 0.33047378063201904, "learning_rate": 4.742941606303435e-06, "loss": 0.0092, "step": 133760 }, { "epoch": 1.1295518355111778, "grad_norm": 0.08307211101055145, "learning_rate": 4.7422057017290535e-06, "loss": 0.005, "step": 133770 }, { "epoch": 1.1296362753583415, "grad_norm": 0.0017995797097682953, "learning_rate": 4.74146980275391e-06, "loss": 0.0034, "step": 133780 }, { "epoch": 1.1297207152055055, "grad_norm": 0.1648969203233719, "learning_rate": 4.74073390939399e-06, "loss": 0.0079, "step": 133790 }, { "epoch": 1.1298051550526693, "grad_norm": 0.2610660493373871, "learning_rate": 4.7399980216652765e-06, "loss": 0.0045, "step": 133800 }, { "epoch": 1.1298895948998333, "grad_norm": 0.5638481378555298, "learning_rate": 4.739262139583754e-06, "loss": 0.0069, "step": 133810 }, { "epoch": 1.129974034746997, "grad_norm": 0.5656331777572632, "learning_rate": 4.738526263165403e-06, "loss": 0.0088, "step": 133820 }, { "epoch": 1.1300584745941609, "grad_norm": 0.4973788559436798, "learning_rate": 4.737790392426207e-06, "loss": 0.0057, "step": 133830 }, { "epoch": 1.1301429144413249, "grad_norm": 0.2598261833190918, "learning_rate": 4.7370545273821514e-06, "loss": 0.0078, "step": 133840 }, { "epoch": 1.1302273542884889, "grad_norm": 0.280661016702652, "learning_rate": 4.736318668049217e-06, "loss": 0.004, "step": 133850 }, { "epoch": 1.1303117941356526, "grad_norm": 0.08661692589521408, "learning_rate": 4.7355828144433884e-06, "loss": 0.0082, "step": 133860 }, { "epoch": 1.1303962339828164, "grad_norm": 0.5196499228477478, "learning_rate": 4.734846966580644e-06, "loss": 0.0074, "step": 133870 }, { "epoch": 1.1304806738299804, "grad_norm": 0.07649421691894531, "learning_rate": 4.734111124476973e-06, "loss": 0.0054, "step": 133880 }, { "epoch": 1.1305651136771442, "grad_norm": 0.6263256669044495, "learning_rate": 4.733375288148351e-06, "loss": 0.0063, "step": 133890 }, { "epoch": 1.1306495535243082, "grad_norm": 0.629809558391571, "learning_rate": 4.732639457610765e-06, "loss": 0.0084, "step": 133900 }, { "epoch": 1.130733993371472, "grad_norm": 0.395846962928772, "learning_rate": 4.731903632880193e-06, "loss": 0.007, "step": 133910 }, { "epoch": 1.130818433218636, "grad_norm": 0.49397435784339905, "learning_rate": 4.7311678139726206e-06, "loss": 0.0072, "step": 133920 }, { "epoch": 1.1309028730657997, "grad_norm": 0.00901247188448906, "learning_rate": 4.730432000904027e-06, "loss": 0.0037, "step": 133930 }, { "epoch": 1.1309873129129637, "grad_norm": 0.10249735414981842, "learning_rate": 4.729696193690396e-06, "loss": 0.0096, "step": 133940 }, { "epoch": 1.1310717527601275, "grad_norm": 0.17535586655139923, "learning_rate": 4.72896039234771e-06, "loss": 0.0076, "step": 133950 }, { "epoch": 1.1311561926072913, "grad_norm": 0.5081800222396851, "learning_rate": 4.728224596891946e-06, "loss": 0.0077, "step": 133960 }, { "epoch": 1.1312406324544553, "grad_norm": 0.7807523012161255, "learning_rate": 4.72748880733909e-06, "loss": 0.0052, "step": 133970 }, { "epoch": 1.131325072301619, "grad_norm": 0.6256444454193115, "learning_rate": 4.726753023705119e-06, "loss": 0.016, "step": 133980 }, { "epoch": 1.131409512148783, "grad_norm": 0.24959908425807953, "learning_rate": 4.726017246006018e-06, "loss": 0.0118, "step": 133990 }, { "epoch": 1.1314939519959468, "grad_norm": 0.229162335395813, "learning_rate": 4.7252814742577655e-06, "loss": 0.0089, "step": 134000 }, { "epoch": 1.1315783918431108, "grad_norm": 0.41110607981681824, "learning_rate": 4.724545708476345e-06, "loss": 0.0104, "step": 134010 }, { "epoch": 1.1316628316902746, "grad_norm": 0.3341188132762909, "learning_rate": 4.723809948677732e-06, "loss": 0.0053, "step": 134020 }, { "epoch": 1.1317472715374386, "grad_norm": 0.14357295632362366, "learning_rate": 4.723074194877912e-06, "loss": 0.0081, "step": 134030 }, { "epoch": 1.1318317113846024, "grad_norm": 0.10420342534780502, "learning_rate": 4.722338447092864e-06, "loss": 0.0096, "step": 134040 }, { "epoch": 1.1319161512317664, "grad_norm": 0.14814208447933197, "learning_rate": 4.721602705338569e-06, "loss": 0.005, "step": 134050 }, { "epoch": 1.1320005910789301, "grad_norm": 0.2383575588464737, "learning_rate": 4.7208669696310056e-06, "loss": 0.0121, "step": 134060 }, { "epoch": 1.1320850309260941, "grad_norm": 0.2777252793312073, "learning_rate": 4.7201312399861524e-06, "loss": 0.0068, "step": 134070 }, { "epoch": 1.132169470773258, "grad_norm": 0.1317608803510666, "learning_rate": 4.719395516419993e-06, "loss": 0.0089, "step": 134080 }, { "epoch": 1.1322539106204217, "grad_norm": 0.27646052837371826, "learning_rate": 4.718659798948505e-06, "loss": 0.0054, "step": 134090 }, { "epoch": 1.1323383504675857, "grad_norm": 0.45161160826683044, "learning_rate": 4.71792408758767e-06, "loss": 0.0107, "step": 134100 }, { "epoch": 1.1324227903147495, "grad_norm": 0.21852676570415497, "learning_rate": 4.717188382353463e-06, "loss": 0.0088, "step": 134110 }, { "epoch": 1.1325072301619135, "grad_norm": 0.12344764918088913, "learning_rate": 4.71645268326187e-06, "loss": 0.0067, "step": 134120 }, { "epoch": 1.1325916700090772, "grad_norm": 0.4982341229915619, "learning_rate": 4.7157169903288635e-06, "loss": 0.0103, "step": 134130 }, { "epoch": 1.1326761098562412, "grad_norm": 0.11415465176105499, "learning_rate": 4.714981303570428e-06, "loss": 0.0141, "step": 134140 }, { "epoch": 1.132760549703405, "grad_norm": 0.29344871640205383, "learning_rate": 4.714245623002538e-06, "loss": 0.0068, "step": 134150 }, { "epoch": 1.132844989550569, "grad_norm": 0.23297029733657837, "learning_rate": 4.713509948641176e-06, "loss": 0.008, "step": 134160 }, { "epoch": 1.1329294293977328, "grad_norm": 0.6460521221160889, "learning_rate": 4.712774280502318e-06, "loss": 0.0112, "step": 134170 }, { "epoch": 1.1330138692448966, "grad_norm": 0.10642556846141815, "learning_rate": 4.7120386186019466e-06, "loss": 0.0096, "step": 134180 }, { "epoch": 1.1330983090920606, "grad_norm": 0.347260981798172, "learning_rate": 4.7113029629560356e-06, "loss": 0.003, "step": 134190 }, { "epoch": 1.1331827489392243, "grad_norm": 0.1642204374074936, "learning_rate": 4.710567313580565e-06, "loss": 0.0041, "step": 134200 }, { "epoch": 1.1332671887863883, "grad_norm": 0.39020007848739624, "learning_rate": 4.709831670491514e-06, "loss": 0.0094, "step": 134210 }, { "epoch": 1.133351628633552, "grad_norm": 0.014195505529642105, "learning_rate": 4.709096033704857e-06, "loss": 0.0071, "step": 134220 }, { "epoch": 1.133436068480716, "grad_norm": 0.14398492872714996, "learning_rate": 4.708360403236577e-06, "loss": 0.0033, "step": 134230 }, { "epoch": 1.1335205083278799, "grad_norm": 0.04530878737568855, "learning_rate": 4.707624779102648e-06, "loss": 0.0119, "step": 134240 }, { "epoch": 1.1336049481750439, "grad_norm": 0.14070850610733032, "learning_rate": 4.70688916131905e-06, "loss": 0.0101, "step": 134250 }, { "epoch": 1.1336893880222076, "grad_norm": 0.007642986252903938, "learning_rate": 4.706153549901757e-06, "loss": 0.0068, "step": 134260 }, { "epoch": 1.1337738278693716, "grad_norm": 0.09249019622802734, "learning_rate": 4.705417944866752e-06, "loss": 0.0082, "step": 134270 }, { "epoch": 1.1338582677165354, "grad_norm": 0.18380208313465118, "learning_rate": 4.704682346230006e-06, "loss": 0.0079, "step": 134280 }, { "epoch": 1.1339427075636994, "grad_norm": 0.5886822938919067, "learning_rate": 4.703946754007501e-06, "loss": 0.0145, "step": 134290 }, { "epoch": 1.1340271474108632, "grad_norm": 0.17635388672351837, "learning_rate": 4.7032111682152124e-06, "loss": 0.0049, "step": 134300 }, { "epoch": 1.134111587258027, "grad_norm": 0.1584089696407318, "learning_rate": 4.702475588869113e-06, "loss": 0.0052, "step": 134310 }, { "epoch": 1.134196027105191, "grad_norm": 0.3080359697341919, "learning_rate": 4.701740015985186e-06, "loss": 0.0148, "step": 134320 }, { "epoch": 1.1342804669523547, "grad_norm": 0.27339884638786316, "learning_rate": 4.7010044495794035e-06, "loss": 0.0079, "step": 134330 }, { "epoch": 1.1343649067995187, "grad_norm": 0.05937523394823074, "learning_rate": 4.700268889667743e-06, "loss": 0.0101, "step": 134340 }, { "epoch": 1.1344493466466825, "grad_norm": 0.0837695300579071, "learning_rate": 4.6995333362661795e-06, "loss": 0.0092, "step": 134350 }, { "epoch": 1.1345337864938465, "grad_norm": 0.3355729281902313, "learning_rate": 4.698797789390693e-06, "loss": 0.0117, "step": 134360 }, { "epoch": 1.1346182263410103, "grad_norm": 0.25488680601119995, "learning_rate": 4.698062249057255e-06, "loss": 0.007, "step": 134370 }, { "epoch": 1.1347026661881743, "grad_norm": 0.3346796929836273, "learning_rate": 4.697326715281845e-06, "loss": 0.011, "step": 134380 }, { "epoch": 1.134787106035338, "grad_norm": 0.17537528276443481, "learning_rate": 4.696591188080434e-06, "loss": 0.0071, "step": 134390 }, { "epoch": 1.1348715458825018, "grad_norm": 0.025403492152690887, "learning_rate": 4.695855667469003e-06, "loss": 0.0069, "step": 134400 }, { "epoch": 1.1349559857296658, "grad_norm": 0.33687618374824524, "learning_rate": 4.6951201534635234e-06, "loss": 0.0166, "step": 134410 }, { "epoch": 1.1350404255768298, "grad_norm": 0.14068081974983215, "learning_rate": 4.694384646079971e-06, "loss": 0.0142, "step": 134420 }, { "epoch": 1.1351248654239936, "grad_norm": 0.580194890499115, "learning_rate": 4.693649145334322e-06, "loss": 0.0105, "step": 134430 }, { "epoch": 1.1352093052711574, "grad_norm": 0.1820107400417328, "learning_rate": 4.69291365124255e-06, "loss": 0.014, "step": 134440 }, { "epoch": 1.1352937451183214, "grad_norm": 0.3180462419986725, "learning_rate": 4.692178163820632e-06, "loss": 0.004, "step": 134450 }, { "epoch": 1.1353781849654851, "grad_norm": 0.21243537962436676, "learning_rate": 4.69144268308454e-06, "loss": 0.0046, "step": 134460 }, { "epoch": 1.1354626248126491, "grad_norm": 0.2534085512161255, "learning_rate": 4.690707209050251e-06, "loss": 0.011, "step": 134470 }, { "epoch": 1.135547064659813, "grad_norm": 0.18499435484409332, "learning_rate": 4.689971741733735e-06, "loss": 0.0058, "step": 134480 }, { "epoch": 1.135631504506977, "grad_norm": 0.3469575047492981, "learning_rate": 4.689236281150974e-06, "loss": 0.0121, "step": 134490 }, { "epoch": 1.1357159443541407, "grad_norm": 0.2982473373413086, "learning_rate": 4.688500827317934e-06, "loss": 0.0092, "step": 134500 }, { "epoch": 1.1358003842013047, "grad_norm": 0.21480409801006317, "learning_rate": 4.687765380250593e-06, "loss": 0.0059, "step": 134510 }, { "epoch": 1.1358848240484685, "grad_norm": 0.32912105321884155, "learning_rate": 4.6870299399649235e-06, "loss": 0.0039, "step": 134520 }, { "epoch": 1.1359692638956322, "grad_norm": 0.2926079332828522, "learning_rate": 4.6862945064769e-06, "loss": 0.0051, "step": 134530 }, { "epoch": 1.1360537037427962, "grad_norm": 0.478535920381546, "learning_rate": 4.685559079802497e-06, "loss": 0.0078, "step": 134540 }, { "epoch": 1.13613814358996, "grad_norm": 0.19800890982151031, "learning_rate": 4.684823659957683e-06, "loss": 0.0042, "step": 134550 }, { "epoch": 1.136222583437124, "grad_norm": 0.004564274102449417, "learning_rate": 4.684088246958437e-06, "loss": 0.0053, "step": 134560 }, { "epoch": 1.1363070232842878, "grad_norm": 0.2632819414138794, "learning_rate": 4.683352840820728e-06, "loss": 0.0081, "step": 134570 }, { "epoch": 1.1363914631314518, "grad_norm": 0.339662104845047, "learning_rate": 4.682617441560533e-06, "loss": 0.0098, "step": 134580 }, { "epoch": 1.1364759029786156, "grad_norm": 0.7281478643417358, "learning_rate": 4.6818820491938185e-06, "loss": 0.0086, "step": 134590 }, { "epoch": 1.1365603428257796, "grad_norm": 0.11816539615392685, "learning_rate": 4.681146663736564e-06, "loss": 0.0116, "step": 134600 }, { "epoch": 1.1366447826729433, "grad_norm": 0.5891464352607727, "learning_rate": 4.680411285204737e-06, "loss": 0.0087, "step": 134610 }, { "epoch": 1.1367292225201073, "grad_norm": 0.16435889899730682, "learning_rate": 4.679675913614312e-06, "loss": 0.0044, "step": 134620 }, { "epoch": 1.136813662367271, "grad_norm": 0.18165411055088043, "learning_rate": 4.678940548981259e-06, "loss": 0.0039, "step": 134630 }, { "epoch": 1.136898102214435, "grad_norm": 0.1292203664779663, "learning_rate": 4.6782051913215534e-06, "loss": 0.0105, "step": 134640 }, { "epoch": 1.1369825420615989, "grad_norm": 0.24512837827205658, "learning_rate": 4.677469840651165e-06, "loss": 0.0074, "step": 134650 }, { "epoch": 1.1370669819087627, "grad_norm": 0.6104920506477356, "learning_rate": 4.676734496986064e-06, "loss": 0.0077, "step": 134660 }, { "epoch": 1.1371514217559267, "grad_norm": 0.6084046959877014, "learning_rate": 4.675999160342225e-06, "loss": 0.0052, "step": 134670 }, { "epoch": 1.1372358616030904, "grad_norm": 0.23885685205459595, "learning_rate": 4.675263830735616e-06, "loss": 0.0065, "step": 134680 }, { "epoch": 1.1373203014502544, "grad_norm": 0.6440009474754333, "learning_rate": 4.674528508182212e-06, "loss": 0.0153, "step": 134690 }, { "epoch": 1.1374047412974182, "grad_norm": 0.4626169800758362, "learning_rate": 4.673793192697981e-06, "loss": 0.0068, "step": 134700 }, { "epoch": 1.1374891811445822, "grad_norm": 0.4440937042236328, "learning_rate": 4.673057884298896e-06, "loss": 0.0145, "step": 134710 }, { "epoch": 1.137573620991746, "grad_norm": 0.17805743217468262, "learning_rate": 4.672322583000925e-06, "loss": 0.0155, "step": 134720 }, { "epoch": 1.13765806083891, "grad_norm": 0.13452661037445068, "learning_rate": 4.671587288820042e-06, "loss": 0.0076, "step": 134730 }, { "epoch": 1.1377425006860737, "grad_norm": 0.16612125933170319, "learning_rate": 4.670852001772216e-06, "loss": 0.0071, "step": 134740 }, { "epoch": 1.1378269405332375, "grad_norm": 0.16025374829769135, "learning_rate": 4.670116721873417e-06, "loss": 0.0127, "step": 134750 }, { "epoch": 1.1379113803804015, "grad_norm": 0.5849469304084778, "learning_rate": 4.669381449139613e-06, "loss": 0.0086, "step": 134760 }, { "epoch": 1.1379958202275653, "grad_norm": 0.2642882466316223, "learning_rate": 4.668646183586778e-06, "loss": 0.0108, "step": 134770 }, { "epoch": 1.1380802600747293, "grad_norm": 0.038754161447286606, "learning_rate": 4.667910925230882e-06, "loss": 0.0174, "step": 134780 }, { "epoch": 1.138164699921893, "grad_norm": 0.42714980244636536, "learning_rate": 4.667175674087889e-06, "loss": 0.0084, "step": 134790 }, { "epoch": 1.138249139769057, "grad_norm": 0.2611309885978699, "learning_rate": 4.666440430173774e-06, "loss": 0.0054, "step": 134800 }, { "epoch": 1.1383335796162208, "grad_norm": 0.4100503623485565, "learning_rate": 4.665705193504504e-06, "loss": 0.0048, "step": 134810 }, { "epoch": 1.1384180194633848, "grad_norm": 0.09379680454730988, "learning_rate": 4.664969964096049e-06, "loss": 0.0055, "step": 134820 }, { "epoch": 1.1385024593105486, "grad_norm": 0.13085761666297913, "learning_rate": 4.6642347419643765e-06, "loss": 0.0117, "step": 134830 }, { "epoch": 1.1385868991577126, "grad_norm": 0.0902881771326065, "learning_rate": 4.663499527125458e-06, "loss": 0.0056, "step": 134840 }, { "epoch": 1.1386713390048764, "grad_norm": 0.2863519787788391, "learning_rate": 4.66276431959526e-06, "loss": 0.0067, "step": 134850 }, { "epoch": 1.1387557788520404, "grad_norm": 0.12833145260810852, "learning_rate": 4.662029119389754e-06, "loss": 0.0107, "step": 134860 }, { "epoch": 1.1388402186992042, "grad_norm": 0.16872170567512512, "learning_rate": 4.661293926524902e-06, "loss": 0.0061, "step": 134870 }, { "epoch": 1.138924658546368, "grad_norm": 0.2547018826007843, "learning_rate": 4.660558741016681e-06, "loss": 0.0069, "step": 134880 }, { "epoch": 1.139009098393532, "grad_norm": 0.07596767693758011, "learning_rate": 4.659823562881053e-06, "loss": 0.0072, "step": 134890 }, { "epoch": 1.1390935382406957, "grad_norm": 0.2954144775867462, "learning_rate": 4.6590883921339865e-06, "loss": 0.0086, "step": 134900 }, { "epoch": 1.1391779780878597, "grad_norm": 0.5450761318206787, "learning_rate": 4.658353228791451e-06, "loss": 0.0134, "step": 134910 }, { "epoch": 1.1392624179350235, "grad_norm": 0.11443938314914703, "learning_rate": 4.657618072869412e-06, "loss": 0.0123, "step": 134920 }, { "epoch": 1.1393468577821875, "grad_norm": 0.21238483488559723, "learning_rate": 4.656882924383841e-06, "loss": 0.0053, "step": 134930 }, { "epoch": 1.1394312976293512, "grad_norm": 0.17856371402740479, "learning_rate": 4.656147783350701e-06, "loss": 0.0085, "step": 134940 }, { "epoch": 1.1395157374765152, "grad_norm": 0.518303632736206, "learning_rate": 4.655412649785961e-06, "loss": 0.0064, "step": 134950 }, { "epoch": 1.139600177323679, "grad_norm": 0.4552019536495209, "learning_rate": 4.654677523705586e-06, "loss": 0.0082, "step": 134960 }, { "epoch": 1.139684617170843, "grad_norm": 0.44668272137641907, "learning_rate": 4.653942405125546e-06, "loss": 0.009, "step": 134970 }, { "epoch": 1.1397690570180068, "grad_norm": 0.5824795365333557, "learning_rate": 4.6532072940618065e-06, "loss": 0.0059, "step": 134980 }, { "epoch": 1.1398534968651708, "grad_norm": 0.5466343760490417, "learning_rate": 4.652472190530333e-06, "loss": 0.0079, "step": 134990 }, { "epoch": 1.1399379367123346, "grad_norm": 0.19028180837631226, "learning_rate": 4.6517370945470935e-06, "loss": 0.0055, "step": 135000 }, { "epoch": 1.1400223765594983, "grad_norm": 0.30130213499069214, "learning_rate": 4.65100200612805e-06, "loss": 0.0087, "step": 135010 }, { "epoch": 1.1401068164066623, "grad_norm": 0.0719427689909935, "learning_rate": 4.650266925289174e-06, "loss": 0.0058, "step": 135020 }, { "epoch": 1.1401912562538261, "grad_norm": 0.3963351845741272, "learning_rate": 4.649531852046428e-06, "loss": 0.0077, "step": 135030 }, { "epoch": 1.14027569610099, "grad_norm": 0.3808779716491699, "learning_rate": 4.648796786415778e-06, "loss": 0.0058, "step": 135040 }, { "epoch": 1.1403601359481539, "grad_norm": 0.12197304517030716, "learning_rate": 4.648061728413189e-06, "loss": 0.0079, "step": 135050 }, { "epoch": 1.1404445757953179, "grad_norm": 0.04999377205967903, "learning_rate": 4.6473266780546285e-06, "loss": 0.0097, "step": 135060 }, { "epoch": 1.1405290156424817, "grad_norm": 0.441711962223053, "learning_rate": 4.6465916353560596e-06, "loss": 0.0059, "step": 135070 }, { "epoch": 1.1406134554896457, "grad_norm": 0.21324482560157776, "learning_rate": 4.64585660033345e-06, "loss": 0.0128, "step": 135080 }, { "epoch": 1.1406978953368094, "grad_norm": 0.5393392443656921, "learning_rate": 4.645121573002758e-06, "loss": 0.0052, "step": 135090 }, { "epoch": 1.1407823351839732, "grad_norm": 0.15539605915546417, "learning_rate": 4.6443865533799576e-06, "loss": 0.0093, "step": 135100 }, { "epoch": 1.1408667750311372, "grad_norm": 0.10072013735771179, "learning_rate": 4.643651541481005e-06, "loss": 0.0059, "step": 135110 }, { "epoch": 1.140951214878301, "grad_norm": 0.024279264733195305, "learning_rate": 4.64291653732187e-06, "loss": 0.0071, "step": 135120 }, { "epoch": 1.141035654725465, "grad_norm": 0.15550509095191956, "learning_rate": 4.642181540918515e-06, "loss": 0.006, "step": 135130 }, { "epoch": 1.1411200945726288, "grad_norm": 0.12437181919813156, "learning_rate": 4.6414465522869015e-06, "loss": 0.0062, "step": 135140 }, { "epoch": 1.1412045344197927, "grad_norm": 0.29184362292289734, "learning_rate": 4.6407115714429966e-06, "loss": 0.0084, "step": 135150 }, { "epoch": 1.1412889742669565, "grad_norm": 0.6064972877502441, "learning_rate": 4.639976598402761e-06, "loss": 0.0127, "step": 135160 }, { "epoch": 1.1413734141141205, "grad_norm": 0.26474958658218384, "learning_rate": 4.639241633182162e-06, "loss": 0.0098, "step": 135170 }, { "epoch": 1.1414578539612843, "grad_norm": 0.10596951097249985, "learning_rate": 4.63850667579716e-06, "loss": 0.012, "step": 135180 }, { "epoch": 1.1415422938084483, "grad_norm": 0.05507742241024971, "learning_rate": 4.63777172626372e-06, "loss": 0.0067, "step": 135190 }, { "epoch": 1.141626733655612, "grad_norm": 0.09161354601383209, "learning_rate": 4.637036784597801e-06, "loss": 0.0086, "step": 135200 }, { "epoch": 1.141711173502776, "grad_norm": 0.5263771414756775, "learning_rate": 4.636301850815371e-06, "loss": 0.0198, "step": 135210 }, { "epoch": 1.1417956133499398, "grad_norm": 0.2891320586204529, "learning_rate": 4.63556692493239e-06, "loss": 0.0126, "step": 135220 }, { "epoch": 1.1418800531971036, "grad_norm": 0.2899189591407776, "learning_rate": 4.634832006964822e-06, "loss": 0.0078, "step": 135230 }, { "epoch": 1.1419644930442676, "grad_norm": 0.7766717076301575, "learning_rate": 4.634097096928627e-06, "loss": 0.0109, "step": 135240 }, { "epoch": 1.1420489328914314, "grad_norm": 0.1601227968931198, "learning_rate": 4.6333621948397665e-06, "loss": 0.0061, "step": 135250 }, { "epoch": 1.1421333727385954, "grad_norm": 0.404607892036438, "learning_rate": 4.632627300714205e-06, "loss": 0.0058, "step": 135260 }, { "epoch": 1.1422178125857592, "grad_norm": 0.08565703779459, "learning_rate": 4.631892414567904e-06, "loss": 0.0108, "step": 135270 }, { "epoch": 1.1423022524329232, "grad_norm": 0.007161106448620558, "learning_rate": 4.631157536416824e-06, "loss": 0.0079, "step": 135280 }, { "epoch": 1.142386692280087, "grad_norm": 0.46202516555786133, "learning_rate": 4.630422666276924e-06, "loss": 0.0076, "step": 135290 }, { "epoch": 1.142471132127251, "grad_norm": 0.31137603521347046, "learning_rate": 4.629687804164172e-06, "loss": 0.0044, "step": 135300 }, { "epoch": 1.1425555719744147, "grad_norm": 0.19609126448631287, "learning_rate": 4.628952950094523e-06, "loss": 0.007, "step": 135310 }, { "epoch": 1.1426400118215785, "grad_norm": 0.197821244597435, "learning_rate": 4.628218104083941e-06, "loss": 0.0104, "step": 135320 }, { "epoch": 1.1427244516687425, "grad_norm": 0.22608502209186554, "learning_rate": 4.6274832661483835e-06, "loss": 0.0066, "step": 135330 }, { "epoch": 1.1428088915159065, "grad_norm": 0.6404182314872742, "learning_rate": 4.6267484363038155e-06, "loss": 0.0078, "step": 135340 }, { "epoch": 1.1428933313630703, "grad_norm": 0.28636932373046875, "learning_rate": 4.626013614566193e-06, "loss": 0.0058, "step": 135350 }, { "epoch": 1.142977771210234, "grad_norm": 0.04623759910464287, "learning_rate": 4.62527880095148e-06, "loss": 0.006, "step": 135360 }, { "epoch": 1.143062211057398, "grad_norm": 0.1587282121181488, "learning_rate": 4.624543995475635e-06, "loss": 0.0085, "step": 135370 }, { "epoch": 1.1431466509045618, "grad_norm": 0.07019404321908951, "learning_rate": 4.623809198154614e-06, "loss": 0.007, "step": 135380 }, { "epoch": 1.1432310907517258, "grad_norm": 0.320417582988739, "learning_rate": 4.623074409004385e-06, "loss": 0.0069, "step": 135390 }, { "epoch": 1.1433155305988896, "grad_norm": 0.42378416657447815, "learning_rate": 4.622339628040898e-06, "loss": 0.0089, "step": 135400 }, { "epoch": 1.1433999704460536, "grad_norm": 0.11728282272815704, "learning_rate": 4.62160485528012e-06, "loss": 0.0073, "step": 135410 }, { "epoch": 1.1434844102932173, "grad_norm": 0.30871862173080444, "learning_rate": 4.620870090738005e-06, "loss": 0.0065, "step": 135420 }, { "epoch": 1.1435688501403813, "grad_norm": 0.06316067278385162, "learning_rate": 4.620135334430516e-06, "loss": 0.0052, "step": 135430 }, { "epoch": 1.1436532899875451, "grad_norm": 0.42136916518211365, "learning_rate": 4.619400586373607e-06, "loss": 0.0143, "step": 135440 }, { "epoch": 1.143737729834709, "grad_norm": 0.20840336382389069, "learning_rate": 4.618665846583242e-06, "loss": 0.0073, "step": 135450 }, { "epoch": 1.143822169681873, "grad_norm": 0.22393456101417542, "learning_rate": 4.617931115075375e-06, "loss": 0.0058, "step": 135460 }, { "epoch": 1.1439066095290367, "grad_norm": 0.747685968875885, "learning_rate": 4.617196391865966e-06, "loss": 0.011, "step": 135470 }, { "epoch": 1.1439910493762007, "grad_norm": 0.7227141261100769, "learning_rate": 4.616461676970975e-06, "loss": 0.0079, "step": 135480 }, { "epoch": 1.1440754892233644, "grad_norm": 0.6883357763290405, "learning_rate": 4.615726970406355e-06, "loss": 0.0101, "step": 135490 }, { "epoch": 1.1441599290705284, "grad_norm": 0.0017402076628059149, "learning_rate": 4.614992272188068e-06, "loss": 0.0068, "step": 135500 }, { "epoch": 1.1442443689176922, "grad_norm": 0.08450254797935486, "learning_rate": 4.614257582332069e-06, "loss": 0.0042, "step": 135510 }, { "epoch": 1.1443288087648562, "grad_norm": 0.22585389018058777, "learning_rate": 4.613522900854318e-06, "loss": 0.0088, "step": 135520 }, { "epoch": 1.14441324861202, "grad_norm": 0.2628965675830841, "learning_rate": 4.612788227770767e-06, "loss": 0.0075, "step": 135530 }, { "epoch": 1.144497688459184, "grad_norm": 0.29655566811561584, "learning_rate": 4.61205356309738e-06, "loss": 0.0091, "step": 135540 }, { "epoch": 1.1445821283063478, "grad_norm": 0.1978546380996704, "learning_rate": 4.611318906850108e-06, "loss": 0.0047, "step": 135550 }, { "epoch": 1.1446665681535118, "grad_norm": 0.3047805726528168, "learning_rate": 4.610584259044911e-06, "loss": 0.0088, "step": 135560 }, { "epoch": 1.1447510080006755, "grad_norm": 0.3461456596851349, "learning_rate": 4.609849619697742e-06, "loss": 0.0063, "step": 135570 }, { "epoch": 1.1448354478478393, "grad_norm": 0.15003539621829987, "learning_rate": 4.609114988824561e-06, "loss": 0.004, "step": 135580 }, { "epoch": 1.1449198876950033, "grad_norm": 0.015521408058702946, "learning_rate": 4.608380366441323e-06, "loss": 0.0078, "step": 135590 }, { "epoch": 1.145004327542167, "grad_norm": 0.3216153681278229, "learning_rate": 4.607645752563981e-06, "loss": 0.0093, "step": 135600 }, { "epoch": 1.145088767389331, "grad_norm": 0.10661996155977249, "learning_rate": 4.606911147208495e-06, "loss": 0.0095, "step": 135610 }, { "epoch": 1.1451732072364949, "grad_norm": 0.17084898054599762, "learning_rate": 4.606176550390816e-06, "loss": 0.0067, "step": 135620 }, { "epoch": 1.1452576470836588, "grad_norm": 0.04390187934041023, "learning_rate": 4.6054419621269045e-06, "loss": 0.0081, "step": 135630 }, { "epoch": 1.1453420869308226, "grad_norm": 0.2629660665988922, "learning_rate": 4.60470738243271e-06, "loss": 0.0087, "step": 135640 }, { "epoch": 1.1454265267779866, "grad_norm": 0.13498803973197937, "learning_rate": 4.603972811324193e-06, "loss": 0.0071, "step": 135650 }, { "epoch": 1.1455109666251504, "grad_norm": 0.2053157091140747, "learning_rate": 4.603238248817302e-06, "loss": 0.0029, "step": 135660 }, { "epoch": 1.1455954064723142, "grad_norm": 0.32500478625297546, "learning_rate": 4.602503694927998e-06, "loss": 0.0081, "step": 135670 }, { "epoch": 1.1456798463194782, "grad_norm": 0.5463729500770569, "learning_rate": 4.60176914967223e-06, "loss": 0.0094, "step": 135680 }, { "epoch": 1.145764286166642, "grad_norm": 0.3227284550666809, "learning_rate": 4.6010346130659565e-06, "loss": 0.0044, "step": 135690 }, { "epoch": 1.145848726013806, "grad_norm": 0.29434025287628174, "learning_rate": 4.600300085125127e-06, "loss": 0.0103, "step": 135700 }, { "epoch": 1.1459331658609697, "grad_norm": 0.523116409778595, "learning_rate": 4.599565565865701e-06, "loss": 0.0059, "step": 135710 }, { "epoch": 1.1460176057081337, "grad_norm": 0.2722768783569336, "learning_rate": 4.598831055303628e-06, "loss": 0.0075, "step": 135720 }, { "epoch": 1.1461020455552975, "grad_norm": 0.11913304775953293, "learning_rate": 4.598096553454859e-06, "loss": 0.0033, "step": 135730 }, { "epoch": 1.1461864854024615, "grad_norm": 0.5051100850105286, "learning_rate": 4.597362060335354e-06, "loss": 0.0064, "step": 135740 }, { "epoch": 1.1462709252496253, "grad_norm": 0.14157356321811676, "learning_rate": 4.596627575961061e-06, "loss": 0.0099, "step": 135750 }, { "epoch": 1.1463553650967893, "grad_norm": 0.8740799427032471, "learning_rate": 4.595893100347936e-06, "loss": 0.0092, "step": 135760 }, { "epoch": 1.146439804943953, "grad_norm": 0.18854713439941406, "learning_rate": 4.595158633511927e-06, "loss": 0.0052, "step": 135770 }, { "epoch": 1.146524244791117, "grad_norm": 0.524918258190155, "learning_rate": 4.5944241754689925e-06, "loss": 0.0116, "step": 135780 }, { "epoch": 1.1466086846382808, "grad_norm": 0.06292547285556793, "learning_rate": 4.5936897262350795e-06, "loss": 0.004, "step": 135790 }, { "epoch": 1.1466931244854446, "grad_norm": 0.2766450047492981, "learning_rate": 4.5929552858261445e-06, "loss": 0.0053, "step": 135800 }, { "epoch": 1.1467775643326086, "grad_norm": 0.0667450949549675, "learning_rate": 4.5922208542581345e-06, "loss": 0.0078, "step": 135810 }, { "epoch": 1.1468620041797724, "grad_norm": 0.31206822395324707, "learning_rate": 4.591486431547007e-06, "loss": 0.0045, "step": 135820 }, { "epoch": 1.1469464440269364, "grad_norm": 0.23859162628650665, "learning_rate": 4.590752017708709e-06, "loss": 0.0083, "step": 135830 }, { "epoch": 1.1470308838741001, "grad_norm": 0.13097010552883148, "learning_rate": 4.590017612759194e-06, "loss": 0.0046, "step": 135840 }, { "epoch": 1.1471153237212641, "grad_norm": 0.06602121144533157, "learning_rate": 4.5892832167144125e-06, "loss": 0.0031, "step": 135850 }, { "epoch": 1.147199763568428, "grad_norm": 0.2596058249473572, "learning_rate": 4.5885488295903135e-06, "loss": 0.0066, "step": 135860 }, { "epoch": 1.147284203415592, "grad_norm": 0.13568726181983948, "learning_rate": 4.587814451402852e-06, "loss": 0.0087, "step": 135870 }, { "epoch": 1.1473686432627557, "grad_norm": 0.21773242950439453, "learning_rate": 4.587080082167974e-06, "loss": 0.0061, "step": 135880 }, { "epoch": 1.1474530831099194, "grad_norm": 0.24958448112010956, "learning_rate": 4.586345721901634e-06, "loss": 0.007, "step": 135890 }, { "epoch": 1.1475375229570834, "grad_norm": 0.09846561402082443, "learning_rate": 4.585611370619778e-06, "loss": 0.0073, "step": 135900 }, { "epoch": 1.1476219628042474, "grad_norm": 0.18599802255630493, "learning_rate": 4.58487702833836e-06, "loss": 0.0094, "step": 135910 }, { "epoch": 1.1477064026514112, "grad_norm": 0.272938072681427, "learning_rate": 4.584142695073327e-06, "loss": 0.0073, "step": 135920 }, { "epoch": 1.147790842498575, "grad_norm": 0.26993483304977417, "learning_rate": 4.583408370840631e-06, "loss": 0.0063, "step": 135930 }, { "epoch": 1.147875282345739, "grad_norm": 0.2243141233921051, "learning_rate": 4.5826740556562165e-06, "loss": 0.0035, "step": 135940 }, { "epoch": 1.1479597221929028, "grad_norm": 0.42016687989234924, "learning_rate": 4.581939749536039e-06, "loss": 0.0182, "step": 135950 }, { "epoch": 1.1480441620400668, "grad_norm": 0.28242695331573486, "learning_rate": 4.581205452496044e-06, "loss": 0.0067, "step": 135960 }, { "epoch": 1.1481286018872305, "grad_norm": 0.11879542469978333, "learning_rate": 4.58047116455218e-06, "loss": 0.0072, "step": 135970 }, { "epoch": 1.1482130417343945, "grad_norm": 0.39139696955680847, "learning_rate": 4.5797368857203975e-06, "loss": 0.0054, "step": 135980 }, { "epoch": 1.1482974815815583, "grad_norm": 0.18709106743335724, "learning_rate": 4.5790026160166415e-06, "loss": 0.0071, "step": 135990 }, { "epoch": 1.1483819214287223, "grad_norm": 0.3441479504108429, "learning_rate": 4.578268355456865e-06, "loss": 0.0111, "step": 136000 }, { "epoch": 1.148466361275886, "grad_norm": 0.0479547493159771, "learning_rate": 4.577534104057011e-06, "loss": 0.0073, "step": 136010 }, { "epoch": 1.1485508011230499, "grad_norm": 0.6197457313537598, "learning_rate": 4.576799861833032e-06, "loss": 0.0065, "step": 136020 }, { "epoch": 1.1486352409702139, "grad_norm": 0.24501445889472961, "learning_rate": 4.576065628800872e-06, "loss": 0.0075, "step": 136030 }, { "epoch": 1.1487196808173776, "grad_norm": 0.1977378875017166, "learning_rate": 4.57533140497648e-06, "loss": 0.01, "step": 136040 }, { "epoch": 1.1488041206645416, "grad_norm": 0.1632377654314041, "learning_rate": 4.574597190375801e-06, "loss": 0.0107, "step": 136050 }, { "epoch": 1.1488885605117054, "grad_norm": 0.4468460977077484, "learning_rate": 4.573862985014786e-06, "loss": 0.013, "step": 136060 }, { "epoch": 1.1489730003588694, "grad_norm": 0.16336889564990997, "learning_rate": 4.57312878890938e-06, "loss": 0.0068, "step": 136070 }, { "epoch": 1.1490574402060332, "grad_norm": 0.4358494281768799, "learning_rate": 4.5723946020755275e-06, "loss": 0.008, "step": 136080 }, { "epoch": 1.1491418800531972, "grad_norm": 0.021105075255036354, "learning_rate": 4.571660424529178e-06, "loss": 0.0067, "step": 136090 }, { "epoch": 1.149226319900361, "grad_norm": 0.17668424546718597, "learning_rate": 4.570926256286275e-06, "loss": 0.0052, "step": 136100 }, { "epoch": 1.149310759747525, "grad_norm": 0.1686021089553833, "learning_rate": 4.570192097362768e-06, "loss": 0.01, "step": 136110 }, { "epoch": 1.1493951995946887, "grad_norm": 0.3228873610496521, "learning_rate": 4.569457947774598e-06, "loss": 0.006, "step": 136120 }, { "epoch": 1.1494796394418527, "grad_norm": 0.3183799386024475, "learning_rate": 4.568723807537715e-06, "loss": 0.0067, "step": 136130 }, { "epoch": 1.1495640792890165, "grad_norm": 0.01641860604286194, "learning_rate": 4.56798967666806e-06, "loss": 0.0055, "step": 136140 }, { "epoch": 1.1496485191361803, "grad_norm": 0.11121094971895218, "learning_rate": 4.567255555181584e-06, "loss": 0.0065, "step": 136150 }, { "epoch": 1.1497329589833443, "grad_norm": 0.5390082001686096, "learning_rate": 4.566521443094226e-06, "loss": 0.0101, "step": 136160 }, { "epoch": 1.149817398830508, "grad_norm": 0.19694924354553223, "learning_rate": 4.565787340421936e-06, "loss": 0.0068, "step": 136170 }, { "epoch": 1.149901838677672, "grad_norm": 0.30300161242485046, "learning_rate": 4.565053247180653e-06, "loss": 0.0049, "step": 136180 }, { "epoch": 1.1499862785248358, "grad_norm": 0.12893825769424438, "learning_rate": 4.5643191633863265e-06, "loss": 0.0053, "step": 136190 }, { "epoch": 1.1500707183719998, "grad_norm": 0.24621796607971191, "learning_rate": 4.563585089054899e-06, "loss": 0.0066, "step": 136200 }, { "epoch": 1.1501551582191636, "grad_norm": 1.5428334474563599, "learning_rate": 4.562851024202312e-06, "loss": 0.0136, "step": 136210 }, { "epoch": 1.1502395980663276, "grad_norm": 0.1779763549566269, "learning_rate": 4.562116968844513e-06, "loss": 0.0053, "step": 136220 }, { "epoch": 1.1503240379134914, "grad_norm": 0.3052864372730255, "learning_rate": 4.561382922997441e-06, "loss": 0.0067, "step": 136230 }, { "epoch": 1.1504084777606551, "grad_norm": 0.2854815721511841, "learning_rate": 4.560648886677045e-06, "loss": 0.0058, "step": 136240 }, { "epoch": 1.1504929176078191, "grad_norm": 0.5215412974357605, "learning_rate": 4.5599148598992635e-06, "loss": 0.0091, "step": 136250 }, { "epoch": 1.1505773574549831, "grad_norm": 0.12555348873138428, "learning_rate": 4.559180842680042e-06, "loss": 0.0075, "step": 136260 }, { "epoch": 1.150661797302147, "grad_norm": 0.2780955731868744, "learning_rate": 4.5584468350353204e-06, "loss": 0.0058, "step": 136270 }, { "epoch": 1.1507462371493107, "grad_norm": 0.2419160008430481, "learning_rate": 4.5577128369810445e-06, "loss": 0.0043, "step": 136280 }, { "epoch": 1.1508306769964747, "grad_norm": 0.2597011625766754, "learning_rate": 4.556978848533155e-06, "loss": 0.0067, "step": 136290 }, { "epoch": 1.1509151168436385, "grad_norm": 0.25841301679611206, "learning_rate": 4.556244869707595e-06, "loss": 0.0054, "step": 136300 }, { "epoch": 1.1509995566908025, "grad_norm": 0.24028021097183228, "learning_rate": 4.555510900520305e-06, "loss": 0.0064, "step": 136310 }, { "epoch": 1.1510839965379662, "grad_norm": 0.2439424693584442, "learning_rate": 4.554776940987227e-06, "loss": 0.012, "step": 136320 }, { "epoch": 1.1511684363851302, "grad_norm": 0.1976311206817627, "learning_rate": 4.554042991124303e-06, "loss": 0.0053, "step": 136330 }, { "epoch": 1.151252876232294, "grad_norm": 0.2528330981731415, "learning_rate": 4.553309050947471e-06, "loss": 0.0041, "step": 136340 }, { "epoch": 1.151337316079458, "grad_norm": 0.6579450368881226, "learning_rate": 4.552575120472679e-06, "loss": 0.008, "step": 136350 }, { "epoch": 1.1514217559266218, "grad_norm": 0.2723046839237213, "learning_rate": 4.551841199715861e-06, "loss": 0.006, "step": 136360 }, { "epoch": 1.1515061957737855, "grad_norm": 0.2498302012681961, "learning_rate": 4.5511072886929616e-06, "loss": 0.0065, "step": 136370 }, { "epoch": 1.1515906356209495, "grad_norm": 0.08209015429019928, "learning_rate": 4.550373387419919e-06, "loss": 0.009, "step": 136380 }, { "epoch": 1.1516750754681133, "grad_norm": 0.1018122136592865, "learning_rate": 4.549639495912675e-06, "loss": 0.0108, "step": 136390 }, { "epoch": 1.1517595153152773, "grad_norm": 0.014819609001278877, "learning_rate": 4.548905614187168e-06, "loss": 0.0068, "step": 136400 }, { "epoch": 1.151843955162441, "grad_norm": 0.29910361766815186, "learning_rate": 4.54817174225934e-06, "loss": 0.0049, "step": 136410 }, { "epoch": 1.151928395009605, "grad_norm": 0.40721195936203003, "learning_rate": 4.547437880145129e-06, "loss": 0.0085, "step": 136420 }, { "epoch": 1.1520128348567689, "grad_norm": 0.08991999924182892, "learning_rate": 4.546704027860472e-06, "loss": 0.0094, "step": 136430 }, { "epoch": 1.1520972747039329, "grad_norm": 0.04183734953403473, "learning_rate": 4.545970185421314e-06, "loss": 0.004, "step": 136440 }, { "epoch": 1.1521817145510966, "grad_norm": 0.21868294477462769, "learning_rate": 4.545236352843588e-06, "loss": 0.0027, "step": 136450 }, { "epoch": 1.1522661543982606, "grad_norm": 0.47125571966171265, "learning_rate": 4.5445025301432374e-06, "loss": 0.011, "step": 136460 }, { "epoch": 1.1523505942454244, "grad_norm": 0.20160402357578278, "learning_rate": 4.543768717336196e-06, "loss": 0.0113, "step": 136470 }, { "epoch": 1.1524350340925884, "grad_norm": 0.23403400182724, "learning_rate": 4.543034914438407e-06, "loss": 0.0084, "step": 136480 }, { "epoch": 1.1525194739397522, "grad_norm": 0.7539472579956055, "learning_rate": 4.5423011214658044e-06, "loss": 0.0061, "step": 136490 }, { "epoch": 1.152603913786916, "grad_norm": 0.577922523021698, "learning_rate": 4.541567338434329e-06, "loss": 0.006, "step": 136500 }, { "epoch": 1.15268835363408, "grad_norm": 0.26905450224876404, "learning_rate": 4.540833565359915e-06, "loss": 0.0065, "step": 136510 }, { "epoch": 1.1527727934812437, "grad_norm": 0.3233473598957062, "learning_rate": 4.540099802258503e-06, "loss": 0.0064, "step": 136520 }, { "epoch": 1.1528572333284077, "grad_norm": 0.3790573179721832, "learning_rate": 4.539366049146029e-06, "loss": 0.0094, "step": 136530 }, { "epoch": 1.1529416731755715, "grad_norm": 0.12415452301502228, "learning_rate": 4.538632306038432e-06, "loss": 0.0045, "step": 136540 }, { "epoch": 1.1530261130227355, "grad_norm": 0.5048660635948181, "learning_rate": 4.5378985729516454e-06, "loss": 0.009, "step": 136550 }, { "epoch": 1.1531105528698993, "grad_norm": 0.04294391721487045, "learning_rate": 4.537164849901605e-06, "loss": 0.0123, "step": 136560 }, { "epoch": 1.1531949927170633, "grad_norm": 0.10765498876571655, "learning_rate": 4.5364311369042515e-06, "loss": 0.0071, "step": 136570 }, { "epoch": 1.153279432564227, "grad_norm": 0.15470227599143982, "learning_rate": 4.535697433975518e-06, "loss": 0.005, "step": 136580 }, { "epoch": 1.1533638724113908, "grad_norm": 0.5463731288909912, "learning_rate": 4.534963741131342e-06, "loss": 0.0074, "step": 136590 }, { "epoch": 1.1534483122585548, "grad_norm": 0.08921404927968979, "learning_rate": 4.534230058387656e-06, "loss": 0.0049, "step": 136600 }, { "epoch": 1.1535327521057186, "grad_norm": 0.1975315362215042, "learning_rate": 4.533496385760401e-06, "loss": 0.0097, "step": 136610 }, { "epoch": 1.1536171919528826, "grad_norm": 0.18907305598258972, "learning_rate": 4.5327627232655055e-06, "loss": 0.0131, "step": 136620 }, { "epoch": 1.1537016318000464, "grad_norm": 0.33558031916618347, "learning_rate": 4.53202907091891e-06, "loss": 0.0087, "step": 136630 }, { "epoch": 1.1537860716472104, "grad_norm": 0.34622764587402344, "learning_rate": 4.531295428736546e-06, "loss": 0.0096, "step": 136640 }, { "epoch": 1.1538705114943741, "grad_norm": 0.28972917795181274, "learning_rate": 4.530561796734351e-06, "loss": 0.0084, "step": 136650 }, { "epoch": 1.1539549513415381, "grad_norm": 0.355294793844223, "learning_rate": 4.529828174928258e-06, "loss": 0.0116, "step": 136660 }, { "epoch": 1.154039391188702, "grad_norm": 0.2926887571811676, "learning_rate": 4.529094563334197e-06, "loss": 0.0132, "step": 136670 }, { "epoch": 1.154123831035866, "grad_norm": 0.2591746747493744, "learning_rate": 4.5283609619681085e-06, "loss": 0.0106, "step": 136680 }, { "epoch": 1.1542082708830297, "grad_norm": 0.015622786246240139, "learning_rate": 4.527627370845922e-06, "loss": 0.0043, "step": 136690 }, { "epoch": 1.1542927107301937, "grad_norm": 0.2217039316892624, "learning_rate": 4.526893789983574e-06, "loss": 0.0077, "step": 136700 }, { "epoch": 1.1543771505773575, "grad_norm": 0.20588770508766174, "learning_rate": 4.526160219396993e-06, "loss": 0.0051, "step": 136710 }, { "epoch": 1.1544615904245212, "grad_norm": 0.6192346215248108, "learning_rate": 4.525426659102117e-06, "loss": 0.0078, "step": 136720 }, { "epoch": 1.1545460302716852, "grad_norm": 0.3729313015937805, "learning_rate": 4.524693109114876e-06, "loss": 0.0099, "step": 136730 }, { "epoch": 1.154630470118849, "grad_norm": 0.2771168053150177, "learning_rate": 4.523959569451204e-06, "loss": 0.0105, "step": 136740 }, { "epoch": 1.154714909966013, "grad_norm": 0.07693695276975632, "learning_rate": 4.523226040127031e-06, "loss": 0.0087, "step": 136750 }, { "epoch": 1.1547993498131768, "grad_norm": 0.45006194710731506, "learning_rate": 4.5224925211582924e-06, "loss": 0.0049, "step": 136760 }, { "epoch": 1.1548837896603408, "grad_norm": 0.33526214957237244, "learning_rate": 4.5217590125609175e-06, "loss": 0.0051, "step": 136770 }, { "epoch": 1.1549682295075046, "grad_norm": 0.3885957598686218, "learning_rate": 4.521025514350841e-06, "loss": 0.0045, "step": 136780 }, { "epoch": 1.1550526693546685, "grad_norm": 0.010395783931016922, "learning_rate": 4.52029202654399e-06, "loss": 0.0051, "step": 136790 }, { "epoch": 1.1551371092018323, "grad_norm": 0.1097307801246643, "learning_rate": 4.519558549156297e-06, "loss": 0.0042, "step": 136800 }, { "epoch": 1.155221549048996, "grad_norm": 0.19876165688037872, "learning_rate": 4.518825082203696e-06, "loss": 0.0093, "step": 136810 }, { "epoch": 1.15530598889616, "grad_norm": 0.021996237337589264, "learning_rate": 4.518091625702114e-06, "loss": 0.009, "step": 136820 }, { "epoch": 1.155390428743324, "grad_norm": 0.3825439214706421, "learning_rate": 4.517358179667485e-06, "loss": 0.0059, "step": 136830 }, { "epoch": 1.1554748685904879, "grad_norm": 0.19930268824100494, "learning_rate": 4.516624744115734e-06, "loss": 0.0084, "step": 136840 }, { "epoch": 1.1555593084376516, "grad_norm": 0.05592802166938782, "learning_rate": 4.515891319062799e-06, "loss": 0.008, "step": 136850 }, { "epoch": 1.1556437482848156, "grad_norm": 0.30207180976867676, "learning_rate": 4.5151579045246016e-06, "loss": 0.0035, "step": 136860 }, { "epoch": 1.1557281881319794, "grad_norm": 0.4400841295719147, "learning_rate": 4.514424500517078e-06, "loss": 0.0067, "step": 136870 }, { "epoch": 1.1558126279791434, "grad_norm": 0.05330781266093254, "learning_rate": 4.513691107056152e-06, "loss": 0.0059, "step": 136880 }, { "epoch": 1.1558970678263072, "grad_norm": 0.46905577182769775, "learning_rate": 4.5129577241577575e-06, "loss": 0.0074, "step": 136890 }, { "epoch": 1.1559815076734712, "grad_norm": 0.2521097958087921, "learning_rate": 4.512224351837823e-06, "loss": 0.0067, "step": 136900 }, { "epoch": 1.156065947520635, "grad_norm": 0.5607438087463379, "learning_rate": 4.511490990112271e-06, "loss": 0.0134, "step": 136910 }, { "epoch": 1.156150387367799, "grad_norm": 0.09723649173974991, "learning_rate": 4.510757638997039e-06, "loss": 0.0064, "step": 136920 }, { "epoch": 1.1562348272149627, "grad_norm": 0.24765314161777496, "learning_rate": 4.5100242985080485e-06, "loss": 0.0093, "step": 136930 }, { "epoch": 1.1563192670621265, "grad_norm": 0.759789228439331, "learning_rate": 4.509290968661231e-06, "loss": 0.012, "step": 136940 }, { "epoch": 1.1564037069092905, "grad_norm": 0.02679145336151123, "learning_rate": 4.508557649472511e-06, "loss": 0.0136, "step": 136950 }, { "epoch": 1.1564881467564543, "grad_norm": 0.39680254459381104, "learning_rate": 4.50782434095782e-06, "loss": 0.0141, "step": 136960 }, { "epoch": 1.1565725866036183, "grad_norm": 0.27161121368408203, "learning_rate": 4.507091043133083e-06, "loss": 0.0108, "step": 136970 }, { "epoch": 1.156657026450782, "grad_norm": 0.7485761046409607, "learning_rate": 4.506357756014228e-06, "loss": 0.0089, "step": 136980 }, { "epoch": 1.156741466297946, "grad_norm": 0.37139803171157837, "learning_rate": 4.50562447961718e-06, "loss": 0.0099, "step": 136990 }, { "epoch": 1.1568259061451098, "grad_norm": 0.7559016942977905, "learning_rate": 4.504891213957868e-06, "loss": 0.0054, "step": 137000 }, { "epoch": 1.1569103459922738, "grad_norm": 0.15675626695156097, "learning_rate": 4.5041579590522175e-06, "loss": 0.0068, "step": 137010 }, { "epoch": 1.1569947858394376, "grad_norm": 0.42887771129608154, "learning_rate": 4.503424714916154e-06, "loss": 0.0093, "step": 137020 }, { "epoch": 1.1570792256866016, "grad_norm": 0.560896635055542, "learning_rate": 4.502691481565605e-06, "loss": 0.0118, "step": 137030 }, { "epoch": 1.1571636655337654, "grad_norm": 0.2586234211921692, "learning_rate": 4.501958259016493e-06, "loss": 0.0083, "step": 137040 }, { "epoch": 1.1572481053809294, "grad_norm": 0.2774287760257721, "learning_rate": 4.501225047284748e-06, "loss": 0.0105, "step": 137050 }, { "epoch": 1.1573325452280931, "grad_norm": 0.16841216385364532, "learning_rate": 4.500491846386291e-06, "loss": 0.0082, "step": 137060 }, { "epoch": 1.157416985075257, "grad_norm": 0.153018057346344, "learning_rate": 4.49975865633705e-06, "loss": 0.0069, "step": 137070 }, { "epoch": 1.157501424922421, "grad_norm": 0.16355997323989868, "learning_rate": 4.499025477152947e-06, "loss": 0.0062, "step": 137080 }, { "epoch": 1.1575858647695847, "grad_norm": 0.09796591103076935, "learning_rate": 4.498292308849909e-06, "loss": 0.004, "step": 137090 }, { "epoch": 1.1576703046167487, "grad_norm": 0.11257591098546982, "learning_rate": 4.4975591514438585e-06, "loss": 0.0081, "step": 137100 }, { "epoch": 1.1577547444639125, "grad_norm": 1.1317989826202393, "learning_rate": 4.496826004950721e-06, "loss": 0.01, "step": 137110 }, { "epoch": 1.1578391843110765, "grad_norm": 0.4332915246486664, "learning_rate": 4.496092869386419e-06, "loss": 0.007, "step": 137120 }, { "epoch": 1.1579236241582402, "grad_norm": 0.7238273620605469, "learning_rate": 4.495359744766878e-06, "loss": 0.0081, "step": 137130 }, { "epoch": 1.1580080640054042, "grad_norm": 0.17556315660476685, "learning_rate": 4.4946266311080194e-06, "loss": 0.0107, "step": 137140 }, { "epoch": 1.158092503852568, "grad_norm": 0.16020342707633972, "learning_rate": 4.493893528425766e-06, "loss": 0.0159, "step": 137150 }, { "epoch": 1.1581769436997318, "grad_norm": 0.10021945834159851, "learning_rate": 4.4931604367360425e-06, "loss": 0.0048, "step": 137160 }, { "epoch": 1.1582613835468958, "grad_norm": 1.1894636154174805, "learning_rate": 4.492427356054768e-06, "loss": 0.0118, "step": 137170 }, { "epoch": 1.1583458233940598, "grad_norm": 0.21667741239070892, "learning_rate": 4.49169428639787e-06, "loss": 0.0077, "step": 137180 }, { "epoch": 1.1584302632412236, "grad_norm": 0.3552192747592926, "learning_rate": 4.490961227781267e-06, "loss": 0.007, "step": 137190 }, { "epoch": 1.1585147030883873, "grad_norm": 0.23422648012638092, "learning_rate": 4.4902281802208824e-06, "loss": 0.0073, "step": 137200 }, { "epoch": 1.1585991429355513, "grad_norm": 0.2896592319011688, "learning_rate": 4.489495143732636e-06, "loss": 0.007, "step": 137210 }, { "epoch": 1.158683582782715, "grad_norm": 0.7144678831100464, "learning_rate": 4.488762118332453e-06, "loss": 0.0103, "step": 137220 }, { "epoch": 1.158768022629879, "grad_norm": 0.17622770369052887, "learning_rate": 4.4880291040362485e-06, "loss": 0.0081, "step": 137230 }, { "epoch": 1.1588524624770429, "grad_norm": 0.3388465344905853, "learning_rate": 4.487296100859949e-06, "loss": 0.0092, "step": 137240 }, { "epoch": 1.1589369023242069, "grad_norm": 0.2185746282339096, "learning_rate": 4.486563108819474e-06, "loss": 0.0087, "step": 137250 }, { "epoch": 1.1590213421713706, "grad_norm": 0.2422713190317154, "learning_rate": 4.485830127930741e-06, "loss": 0.0056, "step": 137260 }, { "epoch": 1.1591057820185346, "grad_norm": 0.5208991169929504, "learning_rate": 4.485097158209674e-06, "loss": 0.01, "step": 137270 }, { "epoch": 1.1591902218656984, "grad_norm": 0.4706215560436249, "learning_rate": 4.4843641996721885e-06, "loss": 0.0081, "step": 137280 }, { "epoch": 1.1592746617128622, "grad_norm": 0.3356597125530243, "learning_rate": 4.4836312523342096e-06, "loss": 0.0079, "step": 137290 }, { "epoch": 1.1593591015600262, "grad_norm": 0.353985071182251, "learning_rate": 4.482898316211653e-06, "loss": 0.0068, "step": 137300 }, { "epoch": 1.15944354140719, "grad_norm": 0.2903587818145752, "learning_rate": 4.48216539132044e-06, "loss": 0.0096, "step": 137310 }, { "epoch": 1.159527981254354, "grad_norm": 0.07639311999082565, "learning_rate": 4.481432477676486e-06, "loss": 0.0049, "step": 137320 }, { "epoch": 1.1596124211015177, "grad_norm": 0.02981121838092804, "learning_rate": 4.480699575295715e-06, "loss": 0.0092, "step": 137330 }, { "epoch": 1.1596968609486817, "grad_norm": 0.5787899494171143, "learning_rate": 4.479966684194042e-06, "loss": 0.0071, "step": 137340 }, { "epoch": 1.1597813007958455, "grad_norm": 0.2972547709941864, "learning_rate": 4.479233804387387e-06, "loss": 0.0069, "step": 137350 }, { "epoch": 1.1598657406430095, "grad_norm": 0.28671956062316895, "learning_rate": 4.478500935891664e-06, "loss": 0.0108, "step": 137360 }, { "epoch": 1.1599501804901733, "grad_norm": 0.07529478520154953, "learning_rate": 4.477768078722797e-06, "loss": 0.0071, "step": 137370 }, { "epoch": 1.1600346203373373, "grad_norm": 0.19014905393123627, "learning_rate": 4.4770352328967e-06, "loss": 0.0054, "step": 137380 }, { "epoch": 1.160119060184501, "grad_norm": 0.33302587270736694, "learning_rate": 4.476302398429289e-06, "loss": 0.0077, "step": 137390 }, { "epoch": 1.160203500031665, "grad_norm": 0.5185018181800842, "learning_rate": 4.475569575336484e-06, "loss": 0.0071, "step": 137400 }, { "epoch": 1.1602879398788288, "grad_norm": 0.16639937460422516, "learning_rate": 4.4748367636341985e-06, "loss": 0.0082, "step": 137410 }, { "epoch": 1.1603723797259926, "grad_norm": 0.301781564950943, "learning_rate": 4.4741039633383525e-06, "loss": 0.0074, "step": 137420 }, { "epoch": 1.1604568195731566, "grad_norm": 0.35743460059165955, "learning_rate": 4.47337117446486e-06, "loss": 0.0091, "step": 137430 }, { "epoch": 1.1605412594203204, "grad_norm": 0.08572059869766235, "learning_rate": 4.472638397029639e-06, "loss": 0.0083, "step": 137440 }, { "epoch": 1.1606256992674844, "grad_norm": 0.41385990381240845, "learning_rate": 4.471905631048601e-06, "loss": 0.0102, "step": 137450 }, { "epoch": 1.1607101391146482, "grad_norm": 0.22368738055229187, "learning_rate": 4.471172876537667e-06, "loss": 0.008, "step": 137460 }, { "epoch": 1.1607945789618122, "grad_norm": 0.2947443425655365, "learning_rate": 4.470440133512749e-06, "loss": 0.0109, "step": 137470 }, { "epoch": 1.160879018808976, "grad_norm": 0.8297967314720154, "learning_rate": 4.469707401989764e-06, "loss": 0.0075, "step": 137480 }, { "epoch": 1.16096345865614, "grad_norm": 1.2866986989974976, "learning_rate": 4.468974681984625e-06, "loss": 0.0177, "step": 137490 }, { "epoch": 1.1610478985033037, "grad_norm": 0.09508950263261795, "learning_rate": 4.468241973513245e-06, "loss": 0.0076, "step": 137500 }, { "epoch": 1.1611323383504675, "grad_norm": 0.4111981689929962, "learning_rate": 4.4675092765915434e-06, "loss": 0.0222, "step": 137510 }, { "epoch": 1.1612167781976315, "grad_norm": 0.2440875917673111, "learning_rate": 4.466776591235428e-06, "loss": 0.0081, "step": 137520 }, { "epoch": 1.1613012180447952, "grad_norm": 0.31642377376556396, "learning_rate": 4.466043917460818e-06, "loss": 0.0084, "step": 137530 }, { "epoch": 1.1613856578919592, "grad_norm": 0.4330582022666931, "learning_rate": 4.465311255283624e-06, "loss": 0.0068, "step": 137540 }, { "epoch": 1.161470097739123, "grad_norm": 0.22003266215324402, "learning_rate": 4.464578604719761e-06, "loss": 0.0075, "step": 137550 }, { "epoch": 1.161554537586287, "grad_norm": 0.26423680782318115, "learning_rate": 4.463845965785139e-06, "loss": 0.0097, "step": 137560 }, { "epoch": 1.1616389774334508, "grad_norm": 0.6603739857673645, "learning_rate": 4.463113338495674e-06, "loss": 0.0142, "step": 137570 }, { "epoch": 1.1617234172806148, "grad_norm": 0.21031667292118073, "learning_rate": 4.462380722867277e-06, "loss": 0.0105, "step": 137580 }, { "epoch": 1.1618078571277786, "grad_norm": 0.1427680104970932, "learning_rate": 4.461648118915862e-06, "loss": 0.0068, "step": 137590 }, { "epoch": 1.1618922969749426, "grad_norm": 0.1538996547460556, "learning_rate": 4.460915526657339e-06, "loss": 0.0101, "step": 137600 }, { "epoch": 1.1619767368221063, "grad_norm": 0.07887087017297745, "learning_rate": 4.460182946107617e-06, "loss": 0.0051, "step": 137610 }, { "epoch": 1.1620611766692703, "grad_norm": 0.04092736914753914, "learning_rate": 4.459450377282614e-06, "loss": 0.0064, "step": 137620 }, { "epoch": 1.162145616516434, "grad_norm": 0.42223235964775085, "learning_rate": 4.458717820198237e-06, "loss": 0.0135, "step": 137630 }, { "epoch": 1.1622300563635979, "grad_norm": 0.2441377490758896, "learning_rate": 4.457985274870399e-06, "loss": 0.0084, "step": 137640 }, { "epoch": 1.1623144962107619, "grad_norm": 0.3004937469959259, "learning_rate": 4.457252741315008e-06, "loss": 0.0051, "step": 137650 }, { "epoch": 1.1623989360579257, "grad_norm": 0.3776302933692932, "learning_rate": 4.456520219547978e-06, "loss": 0.0091, "step": 137660 }, { "epoch": 1.1624833759050897, "grad_norm": 0.6758772730827332, "learning_rate": 4.455787709585217e-06, "loss": 0.0098, "step": 137670 }, { "epoch": 1.1625678157522534, "grad_norm": 0.3659604787826538, "learning_rate": 4.455055211442635e-06, "loss": 0.0066, "step": 137680 }, { "epoch": 1.1626522555994174, "grad_norm": 0.26951396465301514, "learning_rate": 4.454322725136141e-06, "loss": 0.0048, "step": 137690 }, { "epoch": 1.1627366954465812, "grad_norm": 0.2552066743373871, "learning_rate": 4.453590250681648e-06, "loss": 0.0077, "step": 137700 }, { "epoch": 1.1628211352937452, "grad_norm": 0.20842938125133514, "learning_rate": 4.452857788095062e-06, "loss": 0.0085, "step": 137710 }, { "epoch": 1.162905575140909, "grad_norm": 0.0642385482788086, "learning_rate": 4.452125337392293e-06, "loss": 0.007, "step": 137720 }, { "epoch": 1.1629900149880728, "grad_norm": 0.1250116527080536, "learning_rate": 4.451392898589249e-06, "loss": 0.0085, "step": 137730 }, { "epoch": 1.1630744548352367, "grad_norm": 0.004398406483232975, "learning_rate": 4.450660471701838e-06, "loss": 0.007, "step": 137740 }, { "epoch": 1.1631588946824007, "grad_norm": 0.5019441246986389, "learning_rate": 4.4499280567459706e-06, "loss": 0.0114, "step": 137750 }, { "epoch": 1.1632433345295645, "grad_norm": 0.20670635998249054, "learning_rate": 4.449195653737551e-06, "loss": 0.008, "step": 137760 }, { "epoch": 1.1633277743767283, "grad_norm": 0.2672169804573059, "learning_rate": 4.4484632626924916e-06, "loss": 0.0059, "step": 137770 }, { "epoch": 1.1634122142238923, "grad_norm": 0.6315407752990723, "learning_rate": 4.4477308836266935e-06, "loss": 0.018, "step": 137780 }, { "epoch": 1.163496654071056, "grad_norm": 0.3393717408180237, "learning_rate": 4.44699851655607e-06, "loss": 0.0072, "step": 137790 }, { "epoch": 1.16358109391822, "grad_norm": 0.2950482964515686, "learning_rate": 4.446266161496525e-06, "loss": 0.0128, "step": 137800 }, { "epoch": 1.1636655337653838, "grad_norm": 0.6123856902122498, "learning_rate": 4.445533818463966e-06, "loss": 0.0129, "step": 137810 }, { "epoch": 1.1637499736125478, "grad_norm": 0.3755723834037781, "learning_rate": 4.444801487474297e-06, "loss": 0.0104, "step": 137820 }, { "epoch": 1.1638344134597116, "grad_norm": 0.9031380414962769, "learning_rate": 4.444069168543429e-06, "loss": 0.0087, "step": 137830 }, { "epoch": 1.1639188533068756, "grad_norm": 0.02685615047812462, "learning_rate": 4.443336861687263e-06, "loss": 0.0049, "step": 137840 }, { "epoch": 1.1640032931540394, "grad_norm": 0.12206505239009857, "learning_rate": 4.442604566921705e-06, "loss": 0.0109, "step": 137850 }, { "epoch": 1.1640877330012032, "grad_norm": 0.5306680798530579, "learning_rate": 4.441872284262664e-06, "loss": 0.0097, "step": 137860 }, { "epoch": 1.1641721728483672, "grad_norm": 0.5473002195358276, "learning_rate": 4.441140013726041e-06, "loss": 0.0107, "step": 137870 }, { "epoch": 1.164256612695531, "grad_norm": 0.2146078199148178, "learning_rate": 4.4404077553277435e-06, "loss": 0.0075, "step": 137880 }, { "epoch": 1.164341052542695, "grad_norm": 0.011438485234975815, "learning_rate": 4.439675509083673e-06, "loss": 0.0039, "step": 137890 }, { "epoch": 1.1644254923898587, "grad_norm": 0.20898941159248352, "learning_rate": 4.438943275009737e-06, "loss": 0.0033, "step": 137900 }, { "epoch": 1.1645099322370227, "grad_norm": 0.008652852848172188, "learning_rate": 4.438211053121838e-06, "loss": 0.005, "step": 137910 }, { "epoch": 1.1645943720841865, "grad_norm": 0.4423467218875885, "learning_rate": 4.4374788434358805e-06, "loss": 0.0101, "step": 137920 }, { "epoch": 1.1646788119313505, "grad_norm": 0.19056423008441925, "learning_rate": 4.436746645967765e-06, "loss": 0.0094, "step": 137930 }, { "epoch": 1.1647632517785143, "grad_norm": 0.15018802881240845, "learning_rate": 4.436014460733399e-06, "loss": 0.0053, "step": 137940 }, { "epoch": 1.1648476916256782, "grad_norm": 0.0009486086200922728, "learning_rate": 4.435282287748683e-06, "loss": 0.0073, "step": 137950 }, { "epoch": 1.164932131472842, "grad_norm": 0.4955478012561798, "learning_rate": 4.43455012702952e-06, "loss": 0.008, "step": 137960 }, { "epoch": 1.165016571320006, "grad_norm": 0.5917415022850037, "learning_rate": 4.433817978591813e-06, "loss": 0.0086, "step": 137970 }, { "epoch": 1.1651010111671698, "grad_norm": 0.11024247854948044, "learning_rate": 4.433085842451461e-06, "loss": 0.0041, "step": 137980 }, { "epoch": 1.1651854510143336, "grad_norm": 0.4746638238430023, "learning_rate": 4.43235371862437e-06, "loss": 0.0066, "step": 137990 }, { "epoch": 1.1652698908614976, "grad_norm": 0.1576668620109558, "learning_rate": 4.43162160712644e-06, "loss": 0.0071, "step": 138000 }, { "epoch": 1.1653543307086613, "grad_norm": 0.21472512185573578, "learning_rate": 4.430889507973572e-06, "loss": 0.0035, "step": 138010 }, { "epoch": 1.1654387705558253, "grad_norm": 0.28060561418533325, "learning_rate": 4.4301574211816665e-06, "loss": 0.0041, "step": 138020 }, { "epoch": 1.1655232104029891, "grad_norm": 0.348512202501297, "learning_rate": 4.429425346766626e-06, "loss": 0.0077, "step": 138030 }, { "epoch": 1.1656076502501531, "grad_norm": 0.47039130330085754, "learning_rate": 4.42869328474435e-06, "loss": 0.0069, "step": 138040 }, { "epoch": 1.165692090097317, "grad_norm": 0.5419498682022095, "learning_rate": 4.42796123513074e-06, "loss": 0.0085, "step": 138050 }, { "epoch": 1.1657765299444809, "grad_norm": 0.23999257385730743, "learning_rate": 4.427229197941691e-06, "loss": 0.0059, "step": 138060 }, { "epoch": 1.1658609697916447, "grad_norm": 0.1671970635652542, "learning_rate": 4.42649717319311e-06, "loss": 0.0116, "step": 138070 }, { "epoch": 1.1659454096388084, "grad_norm": 0.10762035846710205, "learning_rate": 4.425765160900893e-06, "loss": 0.0087, "step": 138080 }, { "epoch": 1.1660298494859724, "grad_norm": 0.35931235551834106, "learning_rate": 4.425033161080937e-06, "loss": 0.0081, "step": 138090 }, { "epoch": 1.1661142893331362, "grad_norm": 0.07580570876598358, "learning_rate": 4.424301173749145e-06, "loss": 0.0048, "step": 138100 }, { "epoch": 1.1661987291803002, "grad_norm": 1.018625020980835, "learning_rate": 4.423569198921411e-06, "loss": 0.0072, "step": 138110 }, { "epoch": 1.166283169027464, "grad_norm": 0.1566244512796402, "learning_rate": 4.422837236613638e-06, "loss": 0.0054, "step": 138120 }, { "epoch": 1.166367608874628, "grad_norm": 0.17097975313663483, "learning_rate": 4.42210528684172e-06, "loss": 0.0075, "step": 138130 }, { "epoch": 1.1664520487217918, "grad_norm": 0.6605791449546814, "learning_rate": 4.421373349621558e-06, "loss": 0.0076, "step": 138140 }, { "epoch": 1.1665364885689558, "grad_norm": 0.33720025420188904, "learning_rate": 4.420641424969048e-06, "loss": 0.0098, "step": 138150 }, { "epoch": 1.1666209284161195, "grad_norm": 0.19298696517944336, "learning_rate": 4.419909512900088e-06, "loss": 0.0037, "step": 138160 }, { "epoch": 1.1667053682632835, "grad_norm": 0.08023479580879211, "learning_rate": 4.419177613430573e-06, "loss": 0.0075, "step": 138170 }, { "epoch": 1.1667898081104473, "grad_norm": 0.0880340188741684, "learning_rate": 4.418445726576403e-06, "loss": 0.0063, "step": 138180 }, { "epoch": 1.1668742479576113, "grad_norm": 0.6429287195205688, "learning_rate": 4.417713852353472e-06, "loss": 0.01, "step": 138190 }, { "epoch": 1.166958687804775, "grad_norm": 0.7033601403236389, "learning_rate": 4.416981990777677e-06, "loss": 0.0068, "step": 138200 }, { "epoch": 1.1670431276519388, "grad_norm": 0.013888672925531864, "learning_rate": 4.4162501418649145e-06, "loss": 0.0056, "step": 138210 }, { "epoch": 1.1671275674991028, "grad_norm": 0.22414907813072205, "learning_rate": 4.415518305631077e-06, "loss": 0.0089, "step": 138220 }, { "epoch": 1.1672120073462666, "grad_norm": 0.4413866400718689, "learning_rate": 4.4147864820920635e-06, "loss": 0.0126, "step": 138230 }, { "epoch": 1.1672964471934306, "grad_norm": 0.13645042479038239, "learning_rate": 4.414054671263768e-06, "loss": 0.0118, "step": 138240 }, { "epoch": 1.1673808870405944, "grad_norm": 0.2654018700122833, "learning_rate": 4.413322873162086e-06, "loss": 0.0073, "step": 138250 }, { "epoch": 1.1674653268877584, "grad_norm": 0.7518860697746277, "learning_rate": 4.412591087802907e-06, "loss": 0.0144, "step": 138260 }, { "epoch": 1.1675497667349222, "grad_norm": 0.3887731432914734, "learning_rate": 4.4118593152021325e-06, "loss": 0.0053, "step": 138270 }, { "epoch": 1.1676342065820862, "grad_norm": 0.2469434142112732, "learning_rate": 4.4111275553756524e-06, "loss": 0.0101, "step": 138280 }, { "epoch": 1.16771864642925, "grad_norm": 0.2985697388648987, "learning_rate": 4.410395808339361e-06, "loss": 0.0069, "step": 138290 }, { "epoch": 1.1678030862764137, "grad_norm": 0.11423476785421371, "learning_rate": 4.4096640741091514e-06, "loss": 0.0042, "step": 138300 }, { "epoch": 1.1678875261235777, "grad_norm": 0.38918831944465637, "learning_rate": 4.408932352700918e-06, "loss": 0.0108, "step": 138310 }, { "epoch": 1.1679719659707417, "grad_norm": 0.49861985445022583, "learning_rate": 4.408200644130554e-06, "loss": 0.0094, "step": 138320 }, { "epoch": 1.1680564058179055, "grad_norm": 0.4317034184932709, "learning_rate": 4.4074689484139486e-06, "loss": 0.0075, "step": 138330 }, { "epoch": 1.1681408456650693, "grad_norm": 0.41905415058135986, "learning_rate": 4.406737265566997e-06, "loss": 0.0087, "step": 138340 }, { "epoch": 1.1682252855122333, "grad_norm": 0.04847581684589386, "learning_rate": 4.4060055956055895e-06, "loss": 0.0078, "step": 138350 }, { "epoch": 1.168309725359397, "grad_norm": 0.47801342606544495, "learning_rate": 4.40527393854562e-06, "loss": 0.0117, "step": 138360 }, { "epoch": 1.168394165206561, "grad_norm": 0.2772231101989746, "learning_rate": 4.404542294402978e-06, "loss": 0.0128, "step": 138370 }, { "epoch": 1.1684786050537248, "grad_norm": 0.21132831275463104, "learning_rate": 4.403810663193556e-06, "loss": 0.0092, "step": 138380 }, { "epoch": 1.1685630449008888, "grad_norm": 0.21546712517738342, "learning_rate": 4.4030790449332415e-06, "loss": 0.0088, "step": 138390 }, { "epoch": 1.1686474847480526, "grad_norm": 0.12923389673233032, "learning_rate": 4.402347439637931e-06, "loss": 0.0051, "step": 138400 }, { "epoch": 1.1687319245952166, "grad_norm": 0.7785712480545044, "learning_rate": 4.401615847323511e-06, "loss": 0.0146, "step": 138410 }, { "epoch": 1.1688163644423804, "grad_norm": 0.16262301802635193, "learning_rate": 4.400884268005872e-06, "loss": 0.0047, "step": 138420 }, { "epoch": 1.1689008042895441, "grad_norm": 0.29100242257118225, "learning_rate": 4.400152701700905e-06, "loss": 0.0069, "step": 138430 }, { "epoch": 1.1689852441367081, "grad_norm": 0.28310346603393555, "learning_rate": 4.399421148424496e-06, "loss": 0.013, "step": 138440 }, { "epoch": 1.169069683983872, "grad_norm": 0.22192877531051636, "learning_rate": 4.398689608192539e-06, "loss": 0.01, "step": 138450 }, { "epoch": 1.169154123831036, "grad_norm": 0.8277173638343811, "learning_rate": 4.3979580810209174e-06, "loss": 0.0083, "step": 138460 }, { "epoch": 1.1692385636781997, "grad_norm": 0.716555118560791, "learning_rate": 4.397226566925526e-06, "loss": 0.0101, "step": 138470 }, { "epoch": 1.1693230035253637, "grad_norm": 0.013694738037884235, "learning_rate": 4.396495065922248e-06, "loss": 0.007, "step": 138480 }, { "epoch": 1.1694074433725274, "grad_norm": 0.170553520321846, "learning_rate": 4.395763578026976e-06, "loss": 0.0064, "step": 138490 }, { "epoch": 1.1694918832196914, "grad_norm": 0.07914374023675919, "learning_rate": 4.395032103255592e-06, "loss": 0.0045, "step": 138500 }, { "epoch": 1.1695763230668552, "grad_norm": 1.3916211128234863, "learning_rate": 4.3943006416239885e-06, "loss": 0.0066, "step": 138510 }, { "epoch": 1.1696607629140192, "grad_norm": 0.17743854224681854, "learning_rate": 4.3935691931480505e-06, "loss": 0.0086, "step": 138520 }, { "epoch": 1.169745202761183, "grad_norm": 0.048102084547281265, "learning_rate": 4.392837757843666e-06, "loss": 0.0072, "step": 138530 }, { "epoch": 1.169829642608347, "grad_norm": 0.158777117729187, "learning_rate": 4.39210633572672e-06, "loss": 0.0071, "step": 138540 }, { "epoch": 1.1699140824555108, "grad_norm": 0.030777765437960625, "learning_rate": 4.391374926813101e-06, "loss": 0.0054, "step": 138550 }, { "epoch": 1.1699985223026745, "grad_norm": 0.3893420398235321, "learning_rate": 4.390643531118694e-06, "loss": 0.0102, "step": 138560 }, { "epoch": 1.1700829621498385, "grad_norm": 0.39994004368782043, "learning_rate": 4.3899121486593835e-06, "loss": 0.009, "step": 138570 }, { "epoch": 1.1701674019970023, "grad_norm": 0.004539805464446545, "learning_rate": 4.389180779451057e-06, "loss": 0.0056, "step": 138580 }, { "epoch": 1.1702518418441663, "grad_norm": 0.4587760269641876, "learning_rate": 4.388449423509597e-06, "loss": 0.0113, "step": 138590 }, { "epoch": 1.17033628169133, "grad_norm": 0.165756955742836, "learning_rate": 4.387718080850893e-06, "loss": 0.0115, "step": 138600 }, { "epoch": 1.170420721538494, "grad_norm": 1.189927101135254, "learning_rate": 4.386986751490825e-06, "loss": 0.0082, "step": 138610 }, { "epoch": 1.1705051613856579, "grad_norm": 0.4676445424556732, "learning_rate": 4.386255435445279e-06, "loss": 0.0063, "step": 138620 }, { "epoch": 1.1705896012328219, "grad_norm": 0.2528250813484192, "learning_rate": 4.385524132730139e-06, "loss": 0.0062, "step": 138630 }, { "epoch": 1.1706740410799856, "grad_norm": 0.15545140206813812, "learning_rate": 4.38479284336129e-06, "loss": 0.004, "step": 138640 }, { "epoch": 1.1707584809271494, "grad_norm": 0.24495980143547058, "learning_rate": 4.384061567354614e-06, "loss": 0.0037, "step": 138650 }, { "epoch": 1.1708429207743134, "grad_norm": 1.2929487228393555, "learning_rate": 4.383330304725995e-06, "loss": 0.0092, "step": 138660 }, { "epoch": 1.1709273606214774, "grad_norm": 0.24236707389354706, "learning_rate": 4.382599055491315e-06, "loss": 0.0045, "step": 138670 }, { "epoch": 1.1710118004686412, "grad_norm": 0.6077931523323059, "learning_rate": 4.381867819666456e-06, "loss": 0.0086, "step": 138680 }, { "epoch": 1.171096240315805, "grad_norm": 0.5130186080932617, "learning_rate": 4.381136597267303e-06, "loss": 0.0076, "step": 138690 }, { "epoch": 1.171180680162969, "grad_norm": 0.11450174450874329, "learning_rate": 4.3804053883097355e-06, "loss": 0.0067, "step": 138700 }, { "epoch": 1.1712651200101327, "grad_norm": 0.11320505291223526, "learning_rate": 4.379674192809637e-06, "loss": 0.0042, "step": 138710 }, { "epoch": 1.1713495598572967, "grad_norm": 0.2916601598262787, "learning_rate": 4.378943010782885e-06, "loss": 0.0097, "step": 138720 }, { "epoch": 1.1714339997044605, "grad_norm": 0.23026780784130096, "learning_rate": 4.378211842245367e-06, "loss": 0.0068, "step": 138730 }, { "epoch": 1.1715184395516245, "grad_norm": 0.07509538531303406, "learning_rate": 4.377480687212958e-06, "loss": 0.0071, "step": 138740 }, { "epoch": 1.1716028793987883, "grad_norm": 0.07893861085176468, "learning_rate": 4.376749545701543e-06, "loss": 0.0155, "step": 138750 }, { "epoch": 1.1716873192459523, "grad_norm": 0.6247223019599915, "learning_rate": 4.376018417726998e-06, "loss": 0.0113, "step": 138760 }, { "epoch": 1.171771759093116, "grad_norm": 0.16138947010040283, "learning_rate": 4.375287303305207e-06, "loss": 0.0053, "step": 138770 }, { "epoch": 1.1718561989402798, "grad_norm": 0.1764615923166275, "learning_rate": 4.374556202452045e-06, "loss": 0.0111, "step": 138780 }, { "epoch": 1.1719406387874438, "grad_norm": 0.0664556622505188, "learning_rate": 4.373825115183396e-06, "loss": 0.0089, "step": 138790 }, { "epoch": 1.1720250786346076, "grad_norm": 0.057171765714883804, "learning_rate": 4.373094041515138e-06, "loss": 0.0071, "step": 138800 }, { "epoch": 1.1721095184817716, "grad_norm": 0.09959927201271057, "learning_rate": 4.372362981463148e-06, "loss": 0.0113, "step": 138810 }, { "epoch": 1.1721939583289354, "grad_norm": 0.40818050503730774, "learning_rate": 4.371631935043306e-06, "loss": 0.01, "step": 138820 }, { "epoch": 1.1722783981760994, "grad_norm": 0.07106117904186249, "learning_rate": 4.370900902271488e-06, "loss": 0.0078, "step": 138830 }, { "epoch": 1.1723628380232631, "grad_norm": 0.27463582158088684, "learning_rate": 4.370169883163575e-06, "loss": 0.0056, "step": 138840 }, { "epoch": 1.1724472778704271, "grad_norm": 0.001890599844045937, "learning_rate": 4.369438877735443e-06, "loss": 0.0093, "step": 138850 }, { "epoch": 1.172531717717591, "grad_norm": 0.19190025329589844, "learning_rate": 4.368707886002968e-06, "loss": 0.0034, "step": 138860 }, { "epoch": 1.172616157564755, "grad_norm": 0.2257002741098404, "learning_rate": 4.3679769079820285e-06, "loss": 0.0057, "step": 138870 }, { "epoch": 1.1727005974119187, "grad_norm": 0.451761394739151, "learning_rate": 4.367245943688503e-06, "loss": 0.0074, "step": 138880 }, { "epoch": 1.1727850372590827, "grad_norm": 0.1280869096517563, "learning_rate": 4.366514993138265e-06, "loss": 0.0098, "step": 138890 }, { "epoch": 1.1728694771062464, "grad_norm": 0.22902415692806244, "learning_rate": 4.365784056347191e-06, "loss": 0.0057, "step": 138900 }, { "epoch": 1.1729539169534102, "grad_norm": 0.1283951997756958, "learning_rate": 4.365053133331159e-06, "loss": 0.0066, "step": 138910 }, { "epoch": 1.1730383568005742, "grad_norm": 0.47888875007629395, "learning_rate": 4.364322224106039e-06, "loss": 0.01, "step": 138920 }, { "epoch": 1.173122796647738, "grad_norm": 0.1804221272468567, "learning_rate": 4.3635913286877144e-06, "loss": 0.0087, "step": 138930 }, { "epoch": 1.173207236494902, "grad_norm": 0.3411199748516083, "learning_rate": 4.362860447092053e-06, "loss": 0.0065, "step": 138940 }, { "epoch": 1.1732916763420658, "grad_norm": 0.17063163220882416, "learning_rate": 4.362129579334934e-06, "loss": 0.008, "step": 138950 }, { "epoch": 1.1733761161892298, "grad_norm": 0.38324597477912903, "learning_rate": 4.361398725432227e-06, "loss": 0.01, "step": 138960 }, { "epoch": 1.1734605560363935, "grad_norm": 0.4520765244960785, "learning_rate": 4.360667885399811e-06, "loss": 0.0122, "step": 138970 }, { "epoch": 1.1735449958835575, "grad_norm": 0.23721376061439514, "learning_rate": 4.359937059253557e-06, "loss": 0.0083, "step": 138980 }, { "epoch": 1.1736294357307213, "grad_norm": 0.08314450085163116, "learning_rate": 4.35920624700934e-06, "loss": 0.0094, "step": 138990 }, { "epoch": 1.173713875577885, "grad_norm": 0.21484091877937317, "learning_rate": 4.358475448683029e-06, "loss": 0.0062, "step": 139000 }, { "epoch": 1.173798315425049, "grad_norm": 0.035671383142471313, "learning_rate": 4.357744664290504e-06, "loss": 0.0039, "step": 139010 }, { "epoch": 1.1738827552722129, "grad_norm": 0.47907841205596924, "learning_rate": 4.357013893847632e-06, "loss": 0.0045, "step": 139020 }, { "epoch": 1.1739671951193769, "grad_norm": 0.15964020788669586, "learning_rate": 4.356283137370284e-06, "loss": 0.0107, "step": 139030 }, { "epoch": 1.1740516349665406, "grad_norm": 0.09344083815813065, "learning_rate": 4.355552394874337e-06, "loss": 0.0103, "step": 139040 }, { "epoch": 1.1741360748137046, "grad_norm": 0.23744910955429077, "learning_rate": 4.354821666375658e-06, "loss": 0.0057, "step": 139050 }, { "epoch": 1.1742205146608684, "grad_norm": 0.052534304559230804, "learning_rate": 4.354090951890122e-06, "loss": 0.0085, "step": 139060 }, { "epoch": 1.1743049545080324, "grad_norm": 0.02501586079597473, "learning_rate": 4.353360251433596e-06, "loss": 0.0128, "step": 139070 }, { "epoch": 1.1743893943551962, "grad_norm": 0.3235504925251007, "learning_rate": 4.352629565021954e-06, "loss": 0.0051, "step": 139080 }, { "epoch": 1.1744738342023602, "grad_norm": 0.537291407585144, "learning_rate": 4.351898892671066e-06, "loss": 0.0095, "step": 139090 }, { "epoch": 1.174558274049524, "grad_norm": 0.24444296956062317, "learning_rate": 4.351168234396801e-06, "loss": 0.0062, "step": 139100 }, { "epoch": 1.174642713896688, "grad_norm": 0.19831357896327972, "learning_rate": 4.350437590215027e-06, "loss": 0.0097, "step": 139110 }, { "epoch": 1.1747271537438517, "grad_norm": 0.10682510584592819, "learning_rate": 4.349706960141618e-06, "loss": 0.0109, "step": 139120 }, { "epoch": 1.1748115935910155, "grad_norm": 0.5104036927223206, "learning_rate": 4.348976344192439e-06, "loss": 0.0108, "step": 139130 }, { "epoch": 1.1748960334381795, "grad_norm": 0.3905731737613678, "learning_rate": 4.3482457423833614e-06, "loss": 0.007, "step": 139140 }, { "epoch": 1.1749804732853433, "grad_norm": 0.25588348507881165, "learning_rate": 4.347515154730253e-06, "loss": 0.0085, "step": 139150 }, { "epoch": 1.1750649131325073, "grad_norm": 0.3098452091217041, "learning_rate": 4.34678458124898e-06, "loss": 0.0065, "step": 139160 }, { "epoch": 1.175149352979671, "grad_norm": 0.1514754742383957, "learning_rate": 4.346054021955413e-06, "loss": 0.0049, "step": 139170 }, { "epoch": 1.175233792826835, "grad_norm": 0.34352418780326843, "learning_rate": 4.345323476865418e-06, "loss": 0.01, "step": 139180 }, { "epoch": 1.1753182326739988, "grad_norm": 0.4127058982849121, "learning_rate": 4.344592945994865e-06, "loss": 0.0093, "step": 139190 }, { "epoch": 1.1754026725211628, "grad_norm": 0.43740415573120117, "learning_rate": 4.3438624293596155e-06, "loss": 0.009, "step": 139200 }, { "epoch": 1.1754871123683266, "grad_norm": 0.041490115225315094, "learning_rate": 4.343131926975542e-06, "loss": 0.0023, "step": 139210 }, { "epoch": 1.1755715522154904, "grad_norm": 0.31011471152305603, "learning_rate": 4.342401438858508e-06, "loss": 0.0143, "step": 139220 }, { "epoch": 1.1756559920626544, "grad_norm": 0.14424429833889008, "learning_rate": 4.34167096502438e-06, "loss": 0.0087, "step": 139230 }, { "epoch": 1.1757404319098184, "grad_norm": 0.217978835105896, "learning_rate": 4.340940505489023e-06, "loss": 0.0075, "step": 139240 }, { "epoch": 1.1758248717569821, "grad_norm": 0.18408681452274323, "learning_rate": 4.3402100602683045e-06, "loss": 0.0066, "step": 139250 }, { "epoch": 1.175909311604146, "grad_norm": 0.2790525555610657, "learning_rate": 4.339479629378089e-06, "loss": 0.0089, "step": 139260 }, { "epoch": 1.17599375145131, "grad_norm": 0.34350523352622986, "learning_rate": 4.3387492128342375e-06, "loss": 0.0064, "step": 139270 }, { "epoch": 1.1760781912984737, "grad_norm": 0.48052385449409485, "learning_rate": 4.338018810652621e-06, "loss": 0.0067, "step": 139280 }, { "epoch": 1.1761626311456377, "grad_norm": 0.07670409232378006, "learning_rate": 4.337288422849097e-06, "loss": 0.0071, "step": 139290 }, { "epoch": 1.1762470709928015, "grad_norm": 0.03871883079409599, "learning_rate": 4.336558049439536e-06, "loss": 0.0097, "step": 139300 }, { "epoch": 1.1763315108399655, "grad_norm": 0.38037753105163574, "learning_rate": 4.335827690439795e-06, "loss": 0.0072, "step": 139310 }, { "epoch": 1.1764159506871292, "grad_norm": 0.6583961248397827, "learning_rate": 4.335097345865743e-06, "loss": 0.0095, "step": 139320 }, { "epoch": 1.1765003905342932, "grad_norm": 0.38563239574432373, "learning_rate": 4.334367015733238e-06, "loss": 0.005, "step": 139330 }, { "epoch": 1.176584830381457, "grad_norm": 0.10365361720323563, "learning_rate": 4.333636700058148e-06, "loss": 0.0076, "step": 139340 }, { "epoch": 1.1766692702286208, "grad_norm": 0.31150469183921814, "learning_rate": 4.332906398856329e-06, "loss": 0.0093, "step": 139350 }, { "epoch": 1.1767537100757848, "grad_norm": 0.18026070296764374, "learning_rate": 4.332176112143649e-06, "loss": 0.0072, "step": 139360 }, { "epoch": 1.1768381499229486, "grad_norm": 0.49347177147865295, "learning_rate": 4.331445839935965e-06, "loss": 0.0092, "step": 139370 }, { "epoch": 1.1769225897701125, "grad_norm": 0.4264275133609772, "learning_rate": 4.330715582249143e-06, "loss": 0.008, "step": 139380 }, { "epoch": 1.1770070296172763, "grad_norm": 0.21647106111049652, "learning_rate": 4.32998533909904e-06, "loss": 0.0063, "step": 139390 }, { "epoch": 1.1770914694644403, "grad_norm": 0.014996851794421673, "learning_rate": 4.3292551105015166e-06, "loss": 0.0043, "step": 139400 }, { "epoch": 1.177175909311604, "grad_norm": 0.3100457191467285, "learning_rate": 4.328524896472437e-06, "loss": 0.0102, "step": 139410 }, { "epoch": 1.177260349158768, "grad_norm": 0.18588581681251526, "learning_rate": 4.327794697027657e-06, "loss": 0.0049, "step": 139420 }, { "epoch": 1.1773447890059319, "grad_norm": 0.23367995023727417, "learning_rate": 4.327064512183041e-06, "loss": 0.0072, "step": 139430 }, { "epoch": 1.1774292288530959, "grad_norm": 0.3404010534286499, "learning_rate": 4.326334341954442e-06, "loss": 0.0078, "step": 139440 }, { "epoch": 1.1775136687002596, "grad_norm": 0.35164394974708557, "learning_rate": 4.325604186357727e-06, "loss": 0.0075, "step": 139450 }, { "epoch": 1.1775981085474236, "grad_norm": 0.21572239696979523, "learning_rate": 4.324874045408749e-06, "loss": 0.0046, "step": 139460 }, { "epoch": 1.1776825483945874, "grad_norm": 0.4186396896839142, "learning_rate": 4.324143919123369e-06, "loss": 0.0055, "step": 139470 }, { "epoch": 1.1777669882417512, "grad_norm": 0.08717388659715652, "learning_rate": 4.323413807517443e-06, "loss": 0.0085, "step": 139480 }, { "epoch": 1.1778514280889152, "grad_norm": 0.43219703435897827, "learning_rate": 4.322683710606833e-06, "loss": 0.0058, "step": 139490 }, { "epoch": 1.177935867936079, "grad_norm": 0.20560690760612488, "learning_rate": 4.321953628407393e-06, "loss": 0.0027, "step": 139500 }, { "epoch": 1.178020307783243, "grad_norm": 0.06918516010046005, "learning_rate": 4.3212235609349805e-06, "loss": 0.007, "step": 139510 }, { "epoch": 1.1781047476304067, "grad_norm": 0.19593121111392975, "learning_rate": 4.320493508205455e-06, "loss": 0.0089, "step": 139520 }, { "epoch": 1.1781891874775707, "grad_norm": 0.26965245604515076, "learning_rate": 4.319763470234668e-06, "loss": 0.0065, "step": 139530 }, { "epoch": 1.1782736273247345, "grad_norm": 0.37195059657096863, "learning_rate": 4.319033447038482e-06, "loss": 0.0071, "step": 139540 }, { "epoch": 1.1783580671718985, "grad_norm": 0.541100800037384, "learning_rate": 4.318303438632749e-06, "loss": 0.005, "step": 139550 }, { "epoch": 1.1784425070190623, "grad_norm": 0.2182125747203827, "learning_rate": 4.317573445033326e-06, "loss": 0.0118, "step": 139560 }, { "epoch": 1.178526946866226, "grad_norm": 0.3809795081615448, "learning_rate": 4.3168434662560655e-06, "loss": 0.0078, "step": 139570 }, { "epoch": 1.17861138671339, "grad_norm": 0.283310204744339, "learning_rate": 4.316113502316828e-06, "loss": 0.0089, "step": 139580 }, { "epoch": 1.178695826560554, "grad_norm": 0.3926633596420288, "learning_rate": 4.315383553231464e-06, "loss": 0.008, "step": 139590 }, { "epoch": 1.1787802664077178, "grad_norm": 0.15193364024162292, "learning_rate": 4.314653619015829e-06, "loss": 0.0039, "step": 139600 }, { "epoch": 1.1788647062548816, "grad_norm": 0.23659807443618774, "learning_rate": 4.313923699685778e-06, "loss": 0.0109, "step": 139610 }, { "epoch": 1.1789491461020456, "grad_norm": 0.2181069403886795, "learning_rate": 4.313193795257163e-06, "loss": 0.0056, "step": 139620 }, { "epoch": 1.1790335859492094, "grad_norm": 0.41058990359306335, "learning_rate": 4.3124639057458375e-06, "loss": 0.0069, "step": 139630 }, { "epoch": 1.1791180257963734, "grad_norm": 0.07665719836950302, "learning_rate": 4.311734031167653e-06, "loss": 0.0113, "step": 139640 }, { "epoch": 1.1792024656435371, "grad_norm": 0.12238003313541412, "learning_rate": 4.311004171538468e-06, "loss": 0.0091, "step": 139650 }, { "epoch": 1.1792869054907011, "grad_norm": 0.18124882876873016, "learning_rate": 4.310274326874129e-06, "loss": 0.006, "step": 139660 }, { "epoch": 1.179371345337865, "grad_norm": 0.012302830815315247, "learning_rate": 4.309544497190492e-06, "loss": 0.0085, "step": 139670 }, { "epoch": 1.179455785185029, "grad_norm": 0.5812049508094788, "learning_rate": 4.308814682503404e-06, "loss": 0.0121, "step": 139680 }, { "epoch": 1.1795402250321927, "grad_norm": 0.543362021446228, "learning_rate": 4.308084882828722e-06, "loss": 0.0067, "step": 139690 }, { "epoch": 1.1796246648793565, "grad_norm": 0.23166567087173462, "learning_rate": 4.307355098182294e-06, "loss": 0.0043, "step": 139700 }, { "epoch": 1.1797091047265205, "grad_norm": 0.04195424169301987, "learning_rate": 4.306625328579972e-06, "loss": 0.0043, "step": 139710 }, { "epoch": 1.1797935445736842, "grad_norm": 0.25753673911094666, "learning_rate": 4.305895574037604e-06, "loss": 0.007, "step": 139720 }, { "epoch": 1.1798779844208482, "grad_norm": 0.5782794952392578, "learning_rate": 4.305165834571044e-06, "loss": 0.0067, "step": 139730 }, { "epoch": 1.179962424268012, "grad_norm": 0.33020076155662537, "learning_rate": 4.304436110196139e-06, "loss": 0.0104, "step": 139740 }, { "epoch": 1.180046864115176, "grad_norm": 0.40883973240852356, "learning_rate": 4.303706400928739e-06, "loss": 0.0111, "step": 139750 }, { "epoch": 1.1801313039623398, "grad_norm": 0.3847912847995758, "learning_rate": 4.302976706784693e-06, "loss": 0.0096, "step": 139760 }, { "epoch": 1.1802157438095038, "grad_norm": 0.26968494057655334, "learning_rate": 4.3022470277798485e-06, "loss": 0.0133, "step": 139770 }, { "epoch": 1.1803001836566676, "grad_norm": 0.12949968874454498, "learning_rate": 4.301517363930059e-06, "loss": 0.0114, "step": 139780 }, { "epoch": 1.1803846235038316, "grad_norm": 0.2944618761539459, "learning_rate": 4.3007877152511664e-06, "loss": 0.0032, "step": 139790 }, { "epoch": 1.1804690633509953, "grad_norm": 0.03662150353193283, "learning_rate": 4.300058081759024e-06, "loss": 0.0067, "step": 139800 }, { "epoch": 1.1805535031981593, "grad_norm": 0.26202306151390076, "learning_rate": 4.299328463469473e-06, "loss": 0.0088, "step": 139810 }, { "epoch": 1.180637943045323, "grad_norm": 0.2788044512271881, "learning_rate": 4.2985988603983675e-06, "loss": 0.0079, "step": 139820 }, { "epoch": 1.1807223828924869, "grad_norm": 0.4322844445705414, "learning_rate": 4.297869272561549e-06, "loss": 0.0083, "step": 139830 }, { "epoch": 1.1808068227396509, "grad_norm": 0.1248508170247078, "learning_rate": 4.297139699974868e-06, "loss": 0.005, "step": 139840 }, { "epoch": 1.1808912625868146, "grad_norm": 0.16893090307712555, "learning_rate": 4.2964101426541684e-06, "loss": 0.0075, "step": 139850 }, { "epoch": 1.1809757024339786, "grad_norm": 0.07643061876296997, "learning_rate": 4.295680600615294e-06, "loss": 0.0037, "step": 139860 }, { "epoch": 1.1810601422811424, "grad_norm": 0.08965275436639786, "learning_rate": 4.294951073874094e-06, "loss": 0.0062, "step": 139870 }, { "epoch": 1.1811445821283064, "grad_norm": 0.283561646938324, "learning_rate": 4.294221562446412e-06, "loss": 0.0116, "step": 139880 }, { "epoch": 1.1812290219754702, "grad_norm": 0.15583011507987976, "learning_rate": 4.293492066348094e-06, "loss": 0.0094, "step": 139890 }, { "epoch": 1.1813134618226342, "grad_norm": 0.4776056110858917, "learning_rate": 4.292762585594982e-06, "loss": 0.0142, "step": 139900 }, { "epoch": 1.181397901669798, "grad_norm": 0.14174221456050873, "learning_rate": 4.292033120202924e-06, "loss": 0.0082, "step": 139910 }, { "epoch": 1.1814823415169617, "grad_norm": 0.1355617344379425, "learning_rate": 4.291303670187758e-06, "loss": 0.0052, "step": 139920 }, { "epoch": 1.1815667813641257, "grad_norm": 0.20208676159381866, "learning_rate": 4.290574235565334e-06, "loss": 0.0077, "step": 139930 }, { "epoch": 1.1816512212112895, "grad_norm": 0.4328393340110779, "learning_rate": 4.2898448163514915e-06, "loss": 0.0078, "step": 139940 }, { "epoch": 1.1817356610584535, "grad_norm": 0.0011126353638246655, "learning_rate": 4.289115412562075e-06, "loss": 0.0065, "step": 139950 }, { "epoch": 1.1818201009056173, "grad_norm": 0.6072114706039429, "learning_rate": 4.288386024212924e-06, "loss": 0.0115, "step": 139960 }, { "epoch": 1.1819045407527813, "grad_norm": 0.4862394630908966, "learning_rate": 4.2876566513198846e-06, "loss": 0.0103, "step": 139970 }, { "epoch": 1.181988980599945, "grad_norm": 0.22334231436252594, "learning_rate": 4.286927293898796e-06, "loss": 0.0036, "step": 139980 }, { "epoch": 1.182073420447109, "grad_norm": 0.14521126449108124, "learning_rate": 4.2861979519655e-06, "loss": 0.0106, "step": 139990 }, { "epoch": 1.1821578602942728, "grad_norm": 0.2949386239051819, "learning_rate": 4.28546862553584e-06, "loss": 0.0077, "step": 140000 }, { "epoch": 1.1822423001414368, "grad_norm": 0.3401281535625458, "learning_rate": 4.2847393146256515e-06, "loss": 0.0059, "step": 140010 }, { "epoch": 1.1823267399886006, "grad_norm": 0.26730042695999146, "learning_rate": 4.284010019250782e-06, "loss": 0.0048, "step": 140020 }, { "epoch": 1.1824111798357646, "grad_norm": 0.2576341927051544, "learning_rate": 4.283280739427066e-06, "loss": 0.0062, "step": 140030 }, { "epoch": 1.1824956196829284, "grad_norm": 0.09234385937452316, "learning_rate": 4.2825514751703465e-06, "loss": 0.0059, "step": 140040 }, { "epoch": 1.1825800595300922, "grad_norm": 0.229140043258667, "learning_rate": 4.281822226496461e-06, "loss": 0.0063, "step": 140050 }, { "epoch": 1.1826644993772562, "grad_norm": 0.09545709937810898, "learning_rate": 4.2810929934212505e-06, "loss": 0.0067, "step": 140060 }, { "epoch": 1.18274893922442, "grad_norm": 0.07756086438894272, "learning_rate": 4.2803637759605524e-06, "loss": 0.0085, "step": 140070 }, { "epoch": 1.182833379071584, "grad_norm": 0.28185462951660156, "learning_rate": 4.279634574130207e-06, "loss": 0.0052, "step": 140080 }, { "epoch": 1.1829178189187477, "grad_norm": 0.6614686250686646, "learning_rate": 4.278905387946051e-06, "loss": 0.0099, "step": 140090 }, { "epoch": 1.1830022587659117, "grad_norm": 0.49663305282592773, "learning_rate": 4.278176217423919e-06, "loss": 0.0091, "step": 140100 }, { "epoch": 1.1830866986130755, "grad_norm": 0.3022804260253906, "learning_rate": 4.277447062579654e-06, "loss": 0.0142, "step": 140110 }, { "epoch": 1.1831711384602395, "grad_norm": 0.2172609567642212, "learning_rate": 4.276717923429091e-06, "loss": 0.0045, "step": 140120 }, { "epoch": 1.1832555783074032, "grad_norm": 0.18357449769973755, "learning_rate": 4.275988799988066e-06, "loss": 0.0082, "step": 140130 }, { "epoch": 1.183340018154567, "grad_norm": 0.2855425775051117, "learning_rate": 4.2752596922724145e-06, "loss": 0.0086, "step": 140140 }, { "epoch": 1.183424458001731, "grad_norm": 0.41868218779563904, "learning_rate": 4.274530600297976e-06, "loss": 0.0068, "step": 140150 }, { "epoch": 1.183508897848895, "grad_norm": 0.2648508548736572, "learning_rate": 4.273801524080584e-06, "loss": 0.0069, "step": 140160 }, { "epoch": 1.1835933376960588, "grad_norm": 0.49686628580093384, "learning_rate": 4.273072463636074e-06, "loss": 0.0094, "step": 140170 }, { "epoch": 1.1836777775432226, "grad_norm": 0.14459732174873352, "learning_rate": 4.272343418980279e-06, "loss": 0.0084, "step": 140180 }, { "epoch": 1.1837622173903866, "grad_norm": 0.0331445038318634, "learning_rate": 4.2716143901290385e-06, "loss": 0.006, "step": 140190 }, { "epoch": 1.1838466572375503, "grad_norm": 0.06450329720973969, "learning_rate": 4.270885377098183e-06, "loss": 0.0064, "step": 140200 }, { "epoch": 1.1839310970847143, "grad_norm": 0.08166619390249252, "learning_rate": 4.270156379903548e-06, "loss": 0.0089, "step": 140210 }, { "epoch": 1.184015536931878, "grad_norm": 0.3392353951931, "learning_rate": 4.269427398560968e-06, "loss": 0.0074, "step": 140220 }, { "epoch": 1.184099976779042, "grad_norm": 0.02268475852906704, "learning_rate": 4.268698433086274e-06, "loss": 0.0046, "step": 140230 }, { "epoch": 1.1841844166262059, "grad_norm": 0.20423851907253265, "learning_rate": 4.267969483495301e-06, "loss": 0.0135, "step": 140240 }, { "epoch": 1.1842688564733699, "grad_norm": 0.19487255811691284, "learning_rate": 4.267240549803878e-06, "loss": 0.0072, "step": 140250 }, { "epoch": 1.1843532963205337, "grad_norm": 0.4051656424999237, "learning_rate": 4.266511632027843e-06, "loss": 0.0122, "step": 140260 }, { "epoch": 1.1844377361676974, "grad_norm": 0.3116395175457001, "learning_rate": 4.265782730183024e-06, "loss": 0.0086, "step": 140270 }, { "epoch": 1.1845221760148614, "grad_norm": 0.18782147765159607, "learning_rate": 4.265053844285254e-06, "loss": 0.0074, "step": 140280 }, { "epoch": 1.1846066158620252, "grad_norm": 0.6348242163658142, "learning_rate": 4.264324974350363e-06, "loss": 0.0064, "step": 140290 }, { "epoch": 1.1846910557091892, "grad_norm": 0.16992533206939697, "learning_rate": 4.263596120394184e-06, "loss": 0.0054, "step": 140300 }, { "epoch": 1.184775495556353, "grad_norm": 0.014465474523603916, "learning_rate": 4.262867282432546e-06, "loss": 0.003, "step": 140310 }, { "epoch": 1.184859935403517, "grad_norm": 0.5777321457862854, "learning_rate": 4.26213846048128e-06, "loss": 0.0083, "step": 140320 }, { "epoch": 1.1849443752506807, "grad_norm": 0.11373412609100342, "learning_rate": 4.261409654556215e-06, "loss": 0.0073, "step": 140330 }, { "epoch": 1.1850288150978447, "grad_norm": 0.049480315297842026, "learning_rate": 4.260680864673179e-06, "loss": 0.0081, "step": 140340 }, { "epoch": 1.1851132549450085, "grad_norm": 0.1719455122947693, "learning_rate": 4.2599520908480065e-06, "loss": 0.004, "step": 140350 }, { "epoch": 1.1851976947921725, "grad_norm": 0.17760112881660461, "learning_rate": 4.259223333096521e-06, "loss": 0.0057, "step": 140360 }, { "epoch": 1.1852821346393363, "grad_norm": 0.31944605708122253, "learning_rate": 4.258494591434554e-06, "loss": 0.0069, "step": 140370 }, { "epoch": 1.1853665744865003, "grad_norm": 0.48694169521331787, "learning_rate": 4.2577658658779305e-06, "loss": 0.0092, "step": 140380 }, { "epoch": 1.185451014333664, "grad_norm": 0.4638471007347107, "learning_rate": 4.257037156442482e-06, "loss": 0.0042, "step": 140390 }, { "epoch": 1.1855354541808278, "grad_norm": 0.4785228669643402, "learning_rate": 4.256308463144033e-06, "loss": 0.0083, "step": 140400 }, { "epoch": 1.1856198940279918, "grad_norm": 0.15599721670150757, "learning_rate": 4.255579785998414e-06, "loss": 0.0078, "step": 140410 }, { "epoch": 1.1857043338751556, "grad_norm": 0.3820992112159729, "learning_rate": 4.254851125021446e-06, "loss": 0.011, "step": 140420 }, { "epoch": 1.1857887737223196, "grad_norm": 0.3077884614467621, "learning_rate": 4.254122480228962e-06, "loss": 0.0066, "step": 140430 }, { "epoch": 1.1858732135694834, "grad_norm": 0.29507267475128174, "learning_rate": 4.253393851636785e-06, "loss": 0.0137, "step": 140440 }, { "epoch": 1.1859576534166474, "grad_norm": 0.06167364493012428, "learning_rate": 4.2526652392607385e-06, "loss": 0.008, "step": 140450 }, { "epoch": 1.1860420932638112, "grad_norm": 0.6640868782997131, "learning_rate": 4.251936643116652e-06, "loss": 0.0076, "step": 140460 }, { "epoch": 1.1861265331109752, "grad_norm": 0.5611622333526611, "learning_rate": 4.251208063220346e-06, "loss": 0.0061, "step": 140470 }, { "epoch": 1.186210972958139, "grad_norm": 0.09606392681598663, "learning_rate": 4.250479499587648e-06, "loss": 0.0104, "step": 140480 }, { "epoch": 1.1862954128053027, "grad_norm": 0.38335877656936646, "learning_rate": 4.249750952234383e-06, "loss": 0.0076, "step": 140490 }, { "epoch": 1.1863798526524667, "grad_norm": 0.4432814419269562, "learning_rate": 4.249022421176373e-06, "loss": 0.0093, "step": 140500 }, { "epoch": 1.1864642924996305, "grad_norm": 0.3118348717689514, "learning_rate": 4.248293906429441e-06, "loss": 0.017, "step": 140510 }, { "epoch": 1.1865487323467945, "grad_norm": 0.5701588988304138, "learning_rate": 4.247565408009414e-06, "loss": 0.0053, "step": 140520 }, { "epoch": 1.1866331721939583, "grad_norm": 0.25880247354507446, "learning_rate": 4.246836925932109e-06, "loss": 0.0055, "step": 140530 }, { "epoch": 1.1867176120411222, "grad_norm": 0.3024354577064514, "learning_rate": 4.246108460213353e-06, "loss": 0.0098, "step": 140540 }, { "epoch": 1.186802051888286, "grad_norm": 0.08210074156522751, "learning_rate": 4.245380010868967e-06, "loss": 0.0083, "step": 140550 }, { "epoch": 1.18688649173545, "grad_norm": 0.2780698537826538, "learning_rate": 4.244651577914772e-06, "loss": 0.0086, "step": 140560 }, { "epoch": 1.1869709315826138, "grad_norm": 0.08149790018796921, "learning_rate": 4.243923161366591e-06, "loss": 0.0085, "step": 140570 }, { "epoch": 1.1870553714297778, "grad_norm": 0.07999689877033234, "learning_rate": 4.2431947612402425e-06, "loss": 0.0079, "step": 140580 }, { "epoch": 1.1871398112769416, "grad_norm": 0.05468558147549629, "learning_rate": 4.2424663775515505e-06, "loss": 0.0097, "step": 140590 }, { "epoch": 1.1872242511241056, "grad_norm": 0.019840434193611145, "learning_rate": 4.241738010316331e-06, "loss": 0.0078, "step": 140600 }, { "epoch": 1.1873086909712693, "grad_norm": 0.13523776829242706, "learning_rate": 4.241009659550409e-06, "loss": 0.0062, "step": 140610 }, { "epoch": 1.1873931308184331, "grad_norm": 0.19157199561595917, "learning_rate": 4.240281325269599e-06, "loss": 0.007, "step": 140620 }, { "epoch": 1.1874775706655971, "grad_norm": 0.3691718280315399, "learning_rate": 4.239553007489726e-06, "loss": 0.0077, "step": 140630 }, { "epoch": 1.187562010512761, "grad_norm": 0.3785167634487152, "learning_rate": 4.238824706226604e-06, "loss": 0.0051, "step": 140640 }, { "epoch": 1.1876464503599249, "grad_norm": 0.38959556818008423, "learning_rate": 4.2380964214960544e-06, "loss": 0.0032, "step": 140650 }, { "epoch": 1.1877308902070887, "grad_norm": 0.47722429037094116, "learning_rate": 4.237368153313892e-06, "loss": 0.0043, "step": 140660 }, { "epoch": 1.1878153300542527, "grad_norm": 0.23715850710868835, "learning_rate": 4.23663990169594e-06, "loss": 0.0107, "step": 140670 }, { "epoch": 1.1878997699014164, "grad_norm": 0.09348619729280472, "learning_rate": 4.235911666658013e-06, "loss": 0.0073, "step": 140680 }, { "epoch": 1.1879842097485804, "grad_norm": 0.49212703108787537, "learning_rate": 4.235183448215926e-06, "loss": 0.0149, "step": 140690 }, { "epoch": 1.1880686495957442, "grad_norm": 0.07036910206079483, "learning_rate": 4.2344552463854995e-06, "loss": 0.0086, "step": 140700 }, { "epoch": 1.188153089442908, "grad_norm": 0.339315265417099, "learning_rate": 4.233727061182545e-06, "loss": 0.0072, "step": 140710 }, { "epoch": 1.188237529290072, "grad_norm": 0.40973401069641113, "learning_rate": 4.232998892622886e-06, "loss": 0.0072, "step": 140720 }, { "epoch": 1.188321969137236, "grad_norm": 0.40495264530181885, "learning_rate": 4.2322707407223305e-06, "loss": 0.0089, "step": 140730 }, { "epoch": 1.1884064089843998, "grad_norm": 0.39165589213371277, "learning_rate": 4.2315426054967e-06, "loss": 0.0161, "step": 140740 }, { "epoch": 1.1884908488315635, "grad_norm": 0.14102624356746674, "learning_rate": 4.230814486961803e-06, "loss": 0.0075, "step": 140750 }, { "epoch": 1.1885752886787275, "grad_norm": 0.23537959158420563, "learning_rate": 4.230086385133461e-06, "loss": 0.0089, "step": 140760 }, { "epoch": 1.1886597285258913, "grad_norm": 0.4659899175167084, "learning_rate": 4.229358300027483e-06, "loss": 0.0105, "step": 140770 }, { "epoch": 1.1887441683730553, "grad_norm": 0.4475019574165344, "learning_rate": 4.228630231659687e-06, "loss": 0.0063, "step": 140780 }, { "epoch": 1.188828608220219, "grad_norm": 0.07217983156442642, "learning_rate": 4.227902180045881e-06, "loss": 0.0069, "step": 140790 }, { "epoch": 1.188913048067383, "grad_norm": 0.31998613476753235, "learning_rate": 4.227174145201884e-06, "loss": 0.0052, "step": 140800 }, { "epoch": 1.1889974879145468, "grad_norm": 0.39261141419410706, "learning_rate": 4.226446127143508e-06, "loss": 0.0122, "step": 140810 }, { "epoch": 1.1890819277617108, "grad_norm": 0.5902763605117798, "learning_rate": 4.225718125886559e-06, "loss": 0.0114, "step": 140820 }, { "epoch": 1.1891663676088746, "grad_norm": 0.19075150787830353, "learning_rate": 4.224990141446857e-06, "loss": 0.0119, "step": 140830 }, { "epoch": 1.1892508074560384, "grad_norm": 0.2812758684158325, "learning_rate": 4.224262173840209e-06, "loss": 0.0093, "step": 140840 }, { "epoch": 1.1893352473032024, "grad_norm": 0.11519408226013184, "learning_rate": 4.223534223082429e-06, "loss": 0.007, "step": 140850 }, { "epoch": 1.1894196871503662, "grad_norm": 0.005146298091858625, "learning_rate": 4.222806289189323e-06, "loss": 0.0039, "step": 140860 }, { "epoch": 1.1895041269975302, "grad_norm": 0.09979015588760376, "learning_rate": 4.222078372176708e-06, "loss": 0.0076, "step": 140870 }, { "epoch": 1.189588566844694, "grad_norm": 0.21330326795578003, "learning_rate": 4.221350472060391e-06, "loss": 0.0047, "step": 140880 }, { "epoch": 1.189673006691858, "grad_norm": 0.4372977316379547, "learning_rate": 4.220622588856183e-06, "loss": 0.0043, "step": 140890 }, { "epoch": 1.1897574465390217, "grad_norm": 0.14502118527889252, "learning_rate": 4.219894722579889e-06, "loss": 0.0074, "step": 140900 }, { "epoch": 1.1898418863861857, "grad_norm": 0.3144741356372833, "learning_rate": 4.219166873247326e-06, "loss": 0.0074, "step": 140910 }, { "epoch": 1.1899263262333495, "grad_norm": 0.1554013043642044, "learning_rate": 4.218439040874297e-06, "loss": 0.0064, "step": 140920 }, { "epoch": 1.1900107660805135, "grad_norm": 0.3022477328777313, "learning_rate": 4.217711225476612e-06, "loss": 0.0063, "step": 140930 }, { "epoch": 1.1900952059276773, "grad_norm": 0.2921132743358612, "learning_rate": 4.216983427070079e-06, "loss": 0.009, "step": 140940 }, { "epoch": 1.1901796457748413, "grad_norm": 0.27424225211143494, "learning_rate": 4.216255645670504e-06, "loss": 0.0077, "step": 140950 }, { "epoch": 1.190264085622005, "grad_norm": 0.06122283637523651, "learning_rate": 4.215527881293697e-06, "loss": 0.0062, "step": 140960 }, { "epoch": 1.1903485254691688, "grad_norm": 0.4552318751811981, "learning_rate": 4.2148001339554636e-06, "loss": 0.011, "step": 140970 }, { "epoch": 1.1904329653163328, "grad_norm": 0.16483564674854279, "learning_rate": 4.214072403671611e-06, "loss": 0.0044, "step": 140980 }, { "epoch": 1.1905174051634966, "grad_norm": 0.041857246309518814, "learning_rate": 4.2133446904579425e-06, "loss": 0.0099, "step": 140990 }, { "epoch": 1.1906018450106606, "grad_norm": 0.8026799559593201, "learning_rate": 4.212616994330269e-06, "loss": 0.0091, "step": 141000 }, { "epoch": 1.1906862848578244, "grad_norm": 0.5732076168060303, "learning_rate": 4.211889315304391e-06, "loss": 0.0116, "step": 141010 }, { "epoch": 1.1907707247049883, "grad_norm": 0.17942571640014648, "learning_rate": 4.211161653396118e-06, "loss": 0.0093, "step": 141020 }, { "epoch": 1.1908551645521521, "grad_norm": 0.14969421923160553, "learning_rate": 4.2104340086212505e-06, "loss": 0.0053, "step": 141030 }, { "epoch": 1.1909396043993161, "grad_norm": 0.1261402815580368, "learning_rate": 4.209706380995593e-06, "loss": 0.0102, "step": 141040 }, { "epoch": 1.19102404424648, "grad_norm": 0.5246052145957947, "learning_rate": 4.2089787705349535e-06, "loss": 0.0091, "step": 141050 }, { "epoch": 1.1911084840936437, "grad_norm": 0.018791895359754562, "learning_rate": 4.208251177255132e-06, "loss": 0.0055, "step": 141060 }, { "epoch": 1.1911929239408077, "grad_norm": 0.3940204083919525, "learning_rate": 4.207523601171934e-06, "loss": 0.0087, "step": 141070 }, { "epoch": 1.1912773637879717, "grad_norm": 0.2989790439605713, "learning_rate": 4.206796042301157e-06, "loss": 0.0081, "step": 141080 }, { "epoch": 1.1913618036351354, "grad_norm": 0.28175976872444153, "learning_rate": 4.206068500658611e-06, "loss": 0.0055, "step": 141090 }, { "epoch": 1.1914462434822992, "grad_norm": 0.2953736186027527, "learning_rate": 4.205340976260094e-06, "loss": 0.0061, "step": 141100 }, { "epoch": 1.1915306833294632, "grad_norm": 0.3240603804588318, "learning_rate": 4.204613469121409e-06, "loss": 0.0058, "step": 141110 }, { "epoch": 1.191615123176627, "grad_norm": 0.10803224891424179, "learning_rate": 4.203885979258353e-06, "loss": 0.007, "step": 141120 }, { "epoch": 1.191699563023791, "grad_norm": 0.42702528834342957, "learning_rate": 4.203158506686734e-06, "loss": 0.0072, "step": 141130 }, { "epoch": 1.1917840028709548, "grad_norm": 0.1116885170340538, "learning_rate": 4.202431051422347e-06, "loss": 0.0066, "step": 141140 }, { "epoch": 1.1918684427181188, "grad_norm": 0.0009462197776883841, "learning_rate": 4.2017036134809955e-06, "loss": 0.005, "step": 141150 }, { "epoch": 1.1919528825652825, "grad_norm": 0.08269380033016205, "learning_rate": 4.2009761928784775e-06, "loss": 0.0128, "step": 141160 }, { "epoch": 1.1920373224124465, "grad_norm": 0.3743862509727478, "learning_rate": 4.200248789630592e-06, "loss": 0.0121, "step": 141170 }, { "epoch": 1.1921217622596103, "grad_norm": 0.2097301036119461, "learning_rate": 4.199521403753141e-06, "loss": 0.0071, "step": 141180 }, { "epoch": 1.192206202106774, "grad_norm": 0.28593501448631287, "learning_rate": 4.198794035261918e-06, "loss": 0.0086, "step": 141190 }, { "epoch": 1.192290641953938, "grad_norm": 0.3915109634399414, "learning_rate": 4.1980666841727265e-06, "loss": 0.0067, "step": 141200 }, { "epoch": 1.1923750818011019, "grad_norm": 0.4553511440753937, "learning_rate": 4.197339350501361e-06, "loss": 0.0123, "step": 141210 }, { "epoch": 1.1924595216482659, "grad_norm": 0.20541054010391235, "learning_rate": 4.196612034263622e-06, "loss": 0.005, "step": 141220 }, { "epoch": 1.1925439614954296, "grad_norm": 0.06531114131212234, "learning_rate": 4.1958847354753025e-06, "loss": 0.0096, "step": 141230 }, { "epoch": 1.1926284013425936, "grad_norm": 0.33165332674980164, "learning_rate": 4.1951574541522035e-06, "loss": 0.0107, "step": 141240 }, { "epoch": 1.1927128411897574, "grad_norm": 0.2595686614513397, "learning_rate": 4.1944301903101196e-06, "loss": 0.0078, "step": 141250 }, { "epoch": 1.1927972810369214, "grad_norm": 0.1853899210691452, "learning_rate": 4.193702943964847e-06, "loss": 0.0056, "step": 141260 }, { "epoch": 1.1928817208840852, "grad_norm": 0.6052203178405762, "learning_rate": 4.192975715132182e-06, "loss": 0.0069, "step": 141270 }, { "epoch": 1.1929661607312492, "grad_norm": 0.24043205380439758, "learning_rate": 4.192248503827917e-06, "loss": 0.0094, "step": 141280 }, { "epoch": 1.193050600578413, "grad_norm": 0.1427563577890396, "learning_rate": 4.19152131006785e-06, "loss": 0.0059, "step": 141290 }, { "epoch": 1.193135040425577, "grad_norm": 0.023331057280302048, "learning_rate": 4.190794133867775e-06, "loss": 0.0086, "step": 141300 }, { "epoch": 1.1932194802727407, "grad_norm": 0.9189060926437378, "learning_rate": 4.190066975243486e-06, "loss": 0.0115, "step": 141310 }, { "epoch": 1.1933039201199045, "grad_norm": 0.04712337628006935, "learning_rate": 4.1893398342107724e-06, "loss": 0.0043, "step": 141320 }, { "epoch": 1.1933883599670685, "grad_norm": 0.2018953412771225, "learning_rate": 4.188612710785436e-06, "loss": 0.004, "step": 141330 }, { "epoch": 1.1934727998142323, "grad_norm": 0.2544173300266266, "learning_rate": 4.187885604983263e-06, "loss": 0.0092, "step": 141340 }, { "epoch": 1.1935572396613963, "grad_norm": 0.33467140793800354, "learning_rate": 4.187158516820049e-06, "loss": 0.0094, "step": 141350 }, { "epoch": 1.19364167950856, "grad_norm": 0.09452242404222488, "learning_rate": 4.186431446311585e-06, "loss": 0.0107, "step": 141360 }, { "epoch": 1.193726119355724, "grad_norm": 0.07285072654485703, "learning_rate": 4.185704393473664e-06, "loss": 0.0054, "step": 141370 }, { "epoch": 1.1938105592028878, "grad_norm": 0.14433850347995758, "learning_rate": 4.184977358322076e-06, "loss": 0.0101, "step": 141380 }, { "epoch": 1.1938949990500518, "grad_norm": 0.27659255266189575, "learning_rate": 4.184250340872614e-06, "loss": 0.0106, "step": 141390 }, { "epoch": 1.1939794388972156, "grad_norm": 0.1561356484889984, "learning_rate": 4.183523341141067e-06, "loss": 0.0052, "step": 141400 }, { "epoch": 1.1940638787443794, "grad_norm": 0.317324161529541, "learning_rate": 4.182796359143224e-06, "loss": 0.0074, "step": 141410 }, { "epoch": 1.1941483185915434, "grad_norm": 0.3910568952560425, "learning_rate": 4.18206939489488e-06, "loss": 0.0075, "step": 141420 }, { "epoch": 1.1942327584387071, "grad_norm": 0.4023078978061676, "learning_rate": 4.181342448411817e-06, "loss": 0.0137, "step": 141430 }, { "epoch": 1.1943171982858711, "grad_norm": 0.21588683128356934, "learning_rate": 4.180615519709832e-06, "loss": 0.0027, "step": 141440 }, { "epoch": 1.194401638133035, "grad_norm": 0.5590547323226929, "learning_rate": 4.179888608804707e-06, "loss": 0.0093, "step": 141450 }, { "epoch": 1.194486077980199, "grad_norm": 0.3161625266075134, "learning_rate": 4.179161715712236e-06, "loss": 0.0058, "step": 141460 }, { "epoch": 1.1945705178273627, "grad_norm": 0.32377293705940247, "learning_rate": 4.178434840448201e-06, "loss": 0.0061, "step": 141470 }, { "epoch": 1.1946549576745267, "grad_norm": 0.20235873758792877, "learning_rate": 4.1777079830283964e-06, "loss": 0.0067, "step": 141480 }, { "epoch": 1.1947393975216904, "grad_norm": 0.3159736692905426, "learning_rate": 4.176981143468604e-06, "loss": 0.0069, "step": 141490 }, { "epoch": 1.1948238373688544, "grad_norm": 0.40952056646347046, "learning_rate": 4.176254321784613e-06, "loss": 0.0079, "step": 141500 }, { "epoch": 1.1949082772160182, "grad_norm": 0.11627548933029175, "learning_rate": 4.175527517992211e-06, "loss": 0.0056, "step": 141510 }, { "epoch": 1.1949927170631822, "grad_norm": 0.28389689326286316, "learning_rate": 4.174800732107178e-06, "loss": 0.01, "step": 141520 }, { "epoch": 1.195077156910346, "grad_norm": 0.6623741388320923, "learning_rate": 4.174073964145308e-06, "loss": 0.0097, "step": 141530 }, { "epoch": 1.1951615967575098, "grad_norm": 0.17759883403778076, "learning_rate": 4.173347214122381e-06, "loss": 0.0064, "step": 141540 }, { "epoch": 1.1952460366046738, "grad_norm": 0.35851287841796875, "learning_rate": 4.172620482054183e-06, "loss": 0.0061, "step": 141550 }, { "epoch": 1.1953304764518375, "grad_norm": 0.2002781629562378, "learning_rate": 4.171893767956497e-06, "loss": 0.0063, "step": 141560 }, { "epoch": 1.1954149162990015, "grad_norm": 0.09917614609003067, "learning_rate": 4.1711670718451115e-06, "loss": 0.006, "step": 141570 }, { "epoch": 1.1954993561461653, "grad_norm": 0.14164145290851593, "learning_rate": 4.170440393735806e-06, "loss": 0.007, "step": 141580 }, { "epoch": 1.1955837959933293, "grad_norm": 0.36153316497802734, "learning_rate": 4.169713733644366e-06, "loss": 0.009, "step": 141590 }, { "epoch": 1.195668235840493, "grad_norm": 0.014890397898852825, "learning_rate": 4.168987091586571e-06, "loss": 0.0024, "step": 141600 }, { "epoch": 1.195752675687657, "grad_norm": 0.21878479421138763, "learning_rate": 4.168260467578209e-06, "loss": 0.0071, "step": 141610 }, { "epoch": 1.1958371155348209, "grad_norm": 0.16788282990455627, "learning_rate": 4.167533861635059e-06, "loss": 0.006, "step": 141620 }, { "epoch": 1.1959215553819846, "grad_norm": 0.23306679725646973, "learning_rate": 4.166807273772901e-06, "loss": 0.0062, "step": 141630 }, { "epoch": 1.1960059952291486, "grad_norm": 0.2719895541667938, "learning_rate": 4.16608070400752e-06, "loss": 0.0075, "step": 141640 }, { "epoch": 1.1960904350763126, "grad_norm": 0.0013191542821004987, "learning_rate": 4.165354152354693e-06, "loss": 0.0068, "step": 141650 }, { "epoch": 1.1961748749234764, "grad_norm": 0.504806637763977, "learning_rate": 4.164627618830206e-06, "loss": 0.0086, "step": 141660 }, { "epoch": 1.1962593147706402, "grad_norm": 0.20916545391082764, "learning_rate": 4.163901103449833e-06, "loss": 0.0055, "step": 141670 }, { "epoch": 1.1963437546178042, "grad_norm": 0.21506761014461517, "learning_rate": 4.1631746062293585e-06, "loss": 0.0059, "step": 141680 }, { "epoch": 1.196428194464968, "grad_norm": 0.648366391658783, "learning_rate": 4.162448127184558e-06, "loss": 0.0106, "step": 141690 }, { "epoch": 1.196512634312132, "grad_norm": 0.6629400253295898, "learning_rate": 4.161721666331215e-06, "loss": 0.0039, "step": 141700 }, { "epoch": 1.1965970741592957, "grad_norm": 0.6907026767730713, "learning_rate": 4.160995223685105e-06, "loss": 0.0065, "step": 141710 }, { "epoch": 1.1966815140064597, "grad_norm": 0.4110182821750641, "learning_rate": 4.1602687992620066e-06, "loss": 0.0058, "step": 141720 }, { "epoch": 1.1967659538536235, "grad_norm": 0.19348371028900146, "learning_rate": 4.159542393077696e-06, "loss": 0.0068, "step": 141730 }, { "epoch": 1.1968503937007875, "grad_norm": 0.2629718482494354, "learning_rate": 4.1588160051479566e-06, "loss": 0.0119, "step": 141740 }, { "epoch": 1.1969348335479513, "grad_norm": 0.4059391915798187, "learning_rate": 4.158089635488559e-06, "loss": 0.0091, "step": 141750 }, { "epoch": 1.197019273395115, "grad_norm": 0.26871395111083984, "learning_rate": 4.15736328411528e-06, "loss": 0.0077, "step": 141760 }, { "epoch": 1.197103713242279, "grad_norm": 0.15173257887363434, "learning_rate": 4.1566369510439e-06, "loss": 0.0096, "step": 141770 }, { "epoch": 1.1971881530894428, "grad_norm": 0.2761040925979614, "learning_rate": 4.155910636290192e-06, "loss": 0.0082, "step": 141780 }, { "epoch": 1.1972725929366068, "grad_norm": 0.12051494419574738, "learning_rate": 4.155184339869933e-06, "loss": 0.0096, "step": 141790 }, { "epoch": 1.1973570327837706, "grad_norm": 0.4523369371891022, "learning_rate": 4.154458061798895e-06, "loss": 0.0099, "step": 141800 }, { "epoch": 1.1974414726309346, "grad_norm": 0.18224214017391205, "learning_rate": 4.1537318020928565e-06, "loss": 0.004, "step": 141810 }, { "epoch": 1.1975259124780984, "grad_norm": 0.40564778447151184, "learning_rate": 4.153005560767589e-06, "loss": 0.0147, "step": 141820 }, { "epoch": 1.1976103523252624, "grad_norm": 0.2262464314699173, "learning_rate": 4.152279337838867e-06, "loss": 0.0089, "step": 141830 }, { "epoch": 1.1976947921724261, "grad_norm": 0.1835230588912964, "learning_rate": 4.151553133322463e-06, "loss": 0.0145, "step": 141840 }, { "epoch": 1.1977792320195901, "grad_norm": 1.1560815572738647, "learning_rate": 4.1508269472341535e-06, "loss": 0.0053, "step": 141850 }, { "epoch": 1.197863671866754, "grad_norm": 0.022113097831606865, "learning_rate": 4.150100779589708e-06, "loss": 0.0036, "step": 141860 }, { "epoch": 1.197948111713918, "grad_norm": 0.09783861041069031, "learning_rate": 4.149374630404899e-06, "loss": 0.0074, "step": 141870 }, { "epoch": 1.1980325515610817, "grad_norm": 0.6471390724182129, "learning_rate": 4.148648499695499e-06, "loss": 0.0086, "step": 141880 }, { "epoch": 1.1981169914082455, "grad_norm": 0.16359776258468628, "learning_rate": 4.147922387477278e-06, "loss": 0.0055, "step": 141890 }, { "epoch": 1.1982014312554095, "grad_norm": 0.033664099872112274, "learning_rate": 4.147196293766009e-06, "loss": 0.008, "step": 141900 }, { "epoch": 1.1982858711025732, "grad_norm": 0.5006899237632751, "learning_rate": 4.1464702185774615e-06, "loss": 0.0101, "step": 141910 }, { "epoch": 1.1983703109497372, "grad_norm": 0.19012649357318878, "learning_rate": 4.145744161927407e-06, "loss": 0.0097, "step": 141920 }, { "epoch": 1.198454750796901, "grad_norm": 0.06874649226665497, "learning_rate": 4.145018123831613e-06, "loss": 0.0091, "step": 141930 }, { "epoch": 1.198539190644065, "grad_norm": 0.022037141025066376, "learning_rate": 4.1442921043058505e-06, "loss": 0.0034, "step": 141940 }, { "epoch": 1.1986236304912288, "grad_norm": 0.36918821930885315, "learning_rate": 4.143566103365888e-06, "loss": 0.0068, "step": 141950 }, { "epoch": 1.1987080703383928, "grad_norm": 0.14980746805667877, "learning_rate": 4.142840121027495e-06, "loss": 0.0097, "step": 141960 }, { "epoch": 1.1987925101855565, "grad_norm": 0.3955517113208771, "learning_rate": 4.142114157306436e-06, "loss": 0.0083, "step": 141970 }, { "epoch": 1.1988769500327203, "grad_norm": 2.6475369930267334, "learning_rate": 4.141388212218485e-06, "loss": 0.0079, "step": 141980 }, { "epoch": 1.1989613898798843, "grad_norm": 0.14854852855205536, "learning_rate": 4.140662285779406e-06, "loss": 0.0109, "step": 141990 }, { "epoch": 1.1990458297270483, "grad_norm": 0.5312284231185913, "learning_rate": 4.139936378004965e-06, "loss": 0.0113, "step": 142000 }, { "epoch": 1.199130269574212, "grad_norm": 0.44609034061431885, "learning_rate": 4.139210488910931e-06, "loss": 0.0101, "step": 142010 }, { "epoch": 1.1992147094213759, "grad_norm": 0.12391684204339981, "learning_rate": 4.138484618513066e-06, "loss": 0.0041, "step": 142020 }, { "epoch": 1.1992991492685399, "grad_norm": 0.0007825143402442336, "learning_rate": 4.137758766827142e-06, "loss": 0.0038, "step": 142030 }, { "epoch": 1.1993835891157036, "grad_norm": 0.12710532546043396, "learning_rate": 4.137032933868917e-06, "loss": 0.0092, "step": 142040 }, { "epoch": 1.1994680289628676, "grad_norm": 0.5592535138130188, "learning_rate": 4.136307119654163e-06, "loss": 0.006, "step": 142050 }, { "epoch": 1.1995524688100314, "grad_norm": 0.14378370344638824, "learning_rate": 4.13558132419864e-06, "loss": 0.006, "step": 142060 }, { "epoch": 1.1996369086571954, "grad_norm": 0.6758620142936707, "learning_rate": 4.134855547518115e-06, "loss": 0.0123, "step": 142070 }, { "epoch": 1.1997213485043592, "grad_norm": 0.35716336965560913, "learning_rate": 4.134129789628347e-06, "loss": 0.0067, "step": 142080 }, { "epoch": 1.1998057883515232, "grad_norm": 0.10361016541719437, "learning_rate": 4.133404050545106e-06, "loss": 0.0075, "step": 142090 }, { "epoch": 1.199890228198687, "grad_norm": 0.10638660937547684, "learning_rate": 4.13267833028415e-06, "loss": 0.0073, "step": 142100 }, { "epoch": 1.1999746680458507, "grad_norm": 0.3224148154258728, "learning_rate": 4.131952628861243e-06, "loss": 0.0055, "step": 142110 }, { "epoch": 1.2000591078930147, "grad_norm": 0.2412024438381195, "learning_rate": 4.131226946292147e-06, "loss": 0.0097, "step": 142120 }, { "epoch": 1.2001435477401785, "grad_norm": 0.1128438264131546, "learning_rate": 4.130501282592623e-06, "loss": 0.0085, "step": 142130 }, { "epoch": 1.2002279875873425, "grad_norm": 0.18863445520401, "learning_rate": 4.129775637778434e-06, "loss": 0.0054, "step": 142140 }, { "epoch": 1.2003124274345063, "grad_norm": 0.22344370186328888, "learning_rate": 4.129050011865339e-06, "loss": 0.0034, "step": 142150 }, { "epoch": 1.2003968672816703, "grad_norm": 0.36256149411201477, "learning_rate": 4.1283244048691e-06, "loss": 0.0079, "step": 142160 }, { "epoch": 1.200481307128834, "grad_norm": 0.3390296399593353, "learning_rate": 4.127598816805475e-06, "loss": 0.0084, "step": 142170 }, { "epoch": 1.200565746975998, "grad_norm": 0.14285776019096375, "learning_rate": 4.126873247690226e-06, "loss": 0.0069, "step": 142180 }, { "epoch": 1.2006501868231618, "grad_norm": 0.5071380734443665, "learning_rate": 4.126147697539111e-06, "loss": 0.0099, "step": 142190 }, { "epoch": 1.2007346266703258, "grad_norm": 0.1678415685892105, "learning_rate": 4.12542216636789e-06, "loss": 0.0038, "step": 142200 }, { "epoch": 1.2008190665174896, "grad_norm": 0.33572453260421753, "learning_rate": 4.12469665419232e-06, "loss": 0.0135, "step": 142210 }, { "epoch": 1.2009035063646536, "grad_norm": 0.1978556364774704, "learning_rate": 4.123971161028157e-06, "loss": 0.0055, "step": 142220 }, { "epoch": 1.2009879462118174, "grad_norm": 0.4130333364009857, "learning_rate": 4.123245686891163e-06, "loss": 0.0069, "step": 142230 }, { "epoch": 1.2010723860589811, "grad_norm": 0.2476944774389267, "learning_rate": 4.122520231797093e-06, "loss": 0.0099, "step": 142240 }, { "epoch": 1.2011568259061451, "grad_norm": 0.10413151234388351, "learning_rate": 4.121794795761704e-06, "loss": 0.0069, "step": 142250 }, { "epoch": 1.201241265753309, "grad_norm": 0.23896677792072296, "learning_rate": 4.121069378800749e-06, "loss": 0.0086, "step": 142260 }, { "epoch": 1.201325705600473, "grad_norm": 0.32676127552986145, "learning_rate": 4.1203439809299906e-06, "loss": 0.0105, "step": 142270 }, { "epoch": 1.2014101454476367, "grad_norm": 0.17976891994476318, "learning_rate": 4.119618602165179e-06, "loss": 0.0047, "step": 142280 }, { "epoch": 1.2014945852948007, "grad_norm": 0.2885590195655823, "learning_rate": 4.118893242522072e-06, "loss": 0.0094, "step": 142290 }, { "epoch": 1.2015790251419645, "grad_norm": 0.2317477911710739, "learning_rate": 4.118167902016421e-06, "loss": 0.0067, "step": 142300 }, { "epoch": 1.2016634649891285, "grad_norm": 0.012420669198036194, "learning_rate": 4.1174425806639845e-06, "loss": 0.0028, "step": 142310 }, { "epoch": 1.2017479048362922, "grad_norm": 0.19578558206558228, "learning_rate": 4.1167172784805134e-06, "loss": 0.0061, "step": 142320 }, { "epoch": 1.201832344683456, "grad_norm": 0.24786144495010376, "learning_rate": 4.1159919954817635e-06, "loss": 0.0092, "step": 142330 }, { "epoch": 1.20191678453062, "grad_norm": 0.2523057460784912, "learning_rate": 4.115266731683486e-06, "loss": 0.0072, "step": 142340 }, { "epoch": 1.2020012243777838, "grad_norm": 0.6985853314399719, "learning_rate": 4.114541487101433e-06, "loss": 0.0083, "step": 142350 }, { "epoch": 1.2020856642249478, "grad_norm": 0.10892084985971451, "learning_rate": 4.113816261751358e-06, "loss": 0.0078, "step": 142360 }, { "epoch": 1.2021701040721116, "grad_norm": 0.07501132786273956, "learning_rate": 4.11309105564901e-06, "loss": 0.0059, "step": 142370 }, { "epoch": 1.2022545439192756, "grad_norm": 0.3428468406200409, "learning_rate": 4.112365868810145e-06, "loss": 0.0125, "step": 142380 }, { "epoch": 1.2023389837664393, "grad_norm": 0.24908748269081116, "learning_rate": 4.11164070125051e-06, "loss": 0.005, "step": 142390 }, { "epoch": 1.2024234236136033, "grad_norm": 0.6035357117652893, "learning_rate": 4.110915552985859e-06, "loss": 0.0105, "step": 142400 }, { "epoch": 1.202507863460767, "grad_norm": 0.11996103078126907, "learning_rate": 4.1101904240319366e-06, "loss": 0.0056, "step": 142410 }, { "epoch": 1.202592303307931, "grad_norm": 0.6023648381233215, "learning_rate": 4.1094653144044984e-06, "loss": 0.0084, "step": 142420 }, { "epoch": 1.2026767431550949, "grad_norm": 0.19675710797309875, "learning_rate": 4.1087402241192906e-06, "loss": 0.0117, "step": 142430 }, { "epoch": 1.2027611830022589, "grad_norm": 0.3562908172607422, "learning_rate": 4.108015153192063e-06, "loss": 0.005, "step": 142440 }, { "epoch": 1.2028456228494226, "grad_norm": 0.2591796815395355, "learning_rate": 4.107290101638564e-06, "loss": 0.0075, "step": 142450 }, { "epoch": 1.2029300626965864, "grad_norm": 0.21660074591636658, "learning_rate": 4.1065650694745375e-06, "loss": 0.0061, "step": 142460 }, { "epoch": 1.2030145025437504, "grad_norm": 0.5766470432281494, "learning_rate": 4.105840056715738e-06, "loss": 0.0102, "step": 142470 }, { "epoch": 1.2030989423909142, "grad_norm": 0.04099986329674721, "learning_rate": 4.105115063377908e-06, "loss": 0.0059, "step": 142480 }, { "epoch": 1.2031833822380782, "grad_norm": 0.28287243843078613, "learning_rate": 4.1043900894767965e-06, "loss": 0.0044, "step": 142490 }, { "epoch": 1.203267822085242, "grad_norm": 0.5619387030601501, "learning_rate": 4.1036651350281465e-06, "loss": 0.0081, "step": 142500 }, { "epoch": 1.203352261932406, "grad_norm": 0.32403767108917236, "learning_rate": 4.102940200047708e-06, "loss": 0.0079, "step": 142510 }, { "epoch": 1.2034367017795697, "grad_norm": 0.28201231360435486, "learning_rate": 4.102215284551224e-06, "loss": 0.0054, "step": 142520 }, { "epoch": 1.2035211416267337, "grad_norm": 0.213920459151268, "learning_rate": 4.101490388554441e-06, "loss": 0.0054, "step": 142530 }, { "epoch": 1.2036055814738975, "grad_norm": 0.20002484321594238, "learning_rate": 4.100765512073101e-06, "loss": 0.0062, "step": 142540 }, { "epoch": 1.2036900213210613, "grad_norm": 0.15819481015205383, "learning_rate": 4.1000406551229506e-06, "loss": 0.0061, "step": 142550 }, { "epoch": 1.2037744611682253, "grad_norm": 0.1687614768743515, "learning_rate": 4.099315817719732e-06, "loss": 0.0038, "step": 142560 }, { "epoch": 1.2038589010153893, "grad_norm": 0.10488894581794739, "learning_rate": 4.0985909998791915e-06, "loss": 0.0084, "step": 142570 }, { "epoch": 1.203943340862553, "grad_norm": 0.8480457067489624, "learning_rate": 4.0978662016170685e-06, "loss": 0.0091, "step": 142580 }, { "epoch": 1.2040277807097168, "grad_norm": 0.25214600563049316, "learning_rate": 4.097141422949105e-06, "loss": 0.0103, "step": 142590 }, { "epoch": 1.2041122205568808, "grad_norm": 0.2302130162715912, "learning_rate": 4.096416663891047e-06, "loss": 0.0066, "step": 142600 }, { "epoch": 1.2041966604040446, "grad_norm": 0.17611916363239288, "learning_rate": 4.095691924458633e-06, "loss": 0.0046, "step": 142610 }, { "epoch": 1.2042811002512086, "grad_norm": 0.5009124875068665, "learning_rate": 4.094967204667606e-06, "loss": 0.0074, "step": 142620 }, { "epoch": 1.2043655400983724, "grad_norm": 0.35737526416778564, "learning_rate": 4.094242504533703e-06, "loss": 0.0083, "step": 142630 }, { "epoch": 1.2044499799455364, "grad_norm": 0.6519811749458313, "learning_rate": 4.093517824072671e-06, "loss": 0.011, "step": 142640 }, { "epoch": 1.2045344197927002, "grad_norm": 0.25787052512168884, "learning_rate": 4.092793163300243e-06, "loss": 0.0057, "step": 142650 }, { "epoch": 1.2046188596398641, "grad_norm": 0.002957278862595558, "learning_rate": 4.092068522232164e-06, "loss": 0.0048, "step": 142660 }, { "epoch": 1.204703299487028, "grad_norm": 0.7070993781089783, "learning_rate": 4.091343900884169e-06, "loss": 0.0141, "step": 142670 }, { "epoch": 1.2047877393341917, "grad_norm": 0.6139382123947144, "learning_rate": 4.090619299272001e-06, "loss": 0.0079, "step": 142680 }, { "epoch": 1.2048721791813557, "grad_norm": 0.08069217205047607, "learning_rate": 4.0898947174113935e-06, "loss": 0.0095, "step": 142690 }, { "epoch": 1.2049566190285195, "grad_norm": 0.7380463480949402, "learning_rate": 4.089170155318085e-06, "loss": 0.0078, "step": 142700 }, { "epoch": 1.2050410588756835, "grad_norm": 0.34144264459609985, "learning_rate": 4.0884456130078154e-06, "loss": 0.0104, "step": 142710 }, { "epoch": 1.2051254987228472, "grad_norm": 0.16631869971752167, "learning_rate": 4.08772109049632e-06, "loss": 0.0078, "step": 142720 }, { "epoch": 1.2052099385700112, "grad_norm": 0.07578718662261963, "learning_rate": 4.086996587799336e-06, "loss": 0.0117, "step": 142730 }, { "epoch": 1.205294378417175, "grad_norm": 0.26291701197624207, "learning_rate": 4.0862721049325974e-06, "loss": 0.0093, "step": 142740 }, { "epoch": 1.205378818264339, "grad_norm": 0.15838350355625153, "learning_rate": 4.085547641911843e-06, "loss": 0.0041, "step": 142750 }, { "epoch": 1.2054632581115028, "grad_norm": 0.2944332957267761, "learning_rate": 4.084823198752806e-06, "loss": 0.0063, "step": 142760 }, { "epoch": 1.2055476979586668, "grad_norm": 0.23447783291339874, "learning_rate": 4.084098775471223e-06, "loss": 0.0057, "step": 142770 }, { "epoch": 1.2056321378058306, "grad_norm": 0.6252331733703613, "learning_rate": 4.083374372082824e-06, "loss": 0.0063, "step": 142780 }, { "epoch": 1.2057165776529946, "grad_norm": 0.26044440269470215, "learning_rate": 4.0826499886033485e-06, "loss": 0.0067, "step": 142790 }, { "epoch": 1.2058010175001583, "grad_norm": 0.42677971720695496, "learning_rate": 4.081925625048526e-06, "loss": 0.0059, "step": 142800 }, { "epoch": 1.205885457347322, "grad_norm": 0.11997886747121811, "learning_rate": 4.0812012814340916e-06, "loss": 0.0066, "step": 142810 }, { "epoch": 1.205969897194486, "grad_norm": 0.35513269901275635, "learning_rate": 4.080476957775777e-06, "loss": 0.0098, "step": 142820 }, { "epoch": 1.2060543370416499, "grad_norm": 0.3494023382663727, "learning_rate": 4.079752654089313e-06, "loss": 0.0118, "step": 142830 }, { "epoch": 1.2061387768888139, "grad_norm": 0.42442750930786133, "learning_rate": 4.079028370390434e-06, "loss": 0.0116, "step": 142840 }, { "epoch": 1.2062232167359777, "grad_norm": 0.30114495754241943, "learning_rate": 4.078304106694869e-06, "loss": 0.0068, "step": 142850 }, { "epoch": 1.2063076565831417, "grad_norm": 0.19177503883838654, "learning_rate": 4.077579863018352e-06, "loss": 0.0041, "step": 142860 }, { "epoch": 1.2063920964303054, "grad_norm": 0.009027170948684216, "learning_rate": 4.076855639376608e-06, "loss": 0.0056, "step": 142870 }, { "epoch": 1.2064765362774694, "grad_norm": 0.28592970967292786, "learning_rate": 4.076131435785374e-06, "loss": 0.0074, "step": 142880 }, { "epoch": 1.2065609761246332, "grad_norm": 0.4395575523376465, "learning_rate": 4.075407252260374e-06, "loss": 0.0081, "step": 142890 }, { "epoch": 1.206645415971797, "grad_norm": 0.5402177572250366, "learning_rate": 4.07468308881734e-06, "loss": 0.0093, "step": 142900 }, { "epoch": 1.206729855818961, "grad_norm": 0.44345277547836304, "learning_rate": 4.073958945471997e-06, "loss": 0.0059, "step": 142910 }, { "epoch": 1.2068142956661247, "grad_norm": 0.06418367475271225, "learning_rate": 4.073234822240079e-06, "loss": 0.0085, "step": 142920 }, { "epoch": 1.2068987355132887, "grad_norm": 0.7932271957397461, "learning_rate": 4.072510719137312e-06, "loss": 0.0079, "step": 142930 }, { "epoch": 1.2069831753604525, "grad_norm": 0.23061151802539825, "learning_rate": 4.071786636179418e-06, "loss": 0.004, "step": 142940 }, { "epoch": 1.2070676152076165, "grad_norm": 0.22635520994663239, "learning_rate": 4.071062573382131e-06, "loss": 0.0067, "step": 142950 }, { "epoch": 1.2071520550547803, "grad_norm": 0.13648943603038788, "learning_rate": 4.070338530761172e-06, "loss": 0.005, "step": 142960 }, { "epoch": 1.2072364949019443, "grad_norm": 0.06885632872581482, "learning_rate": 4.069614508332272e-06, "loss": 0.004, "step": 142970 }, { "epoch": 1.207320934749108, "grad_norm": 0.3161316514015198, "learning_rate": 4.068890506111152e-06, "loss": 0.0068, "step": 142980 }, { "epoch": 1.207405374596272, "grad_norm": 0.2593872845172882, "learning_rate": 4.068166524113542e-06, "loss": 0.0056, "step": 142990 }, { "epoch": 1.2074898144434358, "grad_norm": 0.3176007866859436, "learning_rate": 4.067442562355162e-06, "loss": 0.0117, "step": 143000 }, { "epoch": 1.2075742542905998, "grad_norm": 0.4179905354976654, "learning_rate": 4.06671862085174e-06, "loss": 0.009, "step": 143010 }, { "epoch": 1.2076586941377636, "grad_norm": 0.27260342240333557, "learning_rate": 4.0659946996189965e-06, "loss": 0.0104, "step": 143020 }, { "epoch": 1.2077431339849274, "grad_norm": 0.08074405789375305, "learning_rate": 4.0652707986726585e-06, "loss": 0.0119, "step": 143030 }, { "epoch": 1.2078275738320914, "grad_norm": 0.1016448438167572, "learning_rate": 4.064546918028447e-06, "loss": 0.0081, "step": 143040 }, { "epoch": 1.2079120136792552, "grad_norm": 0.22209928929805756, "learning_rate": 4.0638230577020845e-06, "loss": 0.0057, "step": 143050 }, { "epoch": 1.2079964535264192, "grad_norm": 0.2508966028690338, "learning_rate": 4.063099217709293e-06, "loss": 0.007, "step": 143060 }, { "epoch": 1.208080893373583, "grad_norm": 0.16102902591228485, "learning_rate": 4.0623753980657935e-06, "loss": 0.0079, "step": 143070 }, { "epoch": 1.208165333220747, "grad_norm": 0.42218026518821716, "learning_rate": 4.06165159878731e-06, "loss": 0.0102, "step": 143080 }, { "epoch": 1.2082497730679107, "grad_norm": 0.1225135400891304, "learning_rate": 4.0609278198895605e-06, "loss": 0.0077, "step": 143090 }, { "epoch": 1.2083342129150747, "grad_norm": 0.40769824385643005, "learning_rate": 4.060204061388268e-06, "loss": 0.0034, "step": 143100 }, { "epoch": 1.2084186527622385, "grad_norm": 0.4293544590473175, "learning_rate": 4.059480323299147e-06, "loss": 0.0078, "step": 143110 }, { "epoch": 1.2085030926094025, "grad_norm": 0.11173628270626068, "learning_rate": 4.058756605637924e-06, "loss": 0.0094, "step": 143120 }, { "epoch": 1.2085875324565662, "grad_norm": 0.18365024030208588, "learning_rate": 4.058032908420313e-06, "loss": 0.006, "step": 143130 }, { "epoch": 1.2086719723037302, "grad_norm": 0.4978134036064148, "learning_rate": 4.057309231662036e-06, "loss": 0.0068, "step": 143140 }, { "epoch": 1.208756412150894, "grad_norm": 0.14588509500026703, "learning_rate": 4.056585575378806e-06, "loss": 0.0057, "step": 143150 }, { "epoch": 1.2088408519980578, "grad_norm": 0.24435259401798248, "learning_rate": 4.055861939586347e-06, "loss": 0.0063, "step": 143160 }, { "epoch": 1.2089252918452218, "grad_norm": 0.4074881970882416, "learning_rate": 4.055138324300373e-06, "loss": 0.0056, "step": 143170 }, { "epoch": 1.2090097316923856, "grad_norm": 0.4286315143108368, "learning_rate": 4.0544147295366005e-06, "loss": 0.0096, "step": 143180 }, { "epoch": 1.2090941715395496, "grad_norm": 0.4457481801509857, "learning_rate": 4.053691155310747e-06, "loss": 0.0112, "step": 143190 }, { "epoch": 1.2091786113867133, "grad_norm": 0.2425093948841095, "learning_rate": 4.052967601638525e-06, "loss": 0.0096, "step": 143200 }, { "epoch": 1.2092630512338773, "grad_norm": 0.015429029241204262, "learning_rate": 4.052244068535655e-06, "loss": 0.0073, "step": 143210 }, { "epoch": 1.2093474910810411, "grad_norm": 0.05745459347963333, "learning_rate": 4.051520556017849e-06, "loss": 0.0041, "step": 143220 }, { "epoch": 1.2094319309282051, "grad_norm": 0.9940877556800842, "learning_rate": 4.050797064100823e-06, "loss": 0.0096, "step": 143230 }, { "epoch": 1.2095163707753689, "grad_norm": 0.24245022237300873, "learning_rate": 4.050073592800288e-06, "loss": 0.008, "step": 143240 }, { "epoch": 1.2096008106225327, "grad_norm": 0.40136057138442993, "learning_rate": 4.049350142131963e-06, "loss": 0.01, "step": 143250 }, { "epoch": 1.2096852504696967, "grad_norm": 0.10721246153116226, "learning_rate": 4.048626712111555e-06, "loss": 0.0083, "step": 143260 }, { "epoch": 1.2097696903168604, "grad_norm": 0.1244020164012909, "learning_rate": 4.047903302754782e-06, "loss": 0.01, "step": 143270 }, { "epoch": 1.2098541301640244, "grad_norm": 0.24480023980140686, "learning_rate": 4.047179914077354e-06, "loss": 0.0067, "step": 143280 }, { "epoch": 1.2099385700111882, "grad_norm": 0.23153457045555115, "learning_rate": 4.046456546094983e-06, "loss": 0.0117, "step": 143290 }, { "epoch": 1.2100230098583522, "grad_norm": 0.5236193537712097, "learning_rate": 4.045733198823381e-06, "loss": 0.0098, "step": 143300 }, { "epoch": 1.210107449705516, "grad_norm": 0.11061781644821167, "learning_rate": 4.045009872278255e-06, "loss": 0.0073, "step": 143310 }, { "epoch": 1.21019188955268, "grad_norm": 0.3082517087459564, "learning_rate": 4.044286566475322e-06, "loss": 0.0093, "step": 143320 }, { "epoch": 1.2102763293998438, "grad_norm": 0.0122413644567132, "learning_rate": 4.0435632814302875e-06, "loss": 0.0067, "step": 143330 }, { "epoch": 1.2103607692470078, "grad_norm": 0.15333294868469238, "learning_rate": 4.042840017158865e-06, "loss": 0.011, "step": 143340 }, { "epoch": 1.2104452090941715, "grad_norm": 0.225211039185524, "learning_rate": 4.042116773676757e-06, "loss": 0.0069, "step": 143350 }, { "epoch": 1.2105296489413355, "grad_norm": 0.3118380606174469, "learning_rate": 4.041393550999679e-06, "loss": 0.0081, "step": 143360 }, { "epoch": 1.2106140887884993, "grad_norm": 0.6261005401611328, "learning_rate": 4.040670349143337e-06, "loss": 0.0122, "step": 143370 }, { "epoch": 1.210698528635663, "grad_norm": 0.28459760546684265, "learning_rate": 4.039947168123438e-06, "loss": 0.0095, "step": 143380 }, { "epoch": 1.210782968482827, "grad_norm": 0.046980082988739014, "learning_rate": 4.039224007955688e-06, "loss": 0.008, "step": 143390 }, { "epoch": 1.2108674083299908, "grad_norm": 0.4265263080596924, "learning_rate": 4.038500868655798e-06, "loss": 0.0046, "step": 143400 }, { "epoch": 1.2109518481771548, "grad_norm": 0.09703169763088226, "learning_rate": 4.037777750239472e-06, "loss": 0.0093, "step": 143410 }, { "epoch": 1.2110362880243186, "grad_norm": 0.41391995549201965, "learning_rate": 4.037054652722415e-06, "loss": 0.012, "step": 143420 }, { "epoch": 1.2111207278714826, "grad_norm": 0.28465691208839417, "learning_rate": 4.0363315761203346e-06, "loss": 0.004, "step": 143430 }, { "epoch": 1.2112051677186464, "grad_norm": 0.2271394431591034, "learning_rate": 4.035608520448933e-06, "loss": 0.0109, "step": 143440 }, { "epoch": 1.2112896075658104, "grad_norm": 0.3572249114513397, "learning_rate": 4.034885485723919e-06, "loss": 0.0067, "step": 143450 }, { "epoch": 1.2113740474129742, "grad_norm": 0.17923833429813385, "learning_rate": 4.034162471960993e-06, "loss": 0.0045, "step": 143460 }, { "epoch": 1.211458487260138, "grad_norm": 0.05140228196978569, "learning_rate": 4.033439479175861e-06, "loss": 0.0116, "step": 143470 }, { "epoch": 1.211542927107302, "grad_norm": 0.2436186522245407, "learning_rate": 4.0327165073842245e-06, "loss": 0.006, "step": 143480 }, { "epoch": 1.211627366954466, "grad_norm": 0.6403630375862122, "learning_rate": 4.0319935566017886e-06, "loss": 0.0065, "step": 143490 }, { "epoch": 1.2117118068016297, "grad_norm": 0.11104167252779007, "learning_rate": 4.031270626844253e-06, "loss": 0.0074, "step": 143500 }, { "epoch": 1.2117962466487935, "grad_norm": 0.4536455571651459, "learning_rate": 4.030547718127322e-06, "loss": 0.0065, "step": 143510 }, { "epoch": 1.2118806864959575, "grad_norm": 0.37932077050209045, "learning_rate": 4.029824830466696e-06, "loss": 0.0058, "step": 143520 }, { "epoch": 1.2119651263431213, "grad_norm": 0.3470810651779175, "learning_rate": 4.029101963878074e-06, "loss": 0.0122, "step": 143530 }, { "epoch": 1.2120495661902853, "grad_norm": 0.21053969860076904, "learning_rate": 4.028379118377162e-06, "loss": 0.0117, "step": 143540 }, { "epoch": 1.212134006037449, "grad_norm": 0.3059298098087311, "learning_rate": 4.027656293979652e-06, "loss": 0.0063, "step": 143550 }, { "epoch": 1.212218445884613, "grad_norm": 0.2510265111923218, "learning_rate": 4.0269334907012515e-06, "loss": 0.0068, "step": 143560 }, { "epoch": 1.2123028857317768, "grad_norm": 0.7577645182609558, "learning_rate": 4.026210708557654e-06, "loss": 0.0098, "step": 143570 }, { "epoch": 1.2123873255789408, "grad_norm": 0.88908451795578, "learning_rate": 4.025487947564562e-06, "loss": 0.0086, "step": 143580 }, { "epoch": 1.2124717654261046, "grad_norm": 0.3985782563686371, "learning_rate": 4.02476520773767e-06, "loss": 0.0102, "step": 143590 }, { "epoch": 1.2125562052732684, "grad_norm": 0.10359565168619156, "learning_rate": 4.024042489092679e-06, "loss": 0.003, "step": 143600 }, { "epoch": 1.2126406451204323, "grad_norm": 0.056000012904405594, "learning_rate": 4.023319791645285e-06, "loss": 0.0089, "step": 143610 }, { "epoch": 1.2127250849675961, "grad_norm": 0.30379876494407654, "learning_rate": 4.022597115411186e-06, "loss": 0.0065, "step": 143620 }, { "epoch": 1.2128095248147601, "grad_norm": 0.23842661082744598, "learning_rate": 4.021874460406077e-06, "loss": 0.0079, "step": 143630 }, { "epoch": 1.212893964661924, "grad_norm": 0.18830333650112152, "learning_rate": 4.021151826645653e-06, "loss": 0.0048, "step": 143640 }, { "epoch": 1.212978404509088, "grad_norm": 0.047661710530519485, "learning_rate": 4.0204292141456126e-06, "loss": 0.0142, "step": 143650 }, { "epoch": 1.2130628443562517, "grad_norm": 0.45667386054992676, "learning_rate": 4.0197066229216476e-06, "loss": 0.0066, "step": 143660 }, { "epoch": 1.2131472842034157, "grad_norm": 0.511929452419281, "learning_rate": 4.018984052989455e-06, "loss": 0.0096, "step": 143670 }, { "epoch": 1.2132317240505794, "grad_norm": 0.16463901102542877, "learning_rate": 4.018261504364725e-06, "loss": 0.0048, "step": 143680 }, { "epoch": 1.2133161638977434, "grad_norm": 0.09700636565685272, "learning_rate": 4.017538977063157e-06, "loss": 0.0065, "step": 143690 }, { "epoch": 1.2134006037449072, "grad_norm": 0.022947387769818306, "learning_rate": 4.016816471100439e-06, "loss": 0.0077, "step": 143700 }, { "epoch": 1.2134850435920712, "grad_norm": 0.9509677290916443, "learning_rate": 4.016093986492268e-06, "loss": 0.011, "step": 143710 }, { "epoch": 1.213569483439235, "grad_norm": 0.19433912634849548, "learning_rate": 4.015371523254331e-06, "loss": 0.0063, "step": 143720 }, { "epoch": 1.2136539232863988, "grad_norm": 0.2362404465675354, "learning_rate": 4.0146490814023255e-06, "loss": 0.0069, "step": 143730 }, { "epoch": 1.2137383631335628, "grad_norm": 0.5245538353919983, "learning_rate": 4.013926660951938e-06, "loss": 0.0075, "step": 143740 }, { "epoch": 1.2138228029807265, "grad_norm": 0.027835344895720482, "learning_rate": 4.013204261918864e-06, "loss": 0.0068, "step": 143750 }, { "epoch": 1.2139072428278905, "grad_norm": 0.22315488755702972, "learning_rate": 4.01248188431879e-06, "loss": 0.0059, "step": 143760 }, { "epoch": 1.2139916826750543, "grad_norm": 0.19849184155464172, "learning_rate": 4.011759528167405e-06, "loss": 0.0069, "step": 143770 }, { "epoch": 1.2140761225222183, "grad_norm": 0.5066179037094116, "learning_rate": 4.011037193480403e-06, "loss": 0.0078, "step": 143780 }, { "epoch": 1.214160562369382, "grad_norm": 0.1296115517616272, "learning_rate": 4.01031488027347e-06, "loss": 0.0082, "step": 143790 }, { "epoch": 1.214245002216546, "grad_norm": 0.44048428535461426, "learning_rate": 4.009592588562295e-06, "loss": 0.011, "step": 143800 }, { "epoch": 1.2143294420637099, "grad_norm": 0.31998515129089355, "learning_rate": 4.008870318362564e-06, "loss": 0.0097, "step": 143810 }, { "epoch": 1.2144138819108736, "grad_norm": 0.10758936405181885, "learning_rate": 4.00814806968997e-06, "loss": 0.0069, "step": 143820 }, { "epoch": 1.2144983217580376, "grad_norm": 0.3890945315361023, "learning_rate": 4.007425842560194e-06, "loss": 0.0068, "step": 143830 }, { "epoch": 1.2145827616052014, "grad_norm": 0.4791816771030426, "learning_rate": 4.006703636988927e-06, "loss": 0.008, "step": 143840 }, { "epoch": 1.2146672014523654, "grad_norm": 0.4257316291332245, "learning_rate": 4.005981452991851e-06, "loss": 0.0104, "step": 143850 }, { "epoch": 1.2147516412995292, "grad_norm": 0.710672914981842, "learning_rate": 4.0052592905846585e-06, "loss": 0.0088, "step": 143860 }, { "epoch": 1.2148360811466932, "grad_norm": 0.35096466541290283, "learning_rate": 4.004537149783028e-06, "loss": 0.0062, "step": 143870 }, { "epoch": 1.214920520993857, "grad_norm": 0.25053778290748596, "learning_rate": 4.0038150306026455e-06, "loss": 0.0037, "step": 143880 }, { "epoch": 1.215004960841021, "grad_norm": 0.11267295479774475, "learning_rate": 4.003092933059198e-06, "loss": 0.0069, "step": 143890 }, { "epoch": 1.2150894006881847, "grad_norm": 0.1955910623073578, "learning_rate": 4.002370857168367e-06, "loss": 0.0097, "step": 143900 }, { "epoch": 1.2151738405353487, "grad_norm": 0.27328088879585266, "learning_rate": 4.0016488029458365e-06, "loss": 0.008, "step": 143910 }, { "epoch": 1.2152582803825125, "grad_norm": 0.32094526290893555, "learning_rate": 4.000926770407289e-06, "loss": 0.0087, "step": 143920 }, { "epoch": 1.2153427202296765, "grad_norm": 0.003283328842371702, "learning_rate": 4.000204759568408e-06, "loss": 0.0078, "step": 143930 }, { "epoch": 1.2154271600768403, "grad_norm": 0.009246626868844032, "learning_rate": 3.999482770444874e-06, "loss": 0.0088, "step": 143940 }, { "epoch": 1.215511599924004, "grad_norm": 0.5883471369743347, "learning_rate": 3.99876080305237e-06, "loss": 0.0084, "step": 143950 }, { "epoch": 1.215596039771168, "grad_norm": 0.10406206548213959, "learning_rate": 3.998038857406575e-06, "loss": 0.0065, "step": 143960 }, { "epoch": 1.2156804796183318, "grad_norm": 0.01253234688192606, "learning_rate": 3.997316933523173e-06, "loss": 0.0058, "step": 143970 }, { "epoch": 1.2157649194654958, "grad_norm": 0.06621817499399185, "learning_rate": 3.996595031417839e-06, "loss": 0.0064, "step": 143980 }, { "epoch": 1.2158493593126596, "grad_norm": 0.2151152491569519, "learning_rate": 3.995873151106258e-06, "loss": 0.0044, "step": 143990 }, { "epoch": 1.2159337991598236, "grad_norm": 1.1416970491409302, "learning_rate": 3.995151292604106e-06, "loss": 0.0121, "step": 144000 }, { "epoch": 1.2160182390069874, "grad_norm": 0.1654984951019287, "learning_rate": 3.994429455927059e-06, "loss": 0.0052, "step": 144010 }, { "epoch": 1.2161026788541514, "grad_norm": 0.19144780933856964, "learning_rate": 3.9937076410908016e-06, "loss": 0.0037, "step": 144020 }, { "epoch": 1.2161871187013151, "grad_norm": 0.005563573446124792, "learning_rate": 3.992985848111006e-06, "loss": 0.0069, "step": 144030 }, { "epoch": 1.216271558548479, "grad_norm": 0.13503025472164154, "learning_rate": 3.9922640770033525e-06, "loss": 0.007, "step": 144040 }, { "epoch": 1.216355998395643, "grad_norm": 0.19941456615924835, "learning_rate": 3.991542327783515e-06, "loss": 0.008, "step": 144050 }, { "epoch": 1.216440438242807, "grad_norm": 0.4948830306529999, "learning_rate": 3.9908206004671736e-06, "loss": 0.0114, "step": 144060 }, { "epoch": 1.2165248780899707, "grad_norm": 0.17790821194648743, "learning_rate": 3.9900988950700005e-06, "loss": 0.0054, "step": 144070 }, { "epoch": 1.2166093179371344, "grad_norm": 0.2507495582103729, "learning_rate": 3.989377211607675e-06, "loss": 0.009, "step": 144080 }, { "epoch": 1.2166937577842984, "grad_norm": 0.11285607516765594, "learning_rate": 3.988655550095866e-06, "loss": 0.0042, "step": 144090 }, { "epoch": 1.2167781976314622, "grad_norm": 0.03621244430541992, "learning_rate": 3.987933910550253e-06, "loss": 0.0051, "step": 144100 }, { "epoch": 1.2168626374786262, "grad_norm": 0.4965228736400604, "learning_rate": 3.987212292986508e-06, "loss": 0.0096, "step": 144110 }, { "epoch": 1.21694707732579, "grad_norm": 0.3897306025028229, "learning_rate": 3.986490697420304e-06, "loss": 0.0061, "step": 144120 }, { "epoch": 1.217031517172954, "grad_norm": 0.1218060627579689, "learning_rate": 3.985769123867316e-06, "loss": 0.0065, "step": 144130 }, { "epoch": 1.2171159570201178, "grad_norm": 0.15389622747898102, "learning_rate": 3.985047572343211e-06, "loss": 0.011, "step": 144140 }, { "epoch": 1.2172003968672818, "grad_norm": 0.22942590713500977, "learning_rate": 3.984326042863669e-06, "loss": 0.0063, "step": 144150 }, { "epoch": 1.2172848367144455, "grad_norm": 0.2543400824069977, "learning_rate": 3.983604535444354e-06, "loss": 0.0079, "step": 144160 }, { "epoch": 1.2173692765616093, "grad_norm": 0.00344658805988729, "learning_rate": 3.982883050100942e-06, "loss": 0.0069, "step": 144170 }, { "epoch": 1.2174537164087733, "grad_norm": 0.07647982984781265, "learning_rate": 3.982161586849101e-06, "loss": 0.0043, "step": 144180 }, { "epoch": 1.217538156255937, "grad_norm": 0.259220153093338, "learning_rate": 3.9814401457045035e-06, "loss": 0.0082, "step": 144190 }, { "epoch": 1.217622596103101, "grad_norm": 0.31026479601860046, "learning_rate": 3.980718726682814e-06, "loss": 0.006, "step": 144200 }, { "epoch": 1.2177070359502649, "grad_norm": 0.26237332820892334, "learning_rate": 3.9799973297997075e-06, "loss": 0.0073, "step": 144210 }, { "epoch": 1.2177914757974289, "grad_norm": 0.18982522189617157, "learning_rate": 3.9792759550708495e-06, "loss": 0.0063, "step": 144220 }, { "epoch": 1.2178759156445926, "grad_norm": 0.30779290199279785, "learning_rate": 3.978554602511908e-06, "loss": 0.0114, "step": 144230 }, { "epoch": 1.2179603554917566, "grad_norm": 0.018762296065688133, "learning_rate": 3.977833272138552e-06, "loss": 0.0124, "step": 144240 }, { "epoch": 1.2180447953389204, "grad_norm": 0.40039926767349243, "learning_rate": 3.977111963966447e-06, "loss": 0.0109, "step": 144250 }, { "epoch": 1.2181292351860844, "grad_norm": 0.12012998759746552, "learning_rate": 3.976390678011261e-06, "loss": 0.0123, "step": 144260 }, { "epoch": 1.2182136750332482, "grad_norm": 0.39946144819259644, "learning_rate": 3.97566941428866e-06, "loss": 0.0065, "step": 144270 }, { "epoch": 1.2182981148804122, "grad_norm": 0.20068594813346863, "learning_rate": 3.974948172814309e-06, "loss": 0.0121, "step": 144280 }, { "epoch": 1.218382554727576, "grad_norm": 0.06745491176843643, "learning_rate": 3.9742269536038725e-06, "loss": 0.0059, "step": 144290 }, { "epoch": 1.2184669945747397, "grad_norm": 0.17630347609519958, "learning_rate": 3.973505756673019e-06, "loss": 0.0106, "step": 144300 }, { "epoch": 1.2185514344219037, "grad_norm": 0.05321217700839043, "learning_rate": 3.972784582037408e-06, "loss": 0.0075, "step": 144310 }, { "epoch": 1.2186358742690675, "grad_norm": 0.15666566789150238, "learning_rate": 3.972063429712706e-06, "loss": 0.0049, "step": 144320 }, { "epoch": 1.2187203141162315, "grad_norm": 0.009392544627189636, "learning_rate": 3.971342299714575e-06, "loss": 0.0114, "step": 144330 }, { "epoch": 1.2188047539633953, "grad_norm": 0.06841056793928146, "learning_rate": 3.970621192058679e-06, "loss": 0.0052, "step": 144340 }, { "epoch": 1.2188891938105593, "grad_norm": 0.006672212854027748, "learning_rate": 3.969900106760681e-06, "loss": 0.0117, "step": 144350 }, { "epoch": 1.218973633657723, "grad_norm": 0.3127251863479614, "learning_rate": 3.96917904383624e-06, "loss": 0.0071, "step": 144360 }, { "epoch": 1.219058073504887, "grad_norm": 0.24454817175865173, "learning_rate": 3.968458003301021e-06, "loss": 0.0074, "step": 144370 }, { "epoch": 1.2191425133520508, "grad_norm": 0.5183365345001221, "learning_rate": 3.96773698517068e-06, "loss": 0.0112, "step": 144380 }, { "epoch": 1.2192269531992146, "grad_norm": 0.1452101618051529, "learning_rate": 3.967015989460881e-06, "loss": 0.0059, "step": 144390 }, { "epoch": 1.2193113930463786, "grad_norm": 0.2977348864078522, "learning_rate": 3.966295016187283e-06, "loss": 0.0093, "step": 144400 }, { "epoch": 1.2193958328935426, "grad_norm": 0.16475000977516174, "learning_rate": 3.965574065365547e-06, "loss": 0.0022, "step": 144410 }, { "epoch": 1.2194802727407064, "grad_norm": 0.01581408828496933, "learning_rate": 3.964853137011328e-06, "loss": 0.0107, "step": 144420 }, { "epoch": 1.2195647125878701, "grad_norm": 0.5177889466285706, "learning_rate": 3.9641322311402886e-06, "loss": 0.0105, "step": 144430 }, { "epoch": 1.2196491524350341, "grad_norm": 0.4936180114746094, "learning_rate": 3.963411347768084e-06, "loss": 0.0104, "step": 144440 }, { "epoch": 1.219733592282198, "grad_norm": 0.3766217827796936, "learning_rate": 3.962690486910373e-06, "loss": 0.0048, "step": 144450 }, { "epoch": 1.219818032129362, "grad_norm": 0.668765127658844, "learning_rate": 3.961969648582813e-06, "loss": 0.0106, "step": 144460 }, { "epoch": 1.2199024719765257, "grad_norm": 0.15378636121749878, "learning_rate": 3.961248832801058e-06, "loss": 0.0085, "step": 144470 }, { "epoch": 1.2199869118236897, "grad_norm": 0.5502285361289978, "learning_rate": 3.960528039580767e-06, "loss": 0.012, "step": 144480 }, { "epoch": 1.2200713516708535, "grad_norm": 0.4074711203575134, "learning_rate": 3.959807268937591e-06, "loss": 0.006, "step": 144490 }, { "epoch": 1.2201557915180175, "grad_norm": 0.29002586007118225, "learning_rate": 3.95908652088719e-06, "loss": 0.0123, "step": 144500 }, { "epoch": 1.2202402313651812, "grad_norm": 0.01851334422826767, "learning_rate": 3.9583657954452155e-06, "loss": 0.0061, "step": 144510 }, { "epoch": 1.220324671212345, "grad_norm": 0.33401355147361755, "learning_rate": 3.957645092627325e-06, "loss": 0.0051, "step": 144520 }, { "epoch": 1.220409111059509, "grad_norm": 0.02132178470492363, "learning_rate": 3.956924412449166e-06, "loss": 0.0051, "step": 144530 }, { "epoch": 1.2204935509066728, "grad_norm": 0.6115899085998535, "learning_rate": 3.956203754926397e-06, "loss": 0.0093, "step": 144540 }, { "epoch": 1.2205779907538368, "grad_norm": 0.19393417239189148, "learning_rate": 3.955483120074668e-06, "loss": 0.0105, "step": 144550 }, { "epoch": 1.2206624306010005, "grad_norm": 0.1192331612110138, "learning_rate": 3.954762507909632e-06, "loss": 0.0033, "step": 144560 }, { "epoch": 1.2207468704481645, "grad_norm": 0.15395201742649078, "learning_rate": 3.954041918446939e-06, "loss": 0.0068, "step": 144570 }, { "epoch": 1.2208313102953283, "grad_norm": 0.37864381074905396, "learning_rate": 3.953321351702243e-06, "loss": 0.0102, "step": 144580 }, { "epoch": 1.2209157501424923, "grad_norm": 0.03041842207312584, "learning_rate": 3.9526008076911926e-06, "loss": 0.0056, "step": 144590 }, { "epoch": 1.221000189989656, "grad_norm": 0.14673097431659698, "learning_rate": 3.9518802864294365e-06, "loss": 0.0094, "step": 144600 }, { "epoch": 1.22108462983682, "grad_norm": 0.08395750820636749, "learning_rate": 3.951159787932628e-06, "loss": 0.0068, "step": 144610 }, { "epoch": 1.2211690696839839, "grad_norm": 0.08333616703748703, "learning_rate": 3.950439312216411e-06, "loss": 0.0105, "step": 144620 }, { "epoch": 1.2212535095311479, "grad_norm": 0.5851827263832092, "learning_rate": 3.949718859296439e-06, "loss": 0.0075, "step": 144630 }, { "epoch": 1.2213379493783116, "grad_norm": 0.3419801592826843, "learning_rate": 3.948998429188358e-06, "loss": 0.0063, "step": 144640 }, { "epoch": 1.2214223892254754, "grad_norm": 0.195950448513031, "learning_rate": 3.948278021907816e-06, "loss": 0.0138, "step": 144650 }, { "epoch": 1.2215068290726394, "grad_norm": 0.07883039861917496, "learning_rate": 3.947557637470458e-06, "loss": 0.008, "step": 144660 }, { "epoch": 1.2215912689198032, "grad_norm": 0.12673549354076385, "learning_rate": 3.946837275891934e-06, "loss": 0.0079, "step": 144670 }, { "epoch": 1.2216757087669672, "grad_norm": 0.05787937715649605, "learning_rate": 3.946116937187888e-06, "loss": 0.0043, "step": 144680 }, { "epoch": 1.221760148614131, "grad_norm": 0.19604581594467163, "learning_rate": 3.9453966213739675e-06, "loss": 0.0039, "step": 144690 }, { "epoch": 1.221844588461295, "grad_norm": 0.032724060118198395, "learning_rate": 3.944676328465817e-06, "loss": 0.005, "step": 144700 }, { "epoch": 1.2219290283084587, "grad_norm": 0.058277592062950134, "learning_rate": 3.943956058479076e-06, "loss": 0.0059, "step": 144710 }, { "epoch": 1.2220134681556227, "grad_norm": 0.4910035729408264, "learning_rate": 3.943235811429397e-06, "loss": 0.0055, "step": 144720 }, { "epoch": 1.2220979080027865, "grad_norm": 0.15539692342281342, "learning_rate": 3.942515587332418e-06, "loss": 0.0057, "step": 144730 }, { "epoch": 1.2221823478499503, "grad_norm": 0.5846278071403503, "learning_rate": 3.941795386203784e-06, "loss": 0.0064, "step": 144740 }, { "epoch": 1.2222667876971143, "grad_norm": 0.5586581826210022, "learning_rate": 3.941075208059137e-06, "loss": 0.0134, "step": 144750 }, { "epoch": 1.222351227544278, "grad_norm": 0.23660427331924438, "learning_rate": 3.940355052914122e-06, "loss": 0.0098, "step": 144760 }, { "epoch": 1.222435667391442, "grad_norm": 0.18693368136882782, "learning_rate": 3.9396349207843745e-06, "loss": 0.0073, "step": 144770 }, { "epoch": 1.2225201072386058, "grad_norm": 0.2847278118133545, "learning_rate": 3.938914811685541e-06, "loss": 0.0068, "step": 144780 }, { "epoch": 1.2226045470857698, "grad_norm": 0.06913458555936813, "learning_rate": 3.93819472563326e-06, "loss": 0.0123, "step": 144790 }, { "epoch": 1.2226889869329336, "grad_norm": 0.48032641410827637, "learning_rate": 3.937474662643172e-06, "loss": 0.0068, "step": 144800 }, { "epoch": 1.2227734267800976, "grad_norm": 0.2847127318382263, "learning_rate": 3.936754622730916e-06, "loss": 0.0091, "step": 144810 }, { "epoch": 1.2228578666272614, "grad_norm": 0.2719183564186096, "learning_rate": 3.936034605912133e-06, "loss": 0.0074, "step": 144820 }, { "epoch": 1.2229423064744254, "grad_norm": 0.39445579051971436, "learning_rate": 3.935314612202461e-06, "loss": 0.0116, "step": 144830 }, { "epoch": 1.2230267463215891, "grad_norm": 0.2492913007736206, "learning_rate": 3.934594641617535e-06, "loss": 0.0047, "step": 144840 }, { "epoch": 1.2231111861687531, "grad_norm": 0.14235681295394897, "learning_rate": 3.933874694172998e-06, "loss": 0.0039, "step": 144850 }, { "epoch": 1.223195626015917, "grad_norm": 0.16878065466880798, "learning_rate": 3.933154769884481e-06, "loss": 0.0054, "step": 144860 }, { "epoch": 1.2232800658630807, "grad_norm": 0.2510403096675873, "learning_rate": 3.932434868767625e-06, "loss": 0.0111, "step": 144870 }, { "epoch": 1.2233645057102447, "grad_norm": 0.13120770454406738, "learning_rate": 3.931714990838065e-06, "loss": 0.0053, "step": 144880 }, { "epoch": 1.2234489455574085, "grad_norm": 0.5987173914909363, "learning_rate": 3.930995136111438e-06, "loss": 0.0099, "step": 144890 }, { "epoch": 1.2235333854045725, "grad_norm": 0.25827041268348694, "learning_rate": 3.930275304603374e-06, "loss": 0.0074, "step": 144900 }, { "epoch": 1.2236178252517362, "grad_norm": 0.1891675740480423, "learning_rate": 3.929555496329514e-06, "loss": 0.0066, "step": 144910 }, { "epoch": 1.2237022650989002, "grad_norm": 1.0825705528259277, "learning_rate": 3.928835711305488e-06, "loss": 0.011, "step": 144920 }, { "epoch": 1.223786704946064, "grad_norm": 0.24017153680324554, "learning_rate": 3.928115949546933e-06, "loss": 0.0091, "step": 144930 }, { "epoch": 1.223871144793228, "grad_norm": 0.40641483664512634, "learning_rate": 3.9273962110694785e-06, "loss": 0.0115, "step": 144940 }, { "epoch": 1.2239555846403918, "grad_norm": 0.0853787437081337, "learning_rate": 3.926676495888757e-06, "loss": 0.0069, "step": 144950 }, { "epoch": 1.2240400244875556, "grad_norm": 0.15599682927131653, "learning_rate": 3.925956804020404e-06, "loss": 0.0076, "step": 144960 }, { "epoch": 1.2241244643347196, "grad_norm": 0.18771539628505707, "learning_rate": 3.925237135480049e-06, "loss": 0.0192, "step": 144970 }, { "epoch": 1.2242089041818836, "grad_norm": 0.001917291316203773, "learning_rate": 3.924517490283323e-06, "loss": 0.0057, "step": 144980 }, { "epoch": 1.2242933440290473, "grad_norm": 0.24441015720367432, "learning_rate": 3.923797868445855e-06, "loss": 0.0056, "step": 144990 }, { "epoch": 1.224377783876211, "grad_norm": 0.27094224095344543, "learning_rate": 3.9230782699832795e-06, "loss": 0.0069, "step": 145000 }, { "epoch": 1.224462223723375, "grad_norm": 0.2972703278064728, "learning_rate": 3.922358694911221e-06, "loss": 0.0074, "step": 145010 }, { "epoch": 1.2245466635705389, "grad_norm": 0.11563479155302048, "learning_rate": 3.921639143245313e-06, "loss": 0.009, "step": 145020 }, { "epoch": 1.2246311034177029, "grad_norm": 0.39027583599090576, "learning_rate": 3.920919615001179e-06, "loss": 0.0071, "step": 145030 }, { "epoch": 1.2247155432648666, "grad_norm": 0.3920572102069855, "learning_rate": 3.920200110194453e-06, "loss": 0.0086, "step": 145040 }, { "epoch": 1.2247999831120306, "grad_norm": 0.09454408288002014, "learning_rate": 3.91948062884076e-06, "loss": 0.0089, "step": 145050 }, { "epoch": 1.2248844229591944, "grad_norm": 0.28359872102737427, "learning_rate": 3.918761170955722e-06, "loss": 0.0173, "step": 145060 }, { "epoch": 1.2249688628063584, "grad_norm": 0.5007611513137817, "learning_rate": 3.918041736554973e-06, "loss": 0.0097, "step": 145070 }, { "epoch": 1.2250533026535222, "grad_norm": 0.19976924359798431, "learning_rate": 3.917322325654134e-06, "loss": 0.013, "step": 145080 }, { "epoch": 1.225137742500686, "grad_norm": 0.13042858242988586, "learning_rate": 3.916602938268834e-06, "loss": 0.0054, "step": 145090 }, { "epoch": 1.22522218234785, "grad_norm": 0.21263065934181213, "learning_rate": 3.915883574414694e-06, "loss": 0.0074, "step": 145100 }, { "epoch": 1.2253066221950137, "grad_norm": 0.2150624692440033, "learning_rate": 3.915164234107342e-06, "loss": 0.0112, "step": 145110 }, { "epoch": 1.2253910620421777, "grad_norm": 0.3245658874511719, "learning_rate": 3.9144449173624e-06, "loss": 0.0072, "step": 145120 }, { "epoch": 1.2254755018893415, "grad_norm": 0.3041238784790039, "learning_rate": 3.913725624195493e-06, "loss": 0.007, "step": 145130 }, { "epoch": 1.2255599417365055, "grad_norm": 0.42252233624458313, "learning_rate": 3.91300635462224e-06, "loss": 0.0077, "step": 145140 }, { "epoch": 1.2256443815836693, "grad_norm": 0.4348152279853821, "learning_rate": 3.912287108658269e-06, "loss": 0.0072, "step": 145150 }, { "epoch": 1.2257288214308333, "grad_norm": 0.0766541063785553, "learning_rate": 3.911567886319199e-06, "loss": 0.0038, "step": 145160 }, { "epoch": 1.225813261277997, "grad_norm": 0.04131869226694107, "learning_rate": 3.910848687620651e-06, "loss": 0.008, "step": 145170 }, { "epoch": 1.225897701125161, "grad_norm": 0.37341028451919556, "learning_rate": 3.910129512578247e-06, "loss": 0.0068, "step": 145180 }, { "epoch": 1.2259821409723248, "grad_norm": 0.1819162666797638, "learning_rate": 3.909410361207604e-06, "loss": 0.0116, "step": 145190 }, { "epoch": 1.2260665808194888, "grad_norm": 0.25972694158554077, "learning_rate": 3.9086912335243475e-06, "loss": 0.0056, "step": 145200 }, { "epoch": 1.2261510206666526, "grad_norm": 0.21239574253559113, "learning_rate": 3.907972129544092e-06, "loss": 0.0076, "step": 145210 }, { "epoch": 1.2262354605138164, "grad_norm": 0.05773225426673889, "learning_rate": 3.907253049282459e-06, "loss": 0.0052, "step": 145220 }, { "epoch": 1.2263199003609804, "grad_norm": 0.2708910405635834, "learning_rate": 3.906533992755064e-06, "loss": 0.0064, "step": 145230 }, { "epoch": 1.2264043402081442, "grad_norm": 0.6798348426818848, "learning_rate": 3.90581495997753e-06, "loss": 0.0136, "step": 145240 }, { "epoch": 1.2264887800553081, "grad_norm": 0.39534977078437805, "learning_rate": 3.905095950965468e-06, "loss": 0.0099, "step": 145250 }, { "epoch": 1.226573219902472, "grad_norm": 0.6657136082649231, "learning_rate": 3.904376965734499e-06, "loss": 0.0068, "step": 145260 }, { "epoch": 1.226657659749636, "grad_norm": 0.276823490858078, "learning_rate": 3.903658004300236e-06, "loss": 0.0069, "step": 145270 }, { "epoch": 1.2267420995967997, "grad_norm": 0.3690364956855774, "learning_rate": 3.9029390666782985e-06, "loss": 0.0116, "step": 145280 }, { "epoch": 1.2268265394439637, "grad_norm": 0.1780572086572647, "learning_rate": 3.9022201528843e-06, "loss": 0.0048, "step": 145290 }, { "epoch": 1.2269109792911275, "grad_norm": 0.15485304594039917, "learning_rate": 3.901501262933853e-06, "loss": 0.0066, "step": 145300 }, { "epoch": 1.2269954191382912, "grad_norm": 0.23115716874599457, "learning_rate": 3.900782396842575e-06, "loss": 0.0151, "step": 145310 }, { "epoch": 1.2270798589854552, "grad_norm": 0.04271857813000679, "learning_rate": 3.900063554626076e-06, "loss": 0.006, "step": 145320 }, { "epoch": 1.227164298832619, "grad_norm": 0.33346569538116455, "learning_rate": 3.899344736299975e-06, "loss": 0.0073, "step": 145330 }, { "epoch": 1.227248738679783, "grad_norm": 0.3970043659210205, "learning_rate": 3.898625941879877e-06, "loss": 0.0113, "step": 145340 }, { "epoch": 1.2273331785269468, "grad_norm": 0.15087859332561493, "learning_rate": 3.897907171381399e-06, "loss": 0.0055, "step": 145350 }, { "epoch": 1.2274176183741108, "grad_norm": 0.18232578039169312, "learning_rate": 3.897188424820152e-06, "loss": 0.0074, "step": 145360 }, { "epoch": 1.2275020582212746, "grad_norm": 0.19916875660419464, "learning_rate": 3.896469702211747e-06, "loss": 0.0092, "step": 145370 }, { "epoch": 1.2275864980684386, "grad_norm": 0.8997047543525696, "learning_rate": 3.895751003571791e-06, "loss": 0.0079, "step": 145380 }, { "epoch": 1.2276709379156023, "grad_norm": 0.20882710814476013, "learning_rate": 3.8950323289159e-06, "loss": 0.009, "step": 145390 }, { "epoch": 1.2277553777627663, "grad_norm": 0.18844833970069885, "learning_rate": 3.894313678259678e-06, "loss": 0.009, "step": 145400 }, { "epoch": 1.22783981760993, "grad_norm": 0.3100869953632355, "learning_rate": 3.893595051618738e-06, "loss": 0.0066, "step": 145410 }, { "epoch": 1.227924257457094, "grad_norm": 0.15891608595848083, "learning_rate": 3.892876449008688e-06, "loss": 0.003, "step": 145420 }, { "epoch": 1.2280086973042579, "grad_norm": 0.12198061496019363, "learning_rate": 3.8921578704451315e-06, "loss": 0.004, "step": 145430 }, { "epoch": 1.2280931371514217, "grad_norm": 0.04172622039914131, "learning_rate": 3.891439315943681e-06, "loss": 0.0098, "step": 145440 }, { "epoch": 1.2281775769985857, "grad_norm": 0.15093722939491272, "learning_rate": 3.89072078551994e-06, "loss": 0.0082, "step": 145450 }, { "epoch": 1.2282620168457494, "grad_norm": 0.3668496310710907, "learning_rate": 3.890002279189519e-06, "loss": 0.0065, "step": 145460 }, { "epoch": 1.2283464566929134, "grad_norm": 0.6733412742614746, "learning_rate": 3.889283796968017e-06, "loss": 0.0063, "step": 145470 }, { "epoch": 1.2284308965400772, "grad_norm": 0.2630462944507599, "learning_rate": 3.888565338871046e-06, "loss": 0.0038, "step": 145480 }, { "epoch": 1.2285153363872412, "grad_norm": 0.49902090430259705, "learning_rate": 3.887846904914208e-06, "loss": 0.0061, "step": 145490 }, { "epoch": 1.228599776234405, "grad_norm": 0.21714268624782562, "learning_rate": 3.887128495113107e-06, "loss": 0.0039, "step": 145500 }, { "epoch": 1.228684216081569, "grad_norm": 0.020027169957756996, "learning_rate": 3.886410109483346e-06, "loss": 0.0086, "step": 145510 }, { "epoch": 1.2287686559287327, "grad_norm": 0.26831111311912537, "learning_rate": 3.8856917480405306e-06, "loss": 0.0043, "step": 145520 }, { "epoch": 1.2288530957758967, "grad_norm": 0.08282732963562012, "learning_rate": 3.884973410800262e-06, "loss": 0.0029, "step": 145530 }, { "epoch": 1.2289375356230605, "grad_norm": 0.43837904930114746, "learning_rate": 3.884255097778142e-06, "loss": 0.0095, "step": 145540 }, { "epoch": 1.2290219754702245, "grad_norm": 0.34775230288505554, "learning_rate": 3.883536808989773e-06, "loss": 0.0106, "step": 145550 }, { "epoch": 1.2291064153173883, "grad_norm": 0.28258347511291504, "learning_rate": 3.882818544450754e-06, "loss": 0.0047, "step": 145560 }, { "epoch": 1.229190855164552, "grad_norm": 0.38597655296325684, "learning_rate": 3.88210030417669e-06, "loss": 0.0085, "step": 145570 }, { "epoch": 1.229275295011716, "grad_norm": 0.11435484886169434, "learning_rate": 3.881382088183177e-06, "loss": 0.0074, "step": 145580 }, { "epoch": 1.2293597348588798, "grad_norm": 0.3506484925746918, "learning_rate": 3.880663896485816e-06, "loss": 0.0044, "step": 145590 }, { "epoch": 1.2294441747060438, "grad_norm": 0.031159112229943275, "learning_rate": 3.879945729100205e-06, "loss": 0.0055, "step": 145600 }, { "epoch": 1.2295286145532076, "grad_norm": 0.29590725898742676, "learning_rate": 3.879227586041944e-06, "loss": 0.0088, "step": 145610 }, { "epoch": 1.2296130544003716, "grad_norm": 0.2586207985877991, "learning_rate": 3.87850946732663e-06, "loss": 0.0142, "step": 145620 }, { "epoch": 1.2296974942475354, "grad_norm": 0.2932136654853821, "learning_rate": 3.877791372969861e-06, "loss": 0.008, "step": 145630 }, { "epoch": 1.2297819340946994, "grad_norm": 0.3097120523452759, "learning_rate": 3.8770733029872335e-06, "loss": 0.0111, "step": 145640 }, { "epoch": 1.2298663739418632, "grad_norm": 0.06377984583377838, "learning_rate": 3.8763552573943435e-06, "loss": 0.0041, "step": 145650 }, { "epoch": 1.229950813789027, "grad_norm": 0.41445237398147583, "learning_rate": 3.875637236206788e-06, "loss": 0.0108, "step": 145660 }, { "epoch": 1.230035253636191, "grad_norm": 0.11300147324800491, "learning_rate": 3.874919239440158e-06, "loss": 0.0062, "step": 145670 }, { "epoch": 1.2301196934833547, "grad_norm": 0.6612300276756287, "learning_rate": 3.874201267110055e-06, "loss": 0.0163, "step": 145680 }, { "epoch": 1.2302041333305187, "grad_norm": 0.0962507352232933, "learning_rate": 3.873483319232068e-06, "loss": 0.0046, "step": 145690 }, { "epoch": 1.2302885731776825, "grad_norm": 0.5496397018432617, "learning_rate": 3.8727653958217934e-06, "loss": 0.0116, "step": 145700 }, { "epoch": 1.2303730130248465, "grad_norm": 0.04434318467974663, "learning_rate": 3.8720474968948216e-06, "loss": 0.0058, "step": 145710 }, { "epoch": 1.2304574528720102, "grad_norm": 0.4431414306163788, "learning_rate": 3.871329622466749e-06, "loss": 0.0068, "step": 145720 }, { "epoch": 1.2305418927191742, "grad_norm": 0.04233943298459053, "learning_rate": 3.870611772553165e-06, "loss": 0.0132, "step": 145730 }, { "epoch": 1.230626332566338, "grad_norm": 0.2675410807132721, "learning_rate": 3.8698939471696626e-06, "loss": 0.0065, "step": 145740 }, { "epoch": 1.230710772413502, "grad_norm": 0.15199637413024902, "learning_rate": 3.86917614633183e-06, "loss": 0.0113, "step": 145750 }, { "epoch": 1.2307952122606658, "grad_norm": 0.09669223427772522, "learning_rate": 3.868458370055262e-06, "loss": 0.0067, "step": 145760 }, { "epoch": 1.2308796521078298, "grad_norm": 0.26050812005996704, "learning_rate": 3.867740618355546e-06, "loss": 0.0097, "step": 145770 }, { "epoch": 1.2309640919549936, "grad_norm": 0.11078089475631714, "learning_rate": 3.867022891248271e-06, "loss": 0.0082, "step": 145780 }, { "epoch": 1.2310485318021573, "grad_norm": 0.17743059992790222, "learning_rate": 3.866305188749027e-06, "loss": 0.006, "step": 145790 }, { "epoch": 1.2311329716493213, "grad_norm": 0.17072975635528564, "learning_rate": 3.865587510873402e-06, "loss": 0.011, "step": 145800 }, { "epoch": 1.2312174114964851, "grad_norm": 0.5192262530326843, "learning_rate": 3.8648698576369835e-06, "loss": 0.0083, "step": 145810 }, { "epoch": 1.2313018513436491, "grad_norm": 0.6480841636657715, "learning_rate": 3.86415222905536e-06, "loss": 0.0092, "step": 145820 }, { "epoch": 1.2313862911908129, "grad_norm": 0.3202302157878876, "learning_rate": 3.863434625144117e-06, "loss": 0.006, "step": 145830 }, { "epoch": 1.2314707310379769, "grad_norm": 0.35110634565353394, "learning_rate": 3.862717045918839e-06, "loss": 0.0091, "step": 145840 }, { "epoch": 1.2315551708851407, "grad_norm": 0.2707653045654297, "learning_rate": 3.861999491395117e-06, "loss": 0.0064, "step": 145850 }, { "epoch": 1.2316396107323047, "grad_norm": 0.5447304844856262, "learning_rate": 3.86128196158853e-06, "loss": 0.0134, "step": 145860 }, { "epoch": 1.2317240505794684, "grad_norm": 0.698921799659729, "learning_rate": 3.860564456514667e-06, "loss": 0.0088, "step": 145870 }, { "epoch": 1.2318084904266322, "grad_norm": 0.2990868091583252, "learning_rate": 3.859846976189111e-06, "loss": 0.0032, "step": 145880 }, { "epoch": 1.2318929302737962, "grad_norm": 0.07583300024271011, "learning_rate": 3.8591295206274434e-06, "loss": 0.0078, "step": 145890 }, { "epoch": 1.2319773701209602, "grad_norm": 0.2601369619369507, "learning_rate": 3.85841208984525e-06, "loss": 0.0122, "step": 145900 }, { "epoch": 1.232061809968124, "grad_norm": 0.33990994095802307, "learning_rate": 3.857694683858111e-06, "loss": 0.006, "step": 145910 }, { "epoch": 1.2321462498152878, "grad_norm": 0.20687848329544067, "learning_rate": 3.856977302681611e-06, "loss": 0.0071, "step": 145920 }, { "epoch": 1.2322306896624518, "grad_norm": 0.11567794531583786, "learning_rate": 3.856259946331328e-06, "loss": 0.0063, "step": 145930 }, { "epoch": 1.2323151295096155, "grad_norm": 0.033322401344776154, "learning_rate": 3.855542614822847e-06, "loss": 0.0068, "step": 145940 }, { "epoch": 1.2323995693567795, "grad_norm": 0.5025679469108582, "learning_rate": 3.8548253081717425e-06, "loss": 0.0108, "step": 145950 }, { "epoch": 1.2324840092039433, "grad_norm": 0.20247440040111542, "learning_rate": 3.854108026393601e-06, "loss": 0.0058, "step": 145960 }, { "epoch": 1.2325684490511073, "grad_norm": 0.15732963383197784, "learning_rate": 3.8533907695039966e-06, "loss": 0.0055, "step": 145970 }, { "epoch": 1.232652888898271, "grad_norm": 0.31993818283081055, "learning_rate": 3.85267353751851e-06, "loss": 0.005, "step": 145980 }, { "epoch": 1.232737328745435, "grad_norm": 0.3557644188404083, "learning_rate": 3.851956330452718e-06, "loss": 0.006, "step": 145990 }, { "epoch": 1.2328217685925988, "grad_norm": 0.16876563429832458, "learning_rate": 3.851239148322201e-06, "loss": 0.0089, "step": 146000 }, { "epoch": 1.2329062084397626, "grad_norm": 0.2961406409740448, "learning_rate": 3.850521991142534e-06, "loss": 0.0201, "step": 146010 }, { "epoch": 1.2329906482869266, "grad_norm": 0.03440544381737709, "learning_rate": 3.849804858929294e-06, "loss": 0.004, "step": 146020 }, { "epoch": 1.2330750881340904, "grad_norm": 0.22438731789588928, "learning_rate": 3.849087751698056e-06, "loss": 0.008, "step": 146030 }, { "epoch": 1.2331595279812544, "grad_norm": 0.38950449228286743, "learning_rate": 3.848370669464395e-06, "loss": 0.009, "step": 146040 }, { "epoch": 1.2332439678284182, "grad_norm": 0.20614543557167053, "learning_rate": 3.847653612243889e-06, "loss": 0.0058, "step": 146050 }, { "epoch": 1.2333284076755822, "grad_norm": 0.4369513690471649, "learning_rate": 3.84693658005211e-06, "loss": 0.0093, "step": 146060 }, { "epoch": 1.233412847522746, "grad_norm": 0.10354430973529816, "learning_rate": 3.846219572904633e-06, "loss": 0.0099, "step": 146070 }, { "epoch": 1.23349728736991, "grad_norm": 0.06054495647549629, "learning_rate": 3.845502590817027e-06, "loss": 0.0068, "step": 146080 }, { "epoch": 1.2335817272170737, "grad_norm": 0.7303281426429749, "learning_rate": 3.844785633804872e-06, "loss": 0.0048, "step": 146090 }, { "epoch": 1.2336661670642377, "grad_norm": 0.21309693157672882, "learning_rate": 3.844068701883734e-06, "loss": 0.0069, "step": 146100 }, { "epoch": 1.2337506069114015, "grad_norm": 0.22899030148983002, "learning_rate": 3.843351795069189e-06, "loss": 0.0088, "step": 146110 }, { "epoch": 1.2338350467585655, "grad_norm": 0.2062903344631195, "learning_rate": 3.842634913376805e-06, "loss": 0.0118, "step": 146120 }, { "epoch": 1.2339194866057293, "grad_norm": 0.35189303755760193, "learning_rate": 3.841918056822153e-06, "loss": 0.0065, "step": 146130 }, { "epoch": 1.234003926452893, "grad_norm": 0.17841781675815582, "learning_rate": 3.841201225420804e-06, "loss": 0.0067, "step": 146140 }, { "epoch": 1.234088366300057, "grad_norm": 0.4103912115097046, "learning_rate": 3.840484419188326e-06, "loss": 0.0139, "step": 146150 }, { "epoch": 1.2341728061472208, "grad_norm": 0.35650280117988586, "learning_rate": 3.8397676381402905e-06, "loss": 0.0098, "step": 146160 }, { "epoch": 1.2342572459943848, "grad_norm": 0.09220848232507706, "learning_rate": 3.839050882292262e-06, "loss": 0.0066, "step": 146170 }, { "epoch": 1.2343416858415486, "grad_norm": 0.1185905933380127, "learning_rate": 3.8383341516598125e-06, "loss": 0.007, "step": 146180 }, { "epoch": 1.2344261256887126, "grad_norm": 0.1330975592136383, "learning_rate": 3.8376174462585054e-06, "loss": 0.0079, "step": 146190 }, { "epoch": 1.2345105655358763, "grad_norm": 0.3949213922023773, "learning_rate": 3.836900766103911e-06, "loss": 0.0066, "step": 146200 }, { "epoch": 1.2345950053830403, "grad_norm": 0.34779226779937744, "learning_rate": 3.836184111211591e-06, "loss": 0.0056, "step": 146210 }, { "epoch": 1.2346794452302041, "grad_norm": 0.32647860050201416, "learning_rate": 3.835467481597116e-06, "loss": 0.0083, "step": 146220 }, { "epoch": 1.234763885077368, "grad_norm": 0.3416101336479187, "learning_rate": 3.83475087727605e-06, "loss": 0.0138, "step": 146230 }, { "epoch": 1.234848324924532, "grad_norm": 0.2935608923435211, "learning_rate": 3.834034298263953e-06, "loss": 0.0063, "step": 146240 }, { "epoch": 1.2349327647716957, "grad_norm": 0.32781660556793213, "learning_rate": 3.833317744576393e-06, "loss": 0.0036, "step": 146250 }, { "epoch": 1.2350172046188597, "grad_norm": 0.2510334849357605, "learning_rate": 3.832601216228933e-06, "loss": 0.0104, "step": 146260 }, { "epoch": 1.2351016444660234, "grad_norm": 0.5836463570594788, "learning_rate": 3.831884713237136e-06, "loss": 0.016, "step": 146270 }, { "epoch": 1.2351860843131874, "grad_norm": 0.42715221643447876, "learning_rate": 3.831168235616561e-06, "loss": 0.006, "step": 146280 }, { "epoch": 1.2352705241603512, "grad_norm": 0.2636358439922333, "learning_rate": 3.830451783382775e-06, "loss": 0.0083, "step": 146290 }, { "epoch": 1.2353549640075152, "grad_norm": 0.14441192150115967, "learning_rate": 3.829735356551335e-06, "loss": 0.0156, "step": 146300 }, { "epoch": 1.235439403854679, "grad_norm": 0.0076326364651322365, "learning_rate": 3.829018955137805e-06, "loss": 0.0034, "step": 146310 }, { "epoch": 1.235523843701843, "grad_norm": 0.5091220736503601, "learning_rate": 3.828302579157742e-06, "loss": 0.0112, "step": 146320 }, { "epoch": 1.2356082835490068, "grad_norm": 0.06814923137426376, "learning_rate": 3.827586228626707e-06, "loss": 0.0069, "step": 146330 }, { "epoch": 1.2356927233961708, "grad_norm": 0.22564244270324707, "learning_rate": 3.826869903560259e-06, "loss": 0.006, "step": 146340 }, { "epoch": 1.2357771632433345, "grad_norm": 0.09113556146621704, "learning_rate": 3.826153603973958e-06, "loss": 0.0074, "step": 146350 }, { "epoch": 1.2358616030904983, "grad_norm": 0.22628074884414673, "learning_rate": 3.825437329883359e-06, "loss": 0.0074, "step": 146360 }, { "epoch": 1.2359460429376623, "grad_norm": 0.29034730792045593, "learning_rate": 3.8247210813040185e-06, "loss": 0.0125, "step": 146370 }, { "epoch": 1.236030482784826, "grad_norm": 0.2571118175983429, "learning_rate": 3.824004858251497e-06, "loss": 0.0067, "step": 146380 }, { "epoch": 1.23611492263199, "grad_norm": 0.4232949912548065, "learning_rate": 3.823288660741349e-06, "loss": 0.0089, "step": 146390 }, { "epoch": 1.2361993624791539, "grad_norm": 0.45568615198135376, "learning_rate": 3.82257248878913e-06, "loss": 0.009, "step": 146400 }, { "epoch": 1.2362838023263178, "grad_norm": 0.35665440559387207, "learning_rate": 3.821856342410395e-06, "loss": 0.0083, "step": 146410 }, { "epoch": 1.2363682421734816, "grad_norm": 0.9046440720558167, "learning_rate": 3.8211402216207e-06, "loss": 0.0131, "step": 146420 }, { "epoch": 1.2364526820206456, "grad_norm": 0.47819066047668457, "learning_rate": 3.820424126435596e-06, "loss": 0.0077, "step": 146430 }, { "epoch": 1.2365371218678094, "grad_norm": 0.2692100405693054, "learning_rate": 3.81970805687064e-06, "loss": 0.0116, "step": 146440 }, { "epoch": 1.2366215617149732, "grad_norm": 0.1672251969575882, "learning_rate": 3.818992012941381e-06, "loss": 0.0077, "step": 146450 }, { "epoch": 1.2367060015621372, "grad_norm": 0.1992231160402298, "learning_rate": 3.818275994663375e-06, "loss": 0.0068, "step": 146460 }, { "epoch": 1.2367904414093012, "grad_norm": 0.29639339447021484, "learning_rate": 3.817560002052173e-06, "loss": 0.0073, "step": 146470 }, { "epoch": 1.236874881256465, "grad_norm": 0.3079564571380615, "learning_rate": 3.816844035123324e-06, "loss": 0.0049, "step": 146480 }, { "epoch": 1.2369593211036287, "grad_norm": 0.031052378937602043, "learning_rate": 3.816128093892381e-06, "loss": 0.0075, "step": 146490 }, { "epoch": 1.2370437609507927, "grad_norm": 0.31441569328308105, "learning_rate": 3.815412178374892e-06, "loss": 0.0046, "step": 146500 }, { "epoch": 1.2371282007979565, "grad_norm": 0.023551655933260918, "learning_rate": 3.814696288586409e-06, "loss": 0.0051, "step": 146510 }, { "epoch": 1.2372126406451205, "grad_norm": 0.06183311343193054, "learning_rate": 3.8139804245424792e-06, "loss": 0.0054, "step": 146520 }, { "epoch": 1.2372970804922843, "grad_norm": 0.9308942556381226, "learning_rate": 3.813264586258652e-06, "loss": 0.0179, "step": 146530 }, { "epoch": 1.2373815203394483, "grad_norm": 0.19817398488521576, "learning_rate": 3.812548773750473e-06, "loss": 0.0086, "step": 146540 }, { "epoch": 1.237465960186612, "grad_norm": 0.1474209427833557, "learning_rate": 3.8118329870334946e-06, "loss": 0.0068, "step": 146550 }, { "epoch": 1.237550400033776, "grad_norm": 0.425657719373703, "learning_rate": 3.811117226123257e-06, "loss": 0.0085, "step": 146560 }, { "epoch": 1.2376348398809398, "grad_norm": 0.1256529837846756, "learning_rate": 3.8104014910353115e-06, "loss": 0.0082, "step": 146570 }, { "epoch": 1.2377192797281036, "grad_norm": 0.14008575677871704, "learning_rate": 3.8096857817852014e-06, "loss": 0.0079, "step": 146580 }, { "epoch": 1.2378037195752676, "grad_norm": 0.0831104964017868, "learning_rate": 3.8089700983884726e-06, "loss": 0.0044, "step": 146590 }, { "epoch": 1.2378881594224314, "grad_norm": 0.1630707085132599, "learning_rate": 3.8082544408606693e-06, "loss": 0.0101, "step": 146600 }, { "epoch": 1.2379725992695954, "grad_norm": 1.0742545127868652, "learning_rate": 3.807538809217333e-06, "loss": 0.0052, "step": 146610 }, { "epoch": 1.2380570391167591, "grad_norm": 0.381203830242157, "learning_rate": 3.806823203474012e-06, "loss": 0.004, "step": 146620 }, { "epoch": 1.2381414789639231, "grad_norm": 0.0929776281118393, "learning_rate": 3.8061076236462456e-06, "loss": 0.0067, "step": 146630 }, { "epoch": 1.238225918811087, "grad_norm": 0.3775583505630493, "learning_rate": 3.805392069749578e-06, "loss": 0.0093, "step": 146640 }, { "epoch": 1.238310358658251, "grad_norm": 0.35984179377555847, "learning_rate": 3.8046765417995467e-06, "loss": 0.0114, "step": 146650 }, { "epoch": 1.2383947985054147, "grad_norm": 0.19670821726322174, "learning_rate": 3.8039610398116995e-06, "loss": 0.006, "step": 146660 }, { "epoch": 1.2384792383525787, "grad_norm": 0.9840865135192871, "learning_rate": 3.8032455638015715e-06, "loss": 0.0075, "step": 146670 }, { "epoch": 1.2385636781997424, "grad_norm": 0.03493180498480797, "learning_rate": 3.802530113784706e-06, "loss": 0.007, "step": 146680 }, { "epoch": 1.2386481180469064, "grad_norm": 0.07256671786308289, "learning_rate": 3.8018146897766386e-06, "loss": 0.0084, "step": 146690 }, { "epoch": 1.2387325578940702, "grad_norm": 0.27588963508605957, "learning_rate": 3.801099291792913e-06, "loss": 0.0097, "step": 146700 }, { "epoch": 1.238816997741234, "grad_norm": 0.27405282855033875, "learning_rate": 3.800383919849065e-06, "loss": 0.0086, "step": 146710 }, { "epoch": 1.238901437588398, "grad_norm": 0.10024677962064743, "learning_rate": 3.7996685739606315e-06, "loss": 0.0077, "step": 146720 }, { "epoch": 1.2389858774355618, "grad_norm": 0.18420493602752686, "learning_rate": 3.7989532541431515e-06, "loss": 0.0091, "step": 146730 }, { "epoch": 1.2390703172827258, "grad_norm": 0.19619639217853546, "learning_rate": 3.798237960412159e-06, "loss": 0.0052, "step": 146740 }, { "epoch": 1.2391547571298895, "grad_norm": 0.34018075466156006, "learning_rate": 3.797522692783193e-06, "loss": 0.0084, "step": 146750 }, { "epoch": 1.2392391969770535, "grad_norm": 0.5349039435386658, "learning_rate": 3.7968074512717873e-06, "loss": 0.0084, "step": 146760 }, { "epoch": 1.2393236368242173, "grad_norm": 0.8569497466087341, "learning_rate": 3.796092235893478e-06, "loss": 0.0084, "step": 146770 }, { "epoch": 1.2394080766713813, "grad_norm": 0.28647977113723755, "learning_rate": 3.795377046663797e-06, "loss": 0.0079, "step": 146780 }, { "epoch": 1.239492516518545, "grad_norm": 0.2690390348434448, "learning_rate": 3.794661883598281e-06, "loss": 0.0097, "step": 146790 }, { "epoch": 1.2395769563657089, "grad_norm": 0.09838294237852097, "learning_rate": 3.793946746712461e-06, "loss": 0.0056, "step": 146800 }, { "epoch": 1.2396613962128729, "grad_norm": 0.24561245739459991, "learning_rate": 3.7932316360218724e-06, "loss": 0.0033, "step": 146810 }, { "epoch": 1.2397458360600369, "grad_norm": 0.12033675611019135, "learning_rate": 3.7925165515420428e-06, "loss": 0.003, "step": 146820 }, { "epoch": 1.2398302759072006, "grad_norm": 0.46736669540405273, "learning_rate": 3.7918014932885084e-06, "loss": 0.0064, "step": 146830 }, { "epoch": 1.2399147157543644, "grad_norm": 0.43856924772262573, "learning_rate": 3.7910864612767984e-06, "loss": 0.0068, "step": 146840 }, { "epoch": 1.2399991556015284, "grad_norm": 0.26128697395324707, "learning_rate": 3.7903714555224402e-06, "loss": 0.0071, "step": 146850 }, { "epoch": 1.2400835954486922, "grad_norm": 0.2324288934469223, "learning_rate": 3.789656476040968e-06, "loss": 0.004, "step": 146860 }, { "epoch": 1.2401680352958562, "grad_norm": 0.18872109055519104, "learning_rate": 3.7889415228479087e-06, "loss": 0.0066, "step": 146870 }, { "epoch": 1.24025247514302, "grad_norm": 0.06161106005311012, "learning_rate": 3.7882265959587918e-06, "loss": 0.0096, "step": 146880 }, { "epoch": 1.240336914990184, "grad_norm": 0.21362408995628357, "learning_rate": 3.787511695389143e-06, "loss": 0.0081, "step": 146890 }, { "epoch": 1.2404213548373477, "grad_norm": 0.10142143070697784, "learning_rate": 3.786796821154494e-06, "loss": 0.0091, "step": 146900 }, { "epoch": 1.2405057946845117, "grad_norm": 0.2872254550457001, "learning_rate": 3.7860819732703676e-06, "loss": 0.0048, "step": 146910 }, { "epoch": 1.2405902345316755, "grad_norm": 0.16004733741283417, "learning_rate": 3.7853671517522933e-06, "loss": 0.007, "step": 146920 }, { "epoch": 1.2406746743788393, "grad_norm": 0.4079154133796692, "learning_rate": 3.7846523566157937e-06, "loss": 0.0093, "step": 146930 }, { "epoch": 1.2407591142260033, "grad_norm": 0.3175332248210907, "learning_rate": 3.7839375878763983e-06, "loss": 0.012, "step": 146940 }, { "epoch": 1.240843554073167, "grad_norm": 0.1985190063714981, "learning_rate": 3.783222845549629e-06, "loss": 0.0073, "step": 146950 }, { "epoch": 1.240927993920331, "grad_norm": 0.23609355092048645, "learning_rate": 3.782508129651009e-06, "loss": 0.0064, "step": 146960 }, { "epoch": 1.2410124337674948, "grad_norm": 0.18878744542598724, "learning_rate": 3.781793440196064e-06, "loss": 0.005, "step": 146970 }, { "epoch": 1.2410968736146588, "grad_norm": 0.888629138469696, "learning_rate": 3.7810787772003136e-06, "loss": 0.0162, "step": 146980 }, { "epoch": 1.2411813134618226, "grad_norm": 0.07841023057699203, "learning_rate": 3.7803641406792846e-06, "loss": 0.0079, "step": 146990 }, { "epoch": 1.2412657533089866, "grad_norm": 0.10812309384346008, "learning_rate": 3.779649530648496e-06, "loss": 0.0046, "step": 147000 }, { "epoch": 1.2413501931561504, "grad_norm": 0.3373720049858093, "learning_rate": 3.77893494712347e-06, "loss": 0.0085, "step": 147010 }, { "epoch": 1.2414346330033144, "grad_norm": 0.22805972397327423, "learning_rate": 3.7782203901197245e-06, "loss": 0.0096, "step": 147020 }, { "epoch": 1.2415190728504781, "grad_norm": 0.1106417253613472, "learning_rate": 3.777505859652784e-06, "loss": 0.0087, "step": 147030 }, { "epoch": 1.2416035126976421, "grad_norm": 0.5801452398300171, "learning_rate": 3.776791355738165e-06, "loss": 0.0113, "step": 147040 }, { "epoch": 1.241687952544806, "grad_norm": 0.39941832423210144, "learning_rate": 3.776076878391388e-06, "loss": 0.0077, "step": 147050 }, { "epoch": 1.2417723923919697, "grad_norm": 0.40246546268463135, "learning_rate": 3.7753624276279704e-06, "loss": 0.0068, "step": 147060 }, { "epoch": 1.2418568322391337, "grad_norm": 0.1771516054868698, "learning_rate": 3.7746480034634274e-06, "loss": 0.0073, "step": 147070 }, { "epoch": 1.2419412720862975, "grad_norm": 0.13127970695495605, "learning_rate": 3.7739336059132813e-06, "loss": 0.006, "step": 147080 }, { "epoch": 1.2420257119334615, "grad_norm": 0.3728621304035187, "learning_rate": 3.7732192349930447e-06, "loss": 0.0066, "step": 147090 }, { "epoch": 1.2421101517806252, "grad_norm": 0.8671064972877502, "learning_rate": 3.772504890718236e-06, "loss": 0.0095, "step": 147100 }, { "epoch": 1.2421945916277892, "grad_norm": 0.31622275710105896, "learning_rate": 3.7717905731043676e-06, "loss": 0.0075, "step": 147110 }, { "epoch": 1.242279031474953, "grad_norm": 0.32086193561553955, "learning_rate": 3.771076282166958e-06, "loss": 0.0076, "step": 147120 }, { "epoch": 1.242363471322117, "grad_norm": 0.26815688610076904, "learning_rate": 3.7703620179215194e-06, "loss": 0.0082, "step": 147130 }, { "epoch": 1.2424479111692808, "grad_norm": 0.06473606079816818, "learning_rate": 3.769647780383566e-06, "loss": 0.0079, "step": 147140 }, { "epoch": 1.2425323510164445, "grad_norm": 0.8955382108688354, "learning_rate": 3.768933569568609e-06, "loss": 0.0164, "step": 147150 }, { "epoch": 1.2426167908636085, "grad_norm": 0.519203782081604, "learning_rate": 3.768219385492166e-06, "loss": 0.0048, "step": 147160 }, { "epoch": 1.2427012307107723, "grad_norm": 0.0011764849768951535, "learning_rate": 3.7675052281697423e-06, "loss": 0.0035, "step": 147170 }, { "epoch": 1.2427856705579363, "grad_norm": 0.16394975781440735, "learning_rate": 3.766791097616854e-06, "loss": 0.0084, "step": 147180 }, { "epoch": 1.2428701104051, "grad_norm": 0.21460770070552826, "learning_rate": 3.766076993849011e-06, "loss": 0.0056, "step": 147190 }, { "epoch": 1.242954550252264, "grad_norm": 0.46446558833122253, "learning_rate": 3.765362916881721e-06, "loss": 0.0138, "step": 147200 }, { "epoch": 1.2430389900994279, "grad_norm": 0.20053631067276, "learning_rate": 3.7646488667304977e-06, "loss": 0.0059, "step": 147210 }, { "epoch": 1.2431234299465919, "grad_norm": 0.036468204110860825, "learning_rate": 3.763934843410845e-06, "loss": 0.0112, "step": 147220 }, { "epoch": 1.2432078697937556, "grad_norm": 0.07968051731586456, "learning_rate": 3.7632208469382764e-06, "loss": 0.0048, "step": 147230 }, { "epoch": 1.2432923096409196, "grad_norm": 0.3171159625053406, "learning_rate": 3.7625068773282962e-06, "loss": 0.0087, "step": 147240 }, { "epoch": 1.2433767494880834, "grad_norm": 0.12741202116012573, "learning_rate": 3.7617929345964142e-06, "loss": 0.0082, "step": 147250 }, { "epoch": 1.2434611893352474, "grad_norm": 0.20664317905902863, "learning_rate": 3.761079018758134e-06, "loss": 0.0061, "step": 147260 }, { "epoch": 1.2435456291824112, "grad_norm": 0.1478755921125412, "learning_rate": 3.7603651298289662e-06, "loss": 0.0077, "step": 147270 }, { "epoch": 1.243630069029575, "grad_norm": 0.35740089416503906, "learning_rate": 3.759651267824412e-06, "loss": 0.0055, "step": 147280 }, { "epoch": 1.243714508876739, "grad_norm": 0.20830799639225006, "learning_rate": 3.758937432759979e-06, "loss": 0.0038, "step": 147290 }, { "epoch": 1.2437989487239027, "grad_norm": 0.35617390275001526, "learning_rate": 3.758223624651172e-06, "loss": 0.0102, "step": 147300 }, { "epoch": 1.2438833885710667, "grad_norm": 0.5679875016212463, "learning_rate": 3.75750984351349e-06, "loss": 0.0111, "step": 147310 }, { "epoch": 1.2439678284182305, "grad_norm": 0.15029500424861908, "learning_rate": 3.7567960893624423e-06, "loss": 0.0068, "step": 147320 }, { "epoch": 1.2440522682653945, "grad_norm": 0.1821918785572052, "learning_rate": 3.7560823622135274e-06, "loss": 0.0111, "step": 147330 }, { "epoch": 1.2441367081125583, "grad_norm": 0.38209670782089233, "learning_rate": 3.7553686620822496e-06, "loss": 0.0077, "step": 147340 }, { "epoch": 1.2442211479597223, "grad_norm": 0.36504533886909485, "learning_rate": 3.754654988984108e-06, "loss": 0.0062, "step": 147350 }, { "epoch": 1.244305587806886, "grad_norm": 0.11870487779378891, "learning_rate": 3.753941342934606e-06, "loss": 0.0068, "step": 147360 }, { "epoch": 1.2443900276540498, "grad_norm": 0.20575106143951416, "learning_rate": 3.753227723949242e-06, "loss": 0.0083, "step": 147370 }, { "epoch": 1.2444744675012138, "grad_norm": 0.2097906768321991, "learning_rate": 3.7525141320435177e-06, "loss": 0.0058, "step": 147380 }, { "epoch": 1.2445589073483778, "grad_norm": 1.2206881046295166, "learning_rate": 3.7518005672329284e-06, "loss": 0.0065, "step": 147390 }, { "epoch": 1.2446433471955416, "grad_norm": 0.08934222906827927, "learning_rate": 3.751087029532977e-06, "loss": 0.0069, "step": 147400 }, { "epoch": 1.2447277870427054, "grad_norm": 1.0794203281402588, "learning_rate": 3.750373518959159e-06, "loss": 0.0151, "step": 147410 }, { "epoch": 1.2448122268898694, "grad_norm": 0.11026330292224884, "learning_rate": 3.7496600355269725e-06, "loss": 0.0053, "step": 147420 }, { "epoch": 1.2448966667370331, "grad_norm": 0.1583011895418167, "learning_rate": 3.7489465792519143e-06, "loss": 0.0053, "step": 147430 }, { "epoch": 1.2449811065841971, "grad_norm": 0.3044632375240326, "learning_rate": 3.7482331501494773e-06, "loss": 0.0074, "step": 147440 }, { "epoch": 1.245065546431361, "grad_norm": 0.1945389360189438, "learning_rate": 3.747519748235163e-06, "loss": 0.0055, "step": 147450 }, { "epoch": 1.245149986278525, "grad_norm": 0.6432015299797058, "learning_rate": 3.7468063735244608e-06, "loss": 0.01, "step": 147460 }, { "epoch": 1.2452344261256887, "grad_norm": 0.097282774746418, "learning_rate": 3.7460930260328686e-06, "loss": 0.0053, "step": 147470 }, { "epoch": 1.2453188659728527, "grad_norm": 0.7788234949111938, "learning_rate": 3.7453797057758785e-06, "loss": 0.0102, "step": 147480 }, { "epoch": 1.2454033058200165, "grad_norm": 0.21235093474388123, "learning_rate": 3.744666412768985e-06, "loss": 0.0054, "step": 147490 }, { "epoch": 1.2454877456671802, "grad_norm": 0.16636957228183746, "learning_rate": 3.743953147027677e-06, "loss": 0.0038, "step": 147500 }, { "epoch": 1.2455721855143442, "grad_norm": 0.34238702058792114, "learning_rate": 3.743239908567452e-06, "loss": 0.0134, "step": 147510 }, { "epoch": 1.245656625361508, "grad_norm": 0.6906521320343018, "learning_rate": 3.742526697403798e-06, "loss": 0.0087, "step": 147520 }, { "epoch": 1.245741065208672, "grad_norm": 0.3530988097190857, "learning_rate": 3.741813513552206e-06, "loss": 0.0065, "step": 147530 }, { "epoch": 1.2458255050558358, "grad_norm": 0.24530909955501556, "learning_rate": 3.741100357028168e-06, "loss": 0.0073, "step": 147540 }, { "epoch": 1.2459099449029998, "grad_norm": 0.5061724185943604, "learning_rate": 3.74038722784717e-06, "loss": 0.0064, "step": 147550 }, { "epoch": 1.2459943847501636, "grad_norm": 0.09667747467756271, "learning_rate": 3.739674126024705e-06, "loss": 0.0087, "step": 147560 }, { "epoch": 1.2460788245973276, "grad_norm": 0.038243021816015244, "learning_rate": 3.7389610515762585e-06, "loss": 0.0052, "step": 147570 }, { "epoch": 1.2461632644444913, "grad_norm": 0.3290296792984009, "learning_rate": 3.738248004517321e-06, "loss": 0.0051, "step": 147580 }, { "epoch": 1.2462477042916553, "grad_norm": 0.3750101923942566, "learning_rate": 3.737534984863376e-06, "loss": 0.0096, "step": 147590 }, { "epoch": 1.246332144138819, "grad_norm": 0.12116415053606033, "learning_rate": 3.7368219926299152e-06, "loss": 0.0073, "step": 147600 }, { "epoch": 1.246416583985983, "grad_norm": 0.17468729615211487, "learning_rate": 3.73610902783242e-06, "loss": 0.0043, "step": 147610 }, { "epoch": 1.2465010238331469, "grad_norm": 0.33191919326782227, "learning_rate": 3.7353960904863797e-06, "loss": 0.0047, "step": 147620 }, { "epoch": 1.2465854636803106, "grad_norm": 0.0011379435891285539, "learning_rate": 3.7346831806072735e-06, "loss": 0.005, "step": 147630 }, { "epoch": 1.2466699035274746, "grad_norm": 0.13494230806827545, "learning_rate": 3.733970298210593e-06, "loss": 0.0164, "step": 147640 }, { "epoch": 1.2467543433746384, "grad_norm": 0.2776210904121399, "learning_rate": 3.733257443311817e-06, "loss": 0.009, "step": 147650 }, { "epoch": 1.2468387832218024, "grad_norm": 0.3588249981403351, "learning_rate": 3.7325446159264296e-06, "loss": 0.0062, "step": 147660 }, { "epoch": 1.2469232230689662, "grad_norm": 0.34123265743255615, "learning_rate": 3.731831816069914e-06, "loss": 0.0123, "step": 147670 }, { "epoch": 1.2470076629161302, "grad_norm": 0.38441136479377747, "learning_rate": 3.731119043757749e-06, "loss": 0.006, "step": 147680 }, { "epoch": 1.247092102763294, "grad_norm": 0.054419368505477905, "learning_rate": 3.7304062990054214e-06, "loss": 0.0043, "step": 147690 }, { "epoch": 1.247176542610458, "grad_norm": 0.5266465544700623, "learning_rate": 3.7296935818284075e-06, "loss": 0.004, "step": 147700 }, { "epoch": 1.2472609824576217, "grad_norm": 0.4118025004863739, "learning_rate": 3.7289808922421895e-06, "loss": 0.0111, "step": 147710 }, { "epoch": 1.2473454223047855, "grad_norm": 0.20230042934417725, "learning_rate": 3.728268230262244e-06, "loss": 0.0092, "step": 147720 }, { "epoch": 1.2474298621519495, "grad_norm": 0.4119945168495178, "learning_rate": 3.727555595904054e-06, "loss": 0.0056, "step": 147730 }, { "epoch": 1.2475143019991135, "grad_norm": 0.028683815151453018, "learning_rate": 3.7268429891830947e-06, "loss": 0.0073, "step": 147740 }, { "epoch": 1.2475987418462773, "grad_norm": 0.11409925669431686, "learning_rate": 3.7261304101148456e-06, "loss": 0.0077, "step": 147750 }, { "epoch": 1.247683181693441, "grad_norm": 0.004771948792040348, "learning_rate": 3.725417858714781e-06, "loss": 0.0093, "step": 147760 }, { "epoch": 1.247767621540605, "grad_norm": 0.4160650372505188, "learning_rate": 3.724705334998383e-06, "loss": 0.01, "step": 147770 }, { "epoch": 1.2478520613877688, "grad_norm": 0.17127084732055664, "learning_rate": 3.723992838981122e-06, "loss": 0.0046, "step": 147780 }, { "epoch": 1.2479365012349328, "grad_norm": 0.13191792368888855, "learning_rate": 3.723280370678473e-06, "loss": 0.0075, "step": 147790 }, { "epoch": 1.2480209410820966, "grad_norm": 0.10108733922243118, "learning_rate": 3.722567930105915e-06, "loss": 0.0028, "step": 147800 }, { "epoch": 1.2481053809292606, "grad_norm": 0.07114049792289734, "learning_rate": 3.7218555172789193e-06, "loss": 0.0033, "step": 147810 }, { "epoch": 1.2481898207764244, "grad_norm": 0.15364514291286469, "learning_rate": 3.7211431322129598e-06, "loss": 0.005, "step": 147820 }, { "epoch": 1.2482742606235884, "grad_norm": 0.19032736122608185, "learning_rate": 3.720430774923508e-06, "loss": 0.0101, "step": 147830 }, { "epoch": 1.2483587004707521, "grad_norm": 0.013743546791374683, "learning_rate": 3.71971844542604e-06, "loss": 0.0054, "step": 147840 }, { "epoch": 1.248443140317916, "grad_norm": 0.31192171573638916, "learning_rate": 3.7190061437360236e-06, "loss": 0.0082, "step": 147850 }, { "epoch": 1.24852758016508, "grad_norm": 0.49375995993614197, "learning_rate": 3.718293869868932e-06, "loss": 0.0077, "step": 147860 }, { "epoch": 1.2486120200122437, "grad_norm": 0.2232738435268402, "learning_rate": 3.717581623840233e-06, "loss": 0.01, "step": 147870 }, { "epoch": 1.2486964598594077, "grad_norm": 0.12618643045425415, "learning_rate": 3.716869405665401e-06, "loss": 0.0097, "step": 147880 }, { "epoch": 1.2487808997065715, "grad_norm": 0.4589276611804962, "learning_rate": 3.7161572153599018e-06, "loss": 0.0056, "step": 147890 }, { "epoch": 1.2488653395537355, "grad_norm": 0.3216513693332672, "learning_rate": 3.7154450529392037e-06, "loss": 0.0047, "step": 147900 }, { "epoch": 1.2489497794008992, "grad_norm": 0.1835312396287918, "learning_rate": 3.7147329184187773e-06, "loss": 0.0051, "step": 147910 }, { "epoch": 1.2490342192480632, "grad_norm": 0.15579882264137268, "learning_rate": 3.7140208118140865e-06, "loss": 0.0117, "step": 147920 }, { "epoch": 1.249118659095227, "grad_norm": 0.4765090346336365, "learning_rate": 3.713308733140602e-06, "loss": 0.011, "step": 147930 }, { "epoch": 1.249203098942391, "grad_norm": 0.2439432144165039, "learning_rate": 3.7125966824137873e-06, "loss": 0.0044, "step": 147940 }, { "epoch": 1.2492875387895548, "grad_norm": 0.22379949688911438, "learning_rate": 3.7118846596491096e-06, "loss": 0.0143, "step": 147950 }, { "epoch": 1.2493719786367188, "grad_norm": 0.3392597734928131, "learning_rate": 3.711172664862031e-06, "loss": 0.0047, "step": 147960 }, { "epoch": 1.2494564184838826, "grad_norm": 0.3991162180900574, "learning_rate": 3.7104606980680204e-06, "loss": 0.0058, "step": 147970 }, { "epoch": 1.2495408583310463, "grad_norm": 0.499074250459671, "learning_rate": 3.709748759282538e-06, "loss": 0.0036, "step": 147980 }, { "epoch": 1.2496252981782103, "grad_norm": 0.10965743660926819, "learning_rate": 3.709036848521049e-06, "loss": 0.004, "step": 147990 }, { "epoch": 1.249709738025374, "grad_norm": 0.15947578847408295, "learning_rate": 3.7083249657990138e-06, "loss": 0.0049, "step": 148000 }, { "epoch": 1.249794177872538, "grad_norm": 0.24885404109954834, "learning_rate": 3.707613111131897e-06, "loss": 0.004, "step": 148010 }, { "epoch": 1.2498786177197019, "grad_norm": 0.36172789335250854, "learning_rate": 3.706901284535159e-06, "loss": 0.0095, "step": 148020 }, { "epoch": 1.2499630575668659, "grad_norm": 0.4849531352519989, "learning_rate": 3.7061894860242586e-06, "loss": 0.0063, "step": 148030 }, { "epoch": 1.2500474974140297, "grad_norm": 0.25538694858551025, "learning_rate": 3.705477715614659e-06, "loss": 0.0039, "step": 148040 }, { "epoch": 1.2501319372611936, "grad_norm": 0.1605142503976822, "learning_rate": 3.7047659733218156e-06, "loss": 0.0089, "step": 148050 }, { "epoch": 1.2502163771083574, "grad_norm": 0.7909146547317505, "learning_rate": 3.704054259161193e-06, "loss": 0.0089, "step": 148060 }, { "epoch": 1.2503008169555212, "grad_norm": 0.25717294216156006, "learning_rate": 3.703342573148243e-06, "loss": 0.0057, "step": 148070 }, { "epoch": 1.2503852568026852, "grad_norm": 0.20712542533874512, "learning_rate": 3.7026309152984296e-06, "loss": 0.0075, "step": 148080 }, { "epoch": 1.2504696966498492, "grad_norm": 0.3251776397228241, "learning_rate": 3.701919285627205e-06, "loss": 0.0082, "step": 148090 }, { "epoch": 1.250554136497013, "grad_norm": 0.374772846698761, "learning_rate": 3.7012076841500288e-06, "loss": 0.0093, "step": 148100 }, { "epoch": 1.2506385763441767, "grad_norm": 0.21354784071445465, "learning_rate": 3.7004961108823534e-06, "loss": 0.0038, "step": 148110 }, { "epoch": 1.2507230161913407, "grad_norm": 0.2219158411026001, "learning_rate": 3.6997845658396387e-06, "loss": 0.0063, "step": 148120 }, { "epoch": 1.2508074560385045, "grad_norm": 0.6440087556838989, "learning_rate": 3.699073049037336e-06, "loss": 0.0069, "step": 148130 }, { "epoch": 1.2508918958856685, "grad_norm": 0.2649799883365631, "learning_rate": 3.6983615604908996e-06, "loss": 0.0101, "step": 148140 }, { "epoch": 1.2509763357328323, "grad_norm": 0.0674804225564003, "learning_rate": 3.697650100215785e-06, "loss": 0.0054, "step": 148150 }, { "epoch": 1.251060775579996, "grad_norm": 0.32562607526779175, "learning_rate": 3.69693866822744e-06, "loss": 0.0068, "step": 148160 }, { "epoch": 1.25114521542716, "grad_norm": 0.4507704973220825, "learning_rate": 3.6962272645413227e-06, "loss": 0.0049, "step": 148170 }, { "epoch": 1.251229655274324, "grad_norm": 0.06980641931295395, "learning_rate": 3.6955158891728815e-06, "loss": 0.0101, "step": 148180 }, { "epoch": 1.2513140951214878, "grad_norm": 0.20183470845222473, "learning_rate": 3.6948045421375682e-06, "loss": 0.0081, "step": 148190 }, { "epoch": 1.2513985349686516, "grad_norm": 0.3850346505641937, "learning_rate": 3.6940932234508308e-06, "loss": 0.0047, "step": 148200 }, { "epoch": 1.2514829748158156, "grad_norm": 0.6091978549957275, "learning_rate": 3.6933819331281236e-06, "loss": 0.0111, "step": 148210 }, { "epoch": 1.2515674146629794, "grad_norm": 0.5699825882911682, "learning_rate": 3.6926706711848915e-06, "loss": 0.0063, "step": 148220 }, { "epoch": 1.2516518545101434, "grad_norm": 0.10257630050182343, "learning_rate": 3.691959437636586e-06, "loss": 0.0075, "step": 148230 }, { "epoch": 1.2517362943573072, "grad_norm": 0.1930115669965744, "learning_rate": 3.6912482324986533e-06, "loss": 0.0047, "step": 148240 }, { "epoch": 1.2518207342044712, "grad_norm": 0.34239572286605835, "learning_rate": 3.690537055786538e-06, "loss": 0.0087, "step": 148250 }, { "epoch": 1.251905174051635, "grad_norm": 0.18929484486579895, "learning_rate": 3.689825907515693e-06, "loss": 0.0079, "step": 148260 }, { "epoch": 1.251989613898799, "grad_norm": 0.2951771318912506, "learning_rate": 3.6891147877015588e-06, "loss": 0.0078, "step": 148270 }, { "epoch": 1.2520740537459627, "grad_norm": 0.029910199344158173, "learning_rate": 3.6884036963595837e-06, "loss": 0.0031, "step": 148280 }, { "epoch": 1.2521584935931265, "grad_norm": 0.30906417965888977, "learning_rate": 3.68769263350521e-06, "loss": 0.009, "step": 148290 }, { "epoch": 1.2522429334402905, "grad_norm": 0.5058164000511169, "learning_rate": 3.686981599153885e-06, "loss": 0.0062, "step": 148300 }, { "epoch": 1.2523273732874545, "grad_norm": 0.14535437524318695, "learning_rate": 3.6862705933210498e-06, "loss": 0.0082, "step": 148310 }, { "epoch": 1.2524118131346182, "grad_norm": 0.6789205074310303, "learning_rate": 3.685559616022149e-06, "loss": 0.0117, "step": 148320 }, { "epoch": 1.252496252981782, "grad_norm": 0.047036062926054, "learning_rate": 3.6848486672726225e-06, "loss": 0.0075, "step": 148330 }, { "epoch": 1.252580692828946, "grad_norm": 0.3358635902404785, "learning_rate": 3.6841377470879146e-06, "loss": 0.0097, "step": 148340 }, { "epoch": 1.2526651326761098, "grad_norm": 0.2955605387687683, "learning_rate": 3.6834268554834647e-06, "loss": 0.0058, "step": 148350 }, { "epoch": 1.2527495725232738, "grad_norm": 0.33260613679885864, "learning_rate": 3.682715992474715e-06, "loss": 0.009, "step": 148360 }, { "epoch": 1.2528340123704376, "grad_norm": 0.5684035420417786, "learning_rate": 3.6820051580771032e-06, "loss": 0.0055, "step": 148370 }, { "epoch": 1.2529184522176016, "grad_norm": 0.3275986909866333, "learning_rate": 3.6812943523060686e-06, "loss": 0.004, "step": 148380 }, { "epoch": 1.2530028920647653, "grad_norm": 0.0879635140299797, "learning_rate": 3.6805835751770517e-06, "loss": 0.0054, "step": 148390 }, { "epoch": 1.2530873319119293, "grad_norm": 0.16874416172504425, "learning_rate": 3.6798728267054874e-06, "loss": 0.0057, "step": 148400 }, { "epoch": 1.2531717717590931, "grad_norm": 0.33235809206962585, "learning_rate": 3.6791621069068164e-06, "loss": 0.0075, "step": 148410 }, { "epoch": 1.2532562116062569, "grad_norm": 0.3040847182273865, "learning_rate": 3.6784514157964723e-06, "loss": 0.0088, "step": 148420 }, { "epoch": 1.2533406514534209, "grad_norm": 0.39663225412368774, "learning_rate": 3.6777407533898946e-06, "loss": 0.0084, "step": 148430 }, { "epoch": 1.2534250913005849, "grad_norm": 0.08276403695344925, "learning_rate": 3.6770301197025137e-06, "loss": 0.0119, "step": 148440 }, { "epoch": 1.2535095311477487, "grad_norm": 0.008458963595330715, "learning_rate": 3.6763195147497704e-06, "loss": 0.0094, "step": 148450 }, { "epoch": 1.2535939709949124, "grad_norm": 0.45802563428878784, "learning_rate": 3.675608938547095e-06, "loss": 0.0063, "step": 148460 }, { "epoch": 1.2536784108420764, "grad_norm": 0.2662310004234314, "learning_rate": 3.6748983911099224e-06, "loss": 0.0089, "step": 148470 }, { "epoch": 1.2537628506892402, "grad_norm": 0.13380004465579987, "learning_rate": 3.674187872453686e-06, "loss": 0.0065, "step": 148480 }, { "epoch": 1.2538472905364042, "grad_norm": 0.14883744716644287, "learning_rate": 3.6734773825938145e-06, "loss": 0.0106, "step": 148490 }, { "epoch": 1.253931730383568, "grad_norm": 0.6704590916633606, "learning_rate": 3.672766921545745e-06, "loss": 0.0081, "step": 148500 }, { "epoch": 1.2540161702307318, "grad_norm": 0.13940022885799408, "learning_rate": 3.6720564893249045e-06, "loss": 0.0088, "step": 148510 }, { "epoch": 1.2541006100778957, "grad_norm": 0.00787657406181097, "learning_rate": 3.6713460859467263e-06, "loss": 0.0146, "step": 148520 }, { "epoch": 1.2541850499250597, "grad_norm": 0.31548747420310974, "learning_rate": 3.6706357114266366e-06, "loss": 0.0056, "step": 148530 }, { "epoch": 1.2542694897722235, "grad_norm": 0.3044205904006958, "learning_rate": 3.6699253657800693e-06, "loss": 0.006, "step": 148540 }, { "epoch": 1.2543539296193873, "grad_norm": 0.25261250138282776, "learning_rate": 3.669215049022449e-06, "loss": 0.0076, "step": 148550 }, { "epoch": 1.2544383694665513, "grad_norm": 0.18759873509407043, "learning_rate": 3.668504761169207e-06, "loss": 0.0065, "step": 148560 }, { "epoch": 1.254522809313715, "grad_norm": 0.38987961411476135, "learning_rate": 3.6677945022357657e-06, "loss": 0.0042, "step": 148570 }, { "epoch": 1.254607249160879, "grad_norm": 0.27003026008605957, "learning_rate": 3.6670842722375577e-06, "loss": 0.0036, "step": 148580 }, { "epoch": 1.2546916890080428, "grad_norm": 0.08072488754987717, "learning_rate": 3.666374071190004e-06, "loss": 0.0049, "step": 148590 }, { "epoch": 1.2547761288552068, "grad_norm": 0.371614009141922, "learning_rate": 3.665663899108534e-06, "loss": 0.0064, "step": 148600 }, { "epoch": 1.2548605687023706, "grad_norm": 0.5620595216751099, "learning_rate": 3.6649537560085707e-06, "loss": 0.0066, "step": 148610 }, { "epoch": 1.2549450085495346, "grad_norm": 0.08100013434886932, "learning_rate": 3.664243641905536e-06, "loss": 0.0092, "step": 148620 }, { "epoch": 1.2550294483966984, "grad_norm": 0.21611973643302917, "learning_rate": 3.663533556814858e-06, "loss": 0.0046, "step": 148630 }, { "epoch": 1.2551138882438622, "grad_norm": 0.5976895093917847, "learning_rate": 3.6628235007519556e-06, "loss": 0.0098, "step": 148640 }, { "epoch": 1.2551983280910262, "grad_norm": 0.07899198681116104, "learning_rate": 3.6621134737322546e-06, "loss": 0.0063, "step": 148650 }, { "epoch": 1.2552827679381902, "grad_norm": 0.24256505072116852, "learning_rate": 3.6614034757711725e-06, "loss": 0.0052, "step": 148660 }, { "epoch": 1.255367207785354, "grad_norm": 0.14508278667926788, "learning_rate": 3.6606935068841354e-06, "loss": 0.008, "step": 148670 }, { "epoch": 1.2554516476325177, "grad_norm": 0.2355128973722458, "learning_rate": 3.659983567086558e-06, "loss": 0.004, "step": 148680 }, { "epoch": 1.2555360874796817, "grad_norm": 0.2319919615983963, "learning_rate": 3.659273656393865e-06, "loss": 0.0079, "step": 148690 }, { "epoch": 1.2556205273268455, "grad_norm": 0.5110026001930237, "learning_rate": 3.658563774821472e-06, "loss": 0.0075, "step": 148700 }, { "epoch": 1.2557049671740095, "grad_norm": 0.2985369861125946, "learning_rate": 3.6578539223848004e-06, "loss": 0.0084, "step": 148710 }, { "epoch": 1.2557894070211733, "grad_norm": 0.16026578843593597, "learning_rate": 3.6571440990992664e-06, "loss": 0.0047, "step": 148720 }, { "epoch": 1.2558738468683373, "grad_norm": 0.06496332585811615, "learning_rate": 3.6564343049802854e-06, "loss": 0.0058, "step": 148730 }, { "epoch": 1.255958286715501, "grad_norm": 0.2866533398628235, "learning_rate": 3.655724540043278e-06, "loss": 0.0074, "step": 148740 }, { "epoch": 1.256042726562665, "grad_norm": 0.7304020524024963, "learning_rate": 3.655014804303657e-06, "loss": 0.0077, "step": 148750 }, { "epoch": 1.2561271664098288, "grad_norm": 0.02166820503771305, "learning_rate": 3.6543050977768402e-06, "loss": 0.0063, "step": 148760 }, { "epoch": 1.2562116062569926, "grad_norm": 0.42536088824272156, "learning_rate": 3.6535954204782386e-06, "loss": 0.0087, "step": 148770 }, { "epoch": 1.2562960461041566, "grad_norm": 0.15298207104206085, "learning_rate": 3.6528857724232703e-06, "loss": 0.0049, "step": 148780 }, { "epoch": 1.2563804859513203, "grad_norm": 0.22115172445774078, "learning_rate": 3.6521761536273466e-06, "loss": 0.0078, "step": 148790 }, { "epoch": 1.2564649257984843, "grad_norm": 0.4227680265903473, "learning_rate": 3.651466564105881e-06, "loss": 0.0075, "step": 148800 }, { "epoch": 1.2565493656456481, "grad_norm": 0.14174088835716248, "learning_rate": 3.6507570038742835e-06, "loss": 0.0105, "step": 148810 }, { "epoch": 1.2566338054928121, "grad_norm": 0.0675484910607338, "learning_rate": 3.6500474729479697e-06, "loss": 0.0089, "step": 148820 }, { "epoch": 1.256718245339976, "grad_norm": 0.43810099363327026, "learning_rate": 3.6493379713423484e-06, "loss": 0.0064, "step": 148830 }, { "epoch": 1.25680268518714, "grad_norm": 0.04342817887663841, "learning_rate": 3.648628499072828e-06, "loss": 0.0058, "step": 148840 }, { "epoch": 1.2568871250343037, "grad_norm": 0.16372567415237427, "learning_rate": 3.6479190561548206e-06, "loss": 0.0027, "step": 148850 }, { "epoch": 1.2569715648814674, "grad_norm": 0.11173520982265472, "learning_rate": 3.647209642603733e-06, "loss": 0.0061, "step": 148860 }, { "epoch": 1.2570560047286314, "grad_norm": 0.3025175631046295, "learning_rate": 3.6465002584349764e-06, "loss": 0.0052, "step": 148870 }, { "epoch": 1.2571404445757954, "grad_norm": 0.15769773721694946, "learning_rate": 3.645790903663955e-06, "loss": 0.0114, "step": 148880 }, { "epoch": 1.2572248844229592, "grad_norm": 0.17597249150276184, "learning_rate": 3.6450815783060795e-06, "loss": 0.0096, "step": 148890 }, { "epoch": 1.257309324270123, "grad_norm": 0.42926231026649475, "learning_rate": 3.6443722823767525e-06, "loss": 0.0064, "step": 148900 }, { "epoch": 1.257393764117287, "grad_norm": 0.8900652527809143, "learning_rate": 3.6436630158913833e-06, "loss": 0.0082, "step": 148910 }, { "epoch": 1.2574782039644508, "grad_norm": 0.5681053996086121, "learning_rate": 3.6429537788653745e-06, "loss": 0.0139, "step": 148920 }, { "epoch": 1.2575626438116148, "grad_norm": 0.9430492520332336, "learning_rate": 3.642244571314133e-06, "loss": 0.0095, "step": 148930 }, { "epoch": 1.2576470836587785, "grad_norm": 0.0975283533334732, "learning_rate": 3.6415353932530585e-06, "loss": 0.0059, "step": 148940 }, { "epoch": 1.2577315235059425, "grad_norm": 0.4262884855270386, "learning_rate": 3.6408262446975594e-06, "loss": 0.0127, "step": 148950 }, { "epoch": 1.2578159633531063, "grad_norm": 0.22652310132980347, "learning_rate": 3.640117125663037e-06, "loss": 0.0075, "step": 148960 }, { "epoch": 1.2579004032002703, "grad_norm": 0.019565867260098457, "learning_rate": 3.6394080361648887e-06, "loss": 0.0047, "step": 148970 }, { "epoch": 1.257984843047434, "grad_norm": 0.7667688131332397, "learning_rate": 3.638698976218521e-06, "loss": 0.008, "step": 148980 }, { "epoch": 1.2580692828945979, "grad_norm": 0.21839158236980438, "learning_rate": 3.637989945839331e-06, "loss": 0.0046, "step": 148990 }, { "epoch": 1.2581537227417618, "grad_norm": 0.1963198482990265, "learning_rate": 3.6372809450427215e-06, "loss": 0.0078, "step": 149000 }, { "epoch": 1.2582381625889258, "grad_norm": 0.09911756962537766, "learning_rate": 3.636571973844089e-06, "loss": 0.0086, "step": 149010 }, { "epoch": 1.2583226024360896, "grad_norm": 0.30289217829704285, "learning_rate": 3.6358630322588363e-06, "loss": 0.0075, "step": 149020 }, { "epoch": 1.2584070422832534, "grad_norm": 0.104335717856884, "learning_rate": 3.6351541203023573e-06, "loss": 0.0095, "step": 149030 }, { "epoch": 1.2584914821304174, "grad_norm": 0.19205492734909058, "learning_rate": 3.634445237990052e-06, "loss": 0.0033, "step": 149040 }, { "epoch": 1.2585759219775812, "grad_norm": 0.25194621086120605, "learning_rate": 3.6337363853373146e-06, "loss": 0.0022, "step": 149050 }, { "epoch": 1.2586603618247452, "grad_norm": 0.17470292747020721, "learning_rate": 3.633027562359545e-06, "loss": 0.0087, "step": 149060 }, { "epoch": 1.258744801671909, "grad_norm": 0.1729622781276703, "learning_rate": 3.632318769072136e-06, "loss": 0.0075, "step": 149070 }, { "epoch": 1.2588292415190727, "grad_norm": 0.38121497631073, "learning_rate": 3.6316100054904836e-06, "loss": 0.0057, "step": 149080 }, { "epoch": 1.2589136813662367, "grad_norm": 0.030474117025732994, "learning_rate": 3.6309012716299806e-06, "loss": 0.0039, "step": 149090 }, { "epoch": 1.2589981212134007, "grad_norm": 0.03919261693954468, "learning_rate": 3.6301925675060207e-06, "loss": 0.0083, "step": 149100 }, { "epoch": 1.2590825610605645, "grad_norm": 0.6458202600479126, "learning_rate": 3.6294838931339994e-06, "loss": 0.009, "step": 149110 }, { "epoch": 1.2591670009077283, "grad_norm": 0.297848641872406, "learning_rate": 3.628775248529306e-06, "loss": 0.0088, "step": 149120 }, { "epoch": 1.2592514407548923, "grad_norm": 0.6167057752609253, "learning_rate": 3.6280666337073335e-06, "loss": 0.0058, "step": 149130 }, { "epoch": 1.259335880602056, "grad_norm": 0.40639904141426086, "learning_rate": 3.6273580486834714e-06, "loss": 0.0078, "step": 149140 }, { "epoch": 1.25942032044922, "grad_norm": 0.556760311126709, "learning_rate": 3.626649493473113e-06, "loss": 0.0103, "step": 149150 }, { "epoch": 1.2595047602963838, "grad_norm": 0.39456865191459656, "learning_rate": 3.6259409680916446e-06, "loss": 0.0082, "step": 149160 }, { "epoch": 1.2595892001435478, "grad_norm": 0.37392234802246094, "learning_rate": 3.6252324725544585e-06, "loss": 0.0091, "step": 149170 }, { "epoch": 1.2596736399907116, "grad_norm": 0.31439292430877686, "learning_rate": 3.6245240068769394e-06, "loss": 0.01, "step": 149180 }, { "epoch": 1.2597580798378756, "grad_norm": 0.18685345351696014, "learning_rate": 3.623815571074479e-06, "loss": 0.0063, "step": 149190 }, { "epoch": 1.2598425196850394, "grad_norm": 0.4424764811992645, "learning_rate": 3.6231071651624615e-06, "loss": 0.0113, "step": 149200 }, { "epoch": 1.2599269595322031, "grad_norm": 0.4515964984893799, "learning_rate": 3.622398789156274e-06, "loss": 0.0052, "step": 149210 }, { "epoch": 1.2600113993793671, "grad_norm": 0.33828702569007874, "learning_rate": 3.621690443071304e-06, "loss": 0.0077, "step": 149220 }, { "epoch": 1.2600958392265311, "grad_norm": 0.14146631956100464, "learning_rate": 3.620982126922932e-06, "loss": 0.0081, "step": 149230 }, { "epoch": 1.260180279073695, "grad_norm": 0.1029251366853714, "learning_rate": 3.6202738407265483e-06, "loss": 0.006, "step": 149240 }, { "epoch": 1.2602647189208587, "grad_norm": 0.056581202894449234, "learning_rate": 3.619565584497533e-06, "loss": 0.0074, "step": 149250 }, { "epoch": 1.2603491587680227, "grad_norm": 0.40796616673469543, "learning_rate": 3.618857358251271e-06, "loss": 0.0117, "step": 149260 }, { "epoch": 1.2604335986151864, "grad_norm": 0.38974088430404663, "learning_rate": 3.618149162003143e-06, "loss": 0.0102, "step": 149270 }, { "epoch": 1.2605180384623504, "grad_norm": 0.11037486046552658, "learning_rate": 3.617440995768534e-06, "loss": 0.0109, "step": 149280 }, { "epoch": 1.2606024783095142, "grad_norm": 0.19431504607200623, "learning_rate": 3.6167328595628203e-06, "loss": 0.0072, "step": 149290 }, { "epoch": 1.2606869181566782, "grad_norm": 0.11843986064195633, "learning_rate": 3.6160247534013883e-06, "loss": 0.0059, "step": 149300 }, { "epoch": 1.260771358003842, "grad_norm": 0.039620570838451385, "learning_rate": 3.6153166772996144e-06, "loss": 0.0174, "step": 149310 }, { "epoch": 1.260855797851006, "grad_norm": 0.21659144759178162, "learning_rate": 3.6146086312728778e-06, "loss": 0.0077, "step": 149320 }, { "epoch": 1.2609402376981698, "grad_norm": 0.3440254330635071, "learning_rate": 3.6139006153365584e-06, "loss": 0.0046, "step": 149330 }, { "epoch": 1.2610246775453335, "grad_norm": 0.5280486345291138, "learning_rate": 3.613192629506032e-06, "loss": 0.0082, "step": 149340 }, { "epoch": 1.2611091173924975, "grad_norm": 0.2567838430404663, "learning_rate": 3.6124846737966797e-06, "loss": 0.0101, "step": 149350 }, { "epoch": 1.2611935572396615, "grad_norm": 0.07051510363817215, "learning_rate": 3.611776748223874e-06, "loss": 0.0089, "step": 149360 }, { "epoch": 1.2612779970868253, "grad_norm": 0.30121049284935, "learning_rate": 3.6110688528029946e-06, "loss": 0.0045, "step": 149370 }, { "epoch": 1.261362436933989, "grad_norm": 0.45197564363479614, "learning_rate": 3.6103609875494133e-06, "loss": 0.0054, "step": 149380 }, { "epoch": 1.261446876781153, "grad_norm": 0.01545228622853756, "learning_rate": 3.6096531524785082e-06, "loss": 0.0079, "step": 149390 }, { "epoch": 1.2615313166283169, "grad_norm": 0.27635401487350464, "learning_rate": 3.608945347605651e-06, "loss": 0.0097, "step": 149400 }, { "epoch": 1.2616157564754809, "grad_norm": 0.37236642837524414, "learning_rate": 3.608237572946217e-06, "loss": 0.009, "step": 149410 }, { "epoch": 1.2617001963226446, "grad_norm": 0.21135056018829346, "learning_rate": 3.6075298285155745e-06, "loss": 0.0091, "step": 149420 }, { "epoch": 1.2617846361698084, "grad_norm": 0.12212474644184113, "learning_rate": 3.6068221143291023e-06, "loss": 0.013, "step": 149430 }, { "epoch": 1.2618690760169724, "grad_norm": 0.0337267741560936, "learning_rate": 3.606114430402168e-06, "loss": 0.0096, "step": 149440 }, { "epoch": 1.2619535158641364, "grad_norm": 0.417802095413208, "learning_rate": 3.605406776750141e-06, "loss": 0.0063, "step": 149450 }, { "epoch": 1.2620379557113002, "grad_norm": 0.45894095301628113, "learning_rate": 3.604699153388394e-06, "loss": 0.0089, "step": 149460 }, { "epoch": 1.262122395558464, "grad_norm": 0.7233720421791077, "learning_rate": 3.603991560332294e-06, "loss": 0.0141, "step": 149470 }, { "epoch": 1.262206835405628, "grad_norm": 0.7000628113746643, "learning_rate": 3.603283997597213e-06, "loss": 0.0089, "step": 149480 }, { "epoch": 1.2622912752527917, "grad_norm": 0.11581818759441376, "learning_rate": 3.6025764651985163e-06, "loss": 0.0055, "step": 149490 }, { "epoch": 1.2623757150999557, "grad_norm": 0.3279139995574951, "learning_rate": 3.6018689631515735e-06, "loss": 0.0054, "step": 149500 }, { "epoch": 1.2624601549471195, "grad_norm": 0.4569284915924072, "learning_rate": 3.601161491471748e-06, "loss": 0.0069, "step": 149510 }, { "epoch": 1.2625445947942835, "grad_norm": 0.7074213624000549, "learning_rate": 3.6004540501744105e-06, "loss": 0.0092, "step": 149520 }, { "epoch": 1.2626290346414473, "grad_norm": 0.430414617061615, "learning_rate": 3.5997466392749235e-06, "loss": 0.0045, "step": 149530 }, { "epoch": 1.2627134744886113, "grad_norm": 0.009397157467901707, "learning_rate": 3.599039258788653e-06, "loss": 0.0062, "step": 149540 }, { "epoch": 1.262797914335775, "grad_norm": 0.673253059387207, "learning_rate": 3.5983319087309632e-06, "loss": 0.0159, "step": 149550 }, { "epoch": 1.2628823541829388, "grad_norm": 0.20761574804782867, "learning_rate": 3.5976245891172148e-06, "loss": 0.0045, "step": 149560 }, { "epoch": 1.2629667940301028, "grad_norm": 0.1058126762509346, "learning_rate": 3.5969172999627766e-06, "loss": 0.0054, "step": 149570 }, { "epoch": 1.2630512338772668, "grad_norm": 0.2009480744600296, "learning_rate": 3.596210041283003e-06, "loss": 0.0061, "step": 149580 }, { "epoch": 1.2631356737244306, "grad_norm": 0.20337112247943878, "learning_rate": 3.595502813093262e-06, "loss": 0.0112, "step": 149590 }, { "epoch": 1.2632201135715944, "grad_norm": 0.43344560265541077, "learning_rate": 3.594795615408912e-06, "loss": 0.0056, "step": 149600 }, { "epoch": 1.2633045534187584, "grad_norm": 0.24616996943950653, "learning_rate": 3.5940884482453135e-06, "loss": 0.0088, "step": 149610 }, { "epoch": 1.2633889932659221, "grad_norm": 0.10317317396402359, "learning_rate": 3.593381311617824e-06, "loss": 0.0102, "step": 149620 }, { "epoch": 1.2634734331130861, "grad_norm": 0.45148518681526184, "learning_rate": 3.592674205541806e-06, "loss": 0.0116, "step": 149630 }, { "epoch": 1.26355787296025, "grad_norm": 0.11389747262001038, "learning_rate": 3.5919671300326155e-06, "loss": 0.0083, "step": 149640 }, { "epoch": 1.2636423128074137, "grad_norm": 0.2018197625875473, "learning_rate": 3.59126008510561e-06, "loss": 0.0077, "step": 149650 }, { "epoch": 1.2637267526545777, "grad_norm": 0.061738111078739166, "learning_rate": 3.5905530707761484e-06, "loss": 0.0102, "step": 149660 }, { "epoch": 1.2638111925017417, "grad_norm": 0.3327123522758484, "learning_rate": 3.589846087059582e-06, "loss": 0.0057, "step": 149670 }, { "epoch": 1.2638956323489055, "grad_norm": 0.14786212146282196, "learning_rate": 3.5891391339712724e-06, "loss": 0.0054, "step": 149680 }, { "epoch": 1.2639800721960692, "grad_norm": 1.7605725526809692, "learning_rate": 3.58843221152657e-06, "loss": 0.0127, "step": 149690 }, { "epoch": 1.2640645120432332, "grad_norm": 0.43468207120895386, "learning_rate": 3.5877253197408324e-06, "loss": 0.0066, "step": 149700 }, { "epoch": 1.264148951890397, "grad_norm": 0.24253898859024048, "learning_rate": 3.5870184586294094e-06, "loss": 0.0056, "step": 149710 }, { "epoch": 1.264233391737561, "grad_norm": 0.15889376401901245, "learning_rate": 3.586311628207658e-06, "loss": 0.0077, "step": 149720 }, { "epoch": 1.2643178315847248, "grad_norm": 0.06497278064489365, "learning_rate": 3.585604828490927e-06, "loss": 0.0063, "step": 149730 }, { "epoch": 1.2644022714318888, "grad_norm": 0.45459794998168945, "learning_rate": 3.5848980594945703e-06, "loss": 0.0124, "step": 149740 }, { "epoch": 1.2644867112790525, "grad_norm": 0.7707501649856567, "learning_rate": 3.5841913212339353e-06, "loss": 0.0066, "step": 149750 }, { "epoch": 1.2645711511262165, "grad_norm": 0.8637831807136536, "learning_rate": 3.583484613724377e-06, "loss": 0.006, "step": 149760 }, { "epoch": 1.2646555909733803, "grad_norm": 0.9937877655029297, "learning_rate": 3.582777936981242e-06, "loss": 0.0074, "step": 149770 }, { "epoch": 1.264740030820544, "grad_norm": 0.21968381106853485, "learning_rate": 3.582071291019881e-06, "loss": 0.0065, "step": 149780 }, { "epoch": 1.264824470667708, "grad_norm": 0.10512018203735352, "learning_rate": 3.581364675855641e-06, "loss": 0.011, "step": 149790 }, { "epoch": 1.264908910514872, "grad_norm": 0.20197957754135132, "learning_rate": 3.580658091503867e-06, "loss": 0.0082, "step": 149800 }, { "epoch": 1.2649933503620359, "grad_norm": 0.12954601645469666, "learning_rate": 3.5799515379799103e-06, "loss": 0.0216, "step": 149810 }, { "epoch": 1.2650777902091996, "grad_norm": 0.8722584843635559, "learning_rate": 3.5792450152991143e-06, "loss": 0.0148, "step": 149820 }, { "epoch": 1.2651622300563636, "grad_norm": 0.5555413365364075, "learning_rate": 3.578538523476827e-06, "loss": 0.0085, "step": 149830 }, { "epoch": 1.2652466699035274, "grad_norm": 0.42852431535720825, "learning_rate": 3.5778320625283892e-06, "loss": 0.0089, "step": 149840 }, { "epoch": 1.2653311097506914, "grad_norm": 0.08594484627246857, "learning_rate": 3.5771256324691494e-06, "loss": 0.0063, "step": 149850 }, { "epoch": 1.2654155495978552, "grad_norm": 0.520125150680542, "learning_rate": 3.5764192333144487e-06, "loss": 0.0082, "step": 149860 }, { "epoch": 1.2654999894450192, "grad_norm": 0.15906187891960144, "learning_rate": 3.575712865079631e-06, "loss": 0.0054, "step": 149870 }, { "epoch": 1.265584429292183, "grad_norm": 0.47764477133750916, "learning_rate": 3.5750065277800366e-06, "loss": 0.013, "step": 149880 }, { "epoch": 1.265668869139347, "grad_norm": 0.3385627567768097, "learning_rate": 3.574300221431011e-06, "loss": 0.0055, "step": 149890 }, { "epoch": 1.2657533089865107, "grad_norm": 0.22668981552124023, "learning_rate": 3.573593946047892e-06, "loss": 0.0062, "step": 149900 }, { "epoch": 1.2658377488336745, "grad_norm": 0.3267008066177368, "learning_rate": 3.572887701646017e-06, "loss": 0.0046, "step": 149910 }, { "epoch": 1.2659221886808385, "grad_norm": 0.5803389549255371, "learning_rate": 3.5721814882407323e-06, "loss": 0.0085, "step": 149920 }, { "epoch": 1.2660066285280025, "grad_norm": 0.6035217642784119, "learning_rate": 3.571475305847371e-06, "loss": 0.0069, "step": 149930 }, { "epoch": 1.2660910683751663, "grad_norm": 0.25313904881477356, "learning_rate": 3.570769154481275e-06, "loss": 0.007, "step": 149940 }, { "epoch": 1.26617550822233, "grad_norm": 0.32095834612846375, "learning_rate": 3.5700630341577775e-06, "loss": 0.0081, "step": 149950 }, { "epoch": 1.266259948069494, "grad_norm": 0.14641202986240387, "learning_rate": 3.5693569448922198e-06, "loss": 0.0101, "step": 149960 }, { "epoch": 1.2663443879166578, "grad_norm": 0.16151168942451477, "learning_rate": 3.5686508866999357e-06, "loss": 0.01, "step": 149970 }, { "epoch": 1.2664288277638218, "grad_norm": 0.32166895270347595, "learning_rate": 3.5679448595962616e-06, "loss": 0.0077, "step": 149980 }, { "epoch": 1.2665132676109856, "grad_norm": 0.05046302452683449, "learning_rate": 3.5672388635965293e-06, "loss": 0.0074, "step": 149990 }, { "epoch": 1.2665977074581494, "grad_norm": 0.40318718552589417, "learning_rate": 3.566532898716078e-06, "loss": 0.0077, "step": 150000 }, { "epoch": 1.2666821473053134, "grad_norm": 0.3778607249259949, "learning_rate": 3.565826964970237e-06, "loss": 0.005, "step": 150010 }, { "epoch": 1.2667665871524774, "grad_norm": 0.3616645932197571, "learning_rate": 3.5651210623743415e-06, "loss": 0.0076, "step": 150020 }, { "epoch": 1.2668510269996411, "grad_norm": 0.07975402474403381, "learning_rate": 3.564415190943723e-06, "loss": 0.0061, "step": 150030 }, { "epoch": 1.266935466846805, "grad_norm": 0.2683495879173279, "learning_rate": 3.56370935069371e-06, "loss": 0.0084, "step": 150040 }, { "epoch": 1.267019906693969, "grad_norm": 0.12803839147090912, "learning_rate": 3.5630035416396374e-06, "loss": 0.0055, "step": 150050 }, { "epoch": 1.2671043465411327, "grad_norm": 0.2245764136314392, "learning_rate": 3.5622977637968325e-06, "loss": 0.0077, "step": 150060 }, { "epoch": 1.2671887863882967, "grad_norm": 0.14695434272289276, "learning_rate": 3.561592017180626e-06, "loss": 0.0085, "step": 150070 }, { "epoch": 1.2672732262354605, "grad_norm": 0.18025553226470947, "learning_rate": 3.5608863018063455e-06, "loss": 0.0062, "step": 150080 }, { "epoch": 1.2673576660826245, "grad_norm": 0.3199561536312103, "learning_rate": 3.5601806176893206e-06, "loss": 0.008, "step": 150090 }, { "epoch": 1.2674421059297882, "grad_norm": 0.31702467799186707, "learning_rate": 3.5594749648448766e-06, "loss": 0.0067, "step": 150100 }, { "epoch": 1.2675265457769522, "grad_norm": 0.3371320962905884, "learning_rate": 3.558769343288343e-06, "loss": 0.0085, "step": 150110 }, { "epoch": 1.267610985624116, "grad_norm": 0.12332648783922195, "learning_rate": 3.558063753035041e-06, "loss": 0.007, "step": 150120 }, { "epoch": 1.2676954254712798, "grad_norm": 0.5937885642051697, "learning_rate": 3.557358194100301e-06, "loss": 0.0109, "step": 150130 }, { "epoch": 1.2677798653184438, "grad_norm": 0.2678877115249634, "learning_rate": 3.556652666499446e-06, "loss": 0.0104, "step": 150140 }, { "epoch": 1.2678643051656078, "grad_norm": 0.19200769066810608, "learning_rate": 3.5559471702477977e-06, "loss": 0.0071, "step": 150150 }, { "epoch": 1.2679487450127715, "grad_norm": 0.31292691826820374, "learning_rate": 3.5552417053606826e-06, "loss": 0.0053, "step": 150160 }, { "epoch": 1.2680331848599353, "grad_norm": 0.555230438709259, "learning_rate": 3.5545362718534187e-06, "loss": 0.0068, "step": 150170 }, { "epoch": 1.2681176247070993, "grad_norm": 0.2314891815185547, "learning_rate": 3.5538308697413345e-06, "loss": 0.0059, "step": 150180 }, { "epoch": 1.268202064554263, "grad_norm": 0.25512945652008057, "learning_rate": 3.553125499039744e-06, "loss": 0.0094, "step": 150190 }, { "epoch": 1.268286504401427, "grad_norm": 0.07908497750759125, "learning_rate": 3.552420159763973e-06, "loss": 0.006, "step": 150200 }, { "epoch": 1.2683709442485909, "grad_norm": 0.3936684727668762, "learning_rate": 3.5517148519293387e-06, "loss": 0.0125, "step": 150210 }, { "epoch": 1.2684553840957549, "grad_norm": 0.05324085056781769, "learning_rate": 3.5510095755511627e-06, "loss": 0.0129, "step": 150220 }, { "epoch": 1.2685398239429186, "grad_norm": 0.3335123062133789, "learning_rate": 3.5503043306447593e-06, "loss": 0.0136, "step": 150230 }, { "epoch": 1.2686242637900826, "grad_norm": 0.77519690990448, "learning_rate": 3.549599117225451e-06, "loss": 0.0117, "step": 150240 }, { "epoch": 1.2687087036372464, "grad_norm": 0.023422211408615112, "learning_rate": 3.548893935308552e-06, "loss": 0.0037, "step": 150250 }, { "epoch": 1.2687931434844102, "grad_norm": 0.07932368665933609, "learning_rate": 3.548188784909379e-06, "loss": 0.0088, "step": 150260 }, { "epoch": 1.2688775833315742, "grad_norm": 0.14585210382938385, "learning_rate": 3.547483666043249e-06, "loss": 0.0052, "step": 150270 }, { "epoch": 1.268962023178738, "grad_norm": 0.10724478214979172, "learning_rate": 3.5467785787254736e-06, "loss": 0.0071, "step": 150280 }, { "epoch": 1.269046463025902, "grad_norm": 0.14654281735420227, "learning_rate": 3.5460735229713716e-06, "loss": 0.0081, "step": 150290 }, { "epoch": 1.2691309028730657, "grad_norm": 0.7526417374610901, "learning_rate": 3.5453684987962544e-06, "loss": 0.0059, "step": 150300 }, { "epoch": 1.2692153427202297, "grad_norm": 0.31420961022377014, "learning_rate": 3.5446635062154366e-06, "loss": 0.008, "step": 150310 }, { "epoch": 1.2692997825673935, "grad_norm": 0.3229520916938782, "learning_rate": 3.543958545244226e-06, "loss": 0.0075, "step": 150320 }, { "epoch": 1.2693842224145575, "grad_norm": 0.7175201773643494, "learning_rate": 3.54325361589794e-06, "loss": 0.0074, "step": 150330 }, { "epoch": 1.2694686622617213, "grad_norm": 0.13715852797031403, "learning_rate": 3.5425487181918867e-06, "loss": 0.0075, "step": 150340 }, { "epoch": 1.269553102108885, "grad_norm": 0.5122551321983337, "learning_rate": 3.5418438521413765e-06, "loss": 0.0102, "step": 150350 }, { "epoch": 1.269637541956049, "grad_norm": 0.2149316370487213, "learning_rate": 3.541139017761718e-06, "loss": 0.0046, "step": 150360 }, { "epoch": 1.269721981803213, "grad_norm": 0.2540041506290436, "learning_rate": 3.5404342150682223e-06, "loss": 0.0061, "step": 150370 }, { "epoch": 1.2698064216503768, "grad_norm": 0.046066008508205414, "learning_rate": 3.5397294440761964e-06, "loss": 0.0036, "step": 150380 }, { "epoch": 1.2698908614975406, "grad_norm": 0.45951613783836365, "learning_rate": 3.5390247048009473e-06, "loss": 0.0031, "step": 150390 }, { "epoch": 1.2699753013447046, "grad_norm": 0.052016910165548325, "learning_rate": 3.538319997257783e-06, "loss": 0.0091, "step": 150400 }, { "epoch": 1.2700597411918684, "grad_norm": 0.028879495337605476, "learning_rate": 3.5376153214620067e-06, "loss": 0.0064, "step": 150410 }, { "epoch": 1.2701441810390324, "grad_norm": 0.20939765870571136, "learning_rate": 3.5369106774289282e-06, "loss": 0.0062, "step": 150420 }, { "epoch": 1.2702286208861961, "grad_norm": 0.3933534026145935, "learning_rate": 3.5362060651738484e-06, "loss": 0.0061, "step": 150430 }, { "epoch": 1.2703130607333601, "grad_norm": 0.14452636241912842, "learning_rate": 3.5355014847120745e-06, "loss": 0.0061, "step": 150440 }, { "epoch": 1.270397500580524, "grad_norm": 1.0850200653076172, "learning_rate": 3.5347969360589064e-06, "loss": 0.0069, "step": 150450 }, { "epoch": 1.270481940427688, "grad_norm": 0.4174882173538208, "learning_rate": 3.53409241922965e-06, "loss": 0.0077, "step": 150460 }, { "epoch": 1.2705663802748517, "grad_norm": 0.1591835469007492, "learning_rate": 3.533387934239605e-06, "loss": 0.0053, "step": 150470 }, { "epoch": 1.2706508201220155, "grad_norm": 0.10059861838817596, "learning_rate": 3.5326834811040743e-06, "loss": 0.009, "step": 150480 }, { "epoch": 1.2707352599691795, "grad_norm": 0.2564837336540222, "learning_rate": 3.5319790598383587e-06, "loss": 0.0051, "step": 150490 }, { "epoch": 1.2708196998163435, "grad_norm": 0.17470782995224, "learning_rate": 3.531274670457755e-06, "loss": 0.014, "step": 150500 }, { "epoch": 1.2709041396635072, "grad_norm": 0.6273664832115173, "learning_rate": 3.530570312977566e-06, "loss": 0.0106, "step": 150510 }, { "epoch": 1.270988579510671, "grad_norm": 0.18207433819770813, "learning_rate": 3.5298659874130864e-06, "loss": 0.0069, "step": 150520 }, { "epoch": 1.271073019357835, "grad_norm": 0.32318857312202454, "learning_rate": 3.5291616937796187e-06, "loss": 0.0074, "step": 150530 }, { "epoch": 1.2711574592049988, "grad_norm": 0.8468876481056213, "learning_rate": 3.5284574320924557e-06, "loss": 0.0077, "step": 150540 }, { "epoch": 1.2712418990521628, "grad_norm": 0.044618960469961166, "learning_rate": 3.527753202366898e-06, "loss": 0.005, "step": 150550 }, { "epoch": 1.2713263388993266, "grad_norm": 0.3847184181213379, "learning_rate": 3.527049004618236e-06, "loss": 0.0107, "step": 150560 }, { "epoch": 1.2714107787464903, "grad_norm": 0.2045937180519104, "learning_rate": 3.5263448388617703e-06, "loss": 0.0047, "step": 150570 }, { "epoch": 1.2714952185936543, "grad_norm": 0.12542687356472015, "learning_rate": 3.5256407051127917e-06, "loss": 0.0052, "step": 150580 }, { "epoch": 1.2715796584408183, "grad_norm": 0.0018750735325738788, "learning_rate": 3.5249366033865962e-06, "loss": 0.006, "step": 150590 }, { "epoch": 1.271664098287982, "grad_norm": 0.38201436400413513, "learning_rate": 3.524232533698473e-06, "loss": 0.0048, "step": 150600 }, { "epoch": 1.2717485381351459, "grad_norm": 0.312604159116745, "learning_rate": 3.5235284960637183e-06, "loss": 0.0081, "step": 150610 }, { "epoch": 1.2718329779823099, "grad_norm": 0.04870343208312988, "learning_rate": 3.5228244904976226e-06, "loss": 0.007, "step": 150620 }, { "epoch": 1.2719174178294737, "grad_norm": 0.795330822467804, "learning_rate": 3.522120517015476e-06, "loss": 0.0054, "step": 150630 }, { "epoch": 1.2720018576766376, "grad_norm": 0.07755855470895767, "learning_rate": 3.5214165756325692e-06, "loss": 0.0061, "step": 150640 }, { "epoch": 1.2720862975238014, "grad_norm": 0.07940079271793365, "learning_rate": 3.520712666364189e-06, "loss": 0.0051, "step": 150650 }, { "epoch": 1.2721707373709654, "grad_norm": 0.31574878096580505, "learning_rate": 3.520008789225629e-06, "loss": 0.0097, "step": 150660 }, { "epoch": 1.2722551772181292, "grad_norm": 0.055841200053691864, "learning_rate": 3.5193049442321735e-06, "loss": 0.0052, "step": 150670 }, { "epoch": 1.2723396170652932, "grad_norm": 0.08003102988004684, "learning_rate": 3.5186011313991127e-06, "loss": 0.0029, "step": 150680 }, { "epoch": 1.272424056912457, "grad_norm": 0.19908879697322845, "learning_rate": 3.517897350741729e-06, "loss": 0.007, "step": 150690 }, { "epoch": 1.2725084967596207, "grad_norm": 0.21455788612365723, "learning_rate": 3.517193602275314e-06, "loss": 0.0081, "step": 150700 }, { "epoch": 1.2725929366067847, "grad_norm": 0.32356780767440796, "learning_rate": 3.516489886015149e-06, "loss": 0.011, "step": 150710 }, { "epoch": 1.2726773764539487, "grad_norm": 0.2409028857946396, "learning_rate": 3.51578620197652e-06, "loss": 0.0084, "step": 150720 }, { "epoch": 1.2727618163011125, "grad_norm": 0.17261433601379395, "learning_rate": 3.515082550174711e-06, "loss": 0.0082, "step": 150730 }, { "epoch": 1.2728462561482763, "grad_norm": 0.2762826383113861, "learning_rate": 3.514378930625003e-06, "loss": 0.0059, "step": 150740 }, { "epoch": 1.2729306959954403, "grad_norm": 0.41354700922966003, "learning_rate": 3.513675343342682e-06, "loss": 0.0071, "step": 150750 }, { "epoch": 1.273015135842604, "grad_norm": 0.12686678767204285, "learning_rate": 3.5129717883430266e-06, "loss": 0.0026, "step": 150760 }, { "epoch": 1.273099575689768, "grad_norm": 0.06799181550741196, "learning_rate": 3.51226826564132e-06, "loss": 0.0092, "step": 150770 }, { "epoch": 1.2731840155369318, "grad_norm": 0.30409011244773865, "learning_rate": 3.5115647752528417e-06, "loss": 0.0069, "step": 150780 }, { "epoch": 1.2732684553840958, "grad_norm": 0.17376448214054108, "learning_rate": 3.510861317192872e-06, "loss": 0.0066, "step": 150790 }, { "epoch": 1.2733528952312596, "grad_norm": 0.20518851280212402, "learning_rate": 3.510157891476688e-06, "loss": 0.0064, "step": 150800 }, { "epoch": 1.2734373350784236, "grad_norm": 0.3274388611316681, "learning_rate": 3.50945449811957e-06, "loss": 0.0082, "step": 150810 }, { "epoch": 1.2735217749255874, "grad_norm": 0.2944627106189728, "learning_rate": 3.5087511371367944e-06, "loss": 0.0062, "step": 150820 }, { "epoch": 1.2736062147727512, "grad_norm": 0.3395056426525116, "learning_rate": 3.5080478085436396e-06, "loss": 0.0096, "step": 150830 }, { "epoch": 1.2736906546199152, "grad_norm": 0.15408241748809814, "learning_rate": 3.5073445123553807e-06, "loss": 0.0052, "step": 150840 }, { "epoch": 1.2737750944670791, "grad_norm": 0.24941784143447876, "learning_rate": 3.5066412485872913e-06, "loss": 0.0068, "step": 150850 }, { "epoch": 1.273859534314243, "grad_norm": 0.18323372304439545, "learning_rate": 3.5059380172546498e-06, "loss": 0.004, "step": 150860 }, { "epoch": 1.2739439741614067, "grad_norm": 0.24832288920879364, "learning_rate": 3.5052348183727273e-06, "loss": 0.0075, "step": 150870 }, { "epoch": 1.2740284140085707, "grad_norm": 0.10598837584257126, "learning_rate": 3.5045316519567997e-06, "loss": 0.0063, "step": 150880 }, { "epoch": 1.2741128538557345, "grad_norm": 0.10539120435714722, "learning_rate": 3.5038285180221352e-06, "loss": 0.007, "step": 150890 }, { "epoch": 1.2741972937028985, "grad_norm": 0.001410938217304647, "learning_rate": 3.5031254165840117e-06, "loss": 0.0054, "step": 150900 }, { "epoch": 1.2742817335500622, "grad_norm": 0.7272210121154785, "learning_rate": 3.5024223476576957e-06, "loss": 0.0112, "step": 150910 }, { "epoch": 1.274366173397226, "grad_norm": 0.11124993860721588, "learning_rate": 3.5017193112584612e-06, "loss": 0.0068, "step": 150920 }, { "epoch": 1.27445061324439, "grad_norm": 0.12601028382778168, "learning_rate": 3.5010163074015737e-06, "loss": 0.0082, "step": 150930 }, { "epoch": 1.274535053091554, "grad_norm": 0.15068913996219635, "learning_rate": 3.5003133361023066e-06, "loss": 0.0059, "step": 150940 }, { "epoch": 1.2746194929387178, "grad_norm": 0.033395469188690186, "learning_rate": 3.499610397375926e-06, "loss": 0.0044, "step": 150950 }, { "epoch": 1.2747039327858816, "grad_norm": 0.08882565051317215, "learning_rate": 3.498907491237701e-06, "loss": 0.0036, "step": 150960 }, { "epoch": 1.2747883726330456, "grad_norm": 0.2913833260536194, "learning_rate": 3.498204617702898e-06, "loss": 0.0051, "step": 150970 }, { "epoch": 1.2748728124802093, "grad_norm": 0.41979679465293884, "learning_rate": 3.49750177678678e-06, "loss": 0.0101, "step": 150980 }, { "epoch": 1.2749572523273733, "grad_norm": 0.595072329044342, "learning_rate": 3.4967989685046184e-06, "loss": 0.0066, "step": 150990 }, { "epoch": 1.2750416921745371, "grad_norm": 0.1554138958454132, "learning_rate": 3.496096192871674e-06, "loss": 0.0092, "step": 151000 }, { "epoch": 1.275126132021701, "grad_norm": 0.14153485000133514, "learning_rate": 3.4953934499032127e-06, "loss": 0.0069, "step": 151010 }, { "epoch": 1.2752105718688649, "grad_norm": 0.12593676149845123, "learning_rate": 3.494690739614496e-06, "loss": 0.0079, "step": 151020 }, { "epoch": 1.2752950117160289, "grad_norm": 0.11307158321142197, "learning_rate": 3.4939880620207896e-06, "loss": 0.0054, "step": 151030 }, { "epoch": 1.2753794515631927, "grad_norm": 0.02256188355386257, "learning_rate": 3.4932854171373528e-06, "loss": 0.0164, "step": 151040 }, { "epoch": 1.2754638914103564, "grad_norm": 0.150850310921669, "learning_rate": 3.4925828049794485e-06, "loss": 0.0066, "step": 151050 }, { "epoch": 1.2755483312575204, "grad_norm": 0.21887460350990295, "learning_rate": 3.4918802255623356e-06, "loss": 0.0044, "step": 151060 }, { "epoch": 1.2756327711046844, "grad_norm": 0.007824013940989971, "learning_rate": 3.491177678901278e-06, "loss": 0.0081, "step": 151070 }, { "epoch": 1.2757172109518482, "grad_norm": 0.06370009481906891, "learning_rate": 3.490475165011531e-06, "loss": 0.0039, "step": 151080 }, { "epoch": 1.275801650799012, "grad_norm": 0.42410212755203247, "learning_rate": 3.4897726839083523e-06, "loss": 0.0068, "step": 151090 }, { "epoch": 1.275886090646176, "grad_norm": 0.07100226730108261, "learning_rate": 3.4890702356070028e-06, "loss": 0.0093, "step": 151100 }, { "epoch": 1.2759705304933397, "grad_norm": 0.23616984486579895, "learning_rate": 3.4883678201227377e-06, "loss": 0.0072, "step": 151110 }, { "epoch": 1.2760549703405037, "grad_norm": 0.09539835155010223, "learning_rate": 3.487665437470814e-06, "loss": 0.0076, "step": 151120 }, { "epoch": 1.2761394101876675, "grad_norm": 0.36319008469581604, "learning_rate": 3.486963087666486e-06, "loss": 0.0058, "step": 151130 }, { "epoch": 1.2762238500348315, "grad_norm": 0.3741146922111511, "learning_rate": 3.4862607707250117e-06, "loss": 0.0107, "step": 151140 }, { "epoch": 1.2763082898819953, "grad_norm": 0.15432456135749817, "learning_rate": 3.4855584866616417e-06, "loss": 0.0065, "step": 151150 }, { "epoch": 1.2763927297291593, "grad_norm": 0.3599952161312103, "learning_rate": 3.4848562354916328e-06, "loss": 0.0078, "step": 151160 }, { "epoch": 1.276477169576323, "grad_norm": 0.0625666156411171, "learning_rate": 3.484154017230233e-06, "loss": 0.0068, "step": 151170 }, { "epoch": 1.2765616094234868, "grad_norm": 0.32072097063064575, "learning_rate": 3.4834518318926995e-06, "loss": 0.0069, "step": 151180 }, { "epoch": 1.2766460492706508, "grad_norm": 0.29016777873039246, "learning_rate": 3.4827496794942806e-06, "loss": 0.0095, "step": 151190 }, { "epoch": 1.2767304891178146, "grad_norm": 0.8384634852409363, "learning_rate": 3.482047560050229e-06, "loss": 0.0042, "step": 151200 }, { "epoch": 1.2768149289649786, "grad_norm": 0.6512857675552368, "learning_rate": 3.4813454735757936e-06, "loss": 0.0151, "step": 151210 }, { "epoch": 1.2768993688121424, "grad_norm": 1.2470389604568481, "learning_rate": 3.4806434200862205e-06, "loss": 0.0083, "step": 151220 }, { "epoch": 1.2769838086593064, "grad_norm": 0.14327557384967804, "learning_rate": 3.4799413995967634e-06, "loss": 0.0071, "step": 151230 }, { "epoch": 1.2770682485064702, "grad_norm": 0.1808265745639801, "learning_rate": 3.4792394121226668e-06, "loss": 0.0079, "step": 151240 }, { "epoch": 1.2771526883536342, "grad_norm": 0.03725035488605499, "learning_rate": 3.478537457679179e-06, "loss": 0.0093, "step": 151250 }, { "epoch": 1.277237128200798, "grad_norm": 0.5715909004211426, "learning_rate": 3.477835536281544e-06, "loss": 0.0075, "step": 151260 }, { "epoch": 1.2773215680479617, "grad_norm": 0.3288825452327728, "learning_rate": 3.4771336479450117e-06, "loss": 0.0061, "step": 151270 }, { "epoch": 1.2774060078951257, "grad_norm": 0.30098602175712585, "learning_rate": 3.4764317926848236e-06, "loss": 0.0126, "step": 151280 }, { "epoch": 1.2774904477422897, "grad_norm": 0.3333823084831238, "learning_rate": 3.475729970516225e-06, "loss": 0.0044, "step": 151290 }, { "epoch": 1.2775748875894535, "grad_norm": 0.3581922650337219, "learning_rate": 3.475028181454457e-06, "loss": 0.0045, "step": 151300 }, { "epoch": 1.2776593274366173, "grad_norm": 0.04692353680729866, "learning_rate": 3.474326425514767e-06, "loss": 0.0042, "step": 151310 }, { "epoch": 1.2777437672837813, "grad_norm": 0.409100741147995, "learning_rate": 3.4736247027123946e-06, "loss": 0.0084, "step": 151320 }, { "epoch": 1.277828207130945, "grad_norm": 0.44434136152267456, "learning_rate": 3.472923013062579e-06, "loss": 0.0063, "step": 151330 }, { "epoch": 1.277912646978109, "grad_norm": 0.6111698746681213, "learning_rate": 3.472221356580565e-06, "loss": 0.0078, "step": 151340 }, { "epoch": 1.2779970868252728, "grad_norm": 0.5327398777008057, "learning_rate": 3.4715197332815864e-06, "loss": 0.008, "step": 151350 }, { "epoch": 1.2780815266724368, "grad_norm": 0.2605096101760864, "learning_rate": 3.47081814318089e-06, "loss": 0.0112, "step": 151360 }, { "epoch": 1.2781659665196006, "grad_norm": 0.14800317585468292, "learning_rate": 3.470116586293707e-06, "loss": 0.0044, "step": 151370 }, { "epoch": 1.2782504063667646, "grad_norm": 0.31336888670921326, "learning_rate": 3.46941506263528e-06, "loss": 0.0109, "step": 151380 }, { "epoch": 1.2783348462139283, "grad_norm": 0.3803679049015045, "learning_rate": 3.4687135722208435e-06, "loss": 0.0085, "step": 151390 }, { "epoch": 1.2784192860610921, "grad_norm": 0.06449399888515472, "learning_rate": 3.4680121150656354e-06, "loss": 0.0052, "step": 151400 }, { "epoch": 1.2785037259082561, "grad_norm": 0.18335828185081482, "learning_rate": 3.467310691184888e-06, "loss": 0.0059, "step": 151410 }, { "epoch": 1.2785881657554201, "grad_norm": 0.40833592414855957, "learning_rate": 3.4666093005938407e-06, "loss": 0.0054, "step": 151420 }, { "epoch": 1.278672605602584, "grad_norm": 0.4034646153450012, "learning_rate": 3.4659079433077235e-06, "loss": 0.0085, "step": 151430 }, { "epoch": 1.2787570454497477, "grad_norm": 0.28217971324920654, "learning_rate": 3.4652066193417734e-06, "loss": 0.0151, "step": 151440 }, { "epoch": 1.2788414852969117, "grad_norm": 0.2695423364639282, "learning_rate": 3.46450532871122e-06, "loss": 0.0067, "step": 151450 }, { "epoch": 1.2789259251440754, "grad_norm": 0.040528666228055954, "learning_rate": 3.463804071431295e-06, "loss": 0.0028, "step": 151460 }, { "epoch": 1.2790103649912394, "grad_norm": 0.4194020628929138, "learning_rate": 3.4631028475172324e-06, "loss": 0.0079, "step": 151470 }, { "epoch": 1.2790948048384032, "grad_norm": 0.1690007895231247, "learning_rate": 3.46240165698426e-06, "loss": 0.0073, "step": 151480 }, { "epoch": 1.279179244685567, "grad_norm": 0.1463276743888855, "learning_rate": 3.4617004998476094e-06, "loss": 0.006, "step": 151490 }, { "epoch": 1.279263684532731, "grad_norm": 0.031341757625341415, "learning_rate": 3.460999376122507e-06, "loss": 0.0052, "step": 151500 }, { "epoch": 1.279348124379895, "grad_norm": 0.2817232608795166, "learning_rate": 3.4602982858241844e-06, "loss": 0.0128, "step": 151510 }, { "epoch": 1.2794325642270588, "grad_norm": 0.3669685423374176, "learning_rate": 3.459597228967867e-06, "loss": 0.0074, "step": 151520 }, { "epoch": 1.2795170040742225, "grad_norm": 0.16101282835006714, "learning_rate": 3.4588962055687823e-06, "loss": 0.0095, "step": 151530 }, { "epoch": 1.2796014439213865, "grad_norm": 0.2870804965496063, "learning_rate": 3.458195215642155e-06, "loss": 0.004, "step": 151540 }, { "epoch": 1.2796858837685503, "grad_norm": 0.09513911604881287, "learning_rate": 3.4574942592032133e-06, "loss": 0.0093, "step": 151550 }, { "epoch": 1.2797703236157143, "grad_norm": 0.11193698644638062, "learning_rate": 3.4567933362671814e-06, "loss": 0.0057, "step": 151560 }, { "epoch": 1.279854763462878, "grad_norm": 0.05032092332839966, "learning_rate": 3.4560924468492794e-06, "loss": 0.0056, "step": 151570 }, { "epoch": 1.279939203310042, "grad_norm": 0.2863766551017761, "learning_rate": 3.455391590964735e-06, "loss": 0.0044, "step": 151580 }, { "epoch": 1.2800236431572058, "grad_norm": 0.5018291473388672, "learning_rate": 3.4546907686287662e-06, "loss": 0.0104, "step": 151590 }, { "epoch": 1.2801080830043698, "grad_norm": 0.04800565540790558, "learning_rate": 3.453989979856599e-06, "loss": 0.0039, "step": 151600 }, { "epoch": 1.2801925228515336, "grad_norm": 0.29935207962989807, "learning_rate": 3.453289224663452e-06, "loss": 0.0055, "step": 151610 }, { "epoch": 1.2802769626986974, "grad_norm": 0.0391044057905674, "learning_rate": 3.452588503064548e-06, "loss": 0.0063, "step": 151620 }, { "epoch": 1.2803614025458614, "grad_norm": 0.23261398077011108, "learning_rate": 3.4518878150751012e-06, "loss": 0.0089, "step": 151630 }, { "epoch": 1.2804458423930254, "grad_norm": 0.26182159781455994, "learning_rate": 3.4511871607103363e-06, "loss": 0.0082, "step": 151640 }, { "epoch": 1.2805302822401892, "grad_norm": 0.2275502234697342, "learning_rate": 3.450486539985468e-06, "loss": 0.0078, "step": 151650 }, { "epoch": 1.280614722087353, "grad_norm": 0.47907793521881104, "learning_rate": 3.4497859529157157e-06, "loss": 0.0078, "step": 151660 }, { "epoch": 1.280699161934517, "grad_norm": 0.35272103548049927, "learning_rate": 3.4490853995162943e-06, "loss": 0.0065, "step": 151670 }, { "epoch": 1.2807836017816807, "grad_norm": 0.2924591600894928, "learning_rate": 3.44838487980242e-06, "loss": 0.0049, "step": 151680 }, { "epoch": 1.2808680416288447, "grad_norm": 0.34515562653541565, "learning_rate": 3.4476843937893086e-06, "loss": 0.0073, "step": 151690 }, { "epoch": 1.2809524814760085, "grad_norm": 0.2446533441543579, "learning_rate": 3.4469839414921726e-06, "loss": 0.0059, "step": 151700 }, { "epoch": 1.2810369213231725, "grad_norm": 0.3277856409549713, "learning_rate": 3.446283522926228e-06, "loss": 0.0101, "step": 151710 }, { "epoch": 1.2811213611703363, "grad_norm": 0.14584049582481384, "learning_rate": 3.445583138106687e-06, "loss": 0.007, "step": 151720 }, { "epoch": 1.2812058010175003, "grad_norm": 0.1688712239265442, "learning_rate": 3.444882787048762e-06, "loss": 0.0083, "step": 151730 }, { "epoch": 1.281290240864664, "grad_norm": 0.1854734867811203, "learning_rate": 3.444182469767663e-06, "loss": 0.0056, "step": 151740 }, { "epoch": 1.2813746807118278, "grad_norm": 0.2264624536037445, "learning_rate": 3.443482186278603e-06, "loss": 0.015, "step": 151750 }, { "epoch": 1.2814591205589918, "grad_norm": 0.11415701359510422, "learning_rate": 3.4427819365967906e-06, "loss": 0.0069, "step": 151760 }, { "epoch": 1.2815435604061558, "grad_norm": 0.1823134571313858, "learning_rate": 3.4420817207374356e-06, "loss": 0.0049, "step": 151770 }, { "epoch": 1.2816280002533196, "grad_norm": 0.19921855628490448, "learning_rate": 3.4413815387157457e-06, "loss": 0.0093, "step": 151780 }, { "epoch": 1.2817124401004834, "grad_norm": 0.25082266330718994, "learning_rate": 3.4406813905469304e-06, "loss": 0.0044, "step": 151790 }, { "epoch": 1.2817968799476473, "grad_norm": 0.42999130487442017, "learning_rate": 3.439981276246196e-06, "loss": 0.0069, "step": 151800 }, { "epoch": 1.2818813197948111, "grad_norm": 0.4708923399448395, "learning_rate": 3.4392811958287475e-06, "loss": 0.0071, "step": 151810 }, { "epoch": 1.2819657596419751, "grad_norm": 0.15187005698680878, "learning_rate": 3.438581149309793e-06, "loss": 0.0068, "step": 151820 }, { "epoch": 1.282050199489139, "grad_norm": 0.8972363471984863, "learning_rate": 3.437881136704534e-06, "loss": 0.0071, "step": 151830 }, { "epoch": 1.2821346393363027, "grad_norm": 0.49746188521385193, "learning_rate": 3.4371811580281778e-06, "loss": 0.01, "step": 151840 }, { "epoch": 1.2822190791834667, "grad_norm": 0.3158482015132904, "learning_rate": 3.436481213295926e-06, "loss": 0.0052, "step": 151850 }, { "epoch": 1.2823035190306307, "grad_norm": 0.4794590473175049, "learning_rate": 3.4357813025229835e-06, "loss": 0.0083, "step": 151860 }, { "epoch": 1.2823879588777944, "grad_norm": 0.9095807671546936, "learning_rate": 3.435081425724548e-06, "loss": 0.0092, "step": 151870 }, { "epoch": 1.2824723987249582, "grad_norm": 0.22040563821792603, "learning_rate": 3.4343815829158256e-06, "loss": 0.0052, "step": 151880 }, { "epoch": 1.2825568385721222, "grad_norm": 0.2910490036010742, "learning_rate": 3.4336817741120125e-06, "loss": 0.0134, "step": 151890 }, { "epoch": 1.282641278419286, "grad_norm": 0.3621954321861267, "learning_rate": 3.432981999328312e-06, "loss": 0.0109, "step": 151900 }, { "epoch": 1.28272571826645, "grad_norm": 0.006247181911021471, "learning_rate": 3.4322822585799215e-06, "loss": 0.0079, "step": 151910 }, { "epoch": 1.2828101581136138, "grad_norm": 0.2538314163684845, "learning_rate": 3.431582551882037e-06, "loss": 0.0078, "step": 151920 }, { "epoch": 1.2828945979607778, "grad_norm": 0.0007464221562258899, "learning_rate": 3.4308828792498594e-06, "loss": 0.0048, "step": 151930 }, { "epoch": 1.2829790378079415, "grad_norm": 0.3365691304206848, "learning_rate": 3.4301832406985836e-06, "loss": 0.0088, "step": 151940 }, { "epoch": 1.2830634776551055, "grad_norm": 0.1478092223405838, "learning_rate": 3.4294836362434075e-06, "loss": 0.0094, "step": 151950 }, { "epoch": 1.2831479175022693, "grad_norm": 0.13964195549488068, "learning_rate": 3.428784065899523e-06, "loss": 0.0099, "step": 151960 }, { "epoch": 1.283232357349433, "grad_norm": 0.2726520299911499, "learning_rate": 3.428084529682129e-06, "loss": 0.0088, "step": 151970 }, { "epoch": 1.283316797196597, "grad_norm": 0.4122368097305298, "learning_rate": 3.427385027606414e-06, "loss": 0.007, "step": 151980 }, { "epoch": 1.283401237043761, "grad_norm": 0.44215384125709534, "learning_rate": 3.426685559687576e-06, "loss": 0.0061, "step": 151990 }, { "epoch": 1.2834856768909249, "grad_norm": 0.04218823462724686, "learning_rate": 3.4259861259408044e-06, "loss": 0.0057, "step": 152000 }, { "epoch": 1.2835701167380886, "grad_norm": 0.256024569272995, "learning_rate": 3.4252867263812927e-06, "loss": 0.0053, "step": 152010 }, { "epoch": 1.2836545565852526, "grad_norm": 0.5948032736778259, "learning_rate": 3.4245873610242285e-06, "loss": 0.0058, "step": 152020 }, { "epoch": 1.2837389964324164, "grad_norm": 0.003599934745579958, "learning_rate": 3.423888029884806e-06, "loss": 0.0086, "step": 152030 }, { "epoch": 1.2838234362795804, "grad_norm": 0.09948273748159409, "learning_rate": 3.423188732978213e-06, "loss": 0.0111, "step": 152040 }, { "epoch": 1.2839078761267442, "grad_norm": 0.22101064026355743, "learning_rate": 3.4224894703196355e-06, "loss": 0.0101, "step": 152050 }, { "epoch": 1.283992315973908, "grad_norm": 0.9722509384155273, "learning_rate": 3.4217902419242655e-06, "loss": 0.0079, "step": 152060 }, { "epoch": 1.284076755821072, "grad_norm": 0.0930616706609726, "learning_rate": 3.421091047807285e-06, "loss": 0.0056, "step": 152070 }, { "epoch": 1.284161195668236, "grad_norm": 0.1858484297990799, "learning_rate": 3.420391887983886e-06, "loss": 0.0108, "step": 152080 }, { "epoch": 1.2842456355153997, "grad_norm": 0.12241475284099579, "learning_rate": 3.4196927624692506e-06, "loss": 0.0031, "step": 152090 }, { "epoch": 1.2843300753625635, "grad_norm": 0.17661653459072113, "learning_rate": 3.4189936712785652e-06, "loss": 0.012, "step": 152100 }, { "epoch": 1.2844145152097275, "grad_norm": 0.1733456254005432, "learning_rate": 3.4182946144270114e-06, "loss": 0.005, "step": 152110 }, { "epoch": 1.2844989550568913, "grad_norm": 0.40887752175331116, "learning_rate": 3.4175955919297765e-06, "loss": 0.0063, "step": 152120 }, { "epoch": 1.2845833949040553, "grad_norm": 0.6020039916038513, "learning_rate": 3.41689660380204e-06, "loss": 0.0131, "step": 152130 }, { "epoch": 1.284667834751219, "grad_norm": 0.3291516602039337, "learning_rate": 3.4161976500589867e-06, "loss": 0.0092, "step": 152140 }, { "epoch": 1.284752274598383, "grad_norm": 0.5997399091720581, "learning_rate": 3.415498730715795e-06, "loss": 0.0094, "step": 152150 }, { "epoch": 1.2848367144455468, "grad_norm": 0.5093158483505249, "learning_rate": 3.414799845787644e-06, "loss": 0.0121, "step": 152160 }, { "epoch": 1.2849211542927108, "grad_norm": 0.01772618666291237, "learning_rate": 3.4141009952897183e-06, "loss": 0.0047, "step": 152170 }, { "epoch": 1.2850055941398746, "grad_norm": 0.6989609599113464, "learning_rate": 3.413402179237192e-06, "loss": 0.0093, "step": 152180 }, { "epoch": 1.2850900339870384, "grad_norm": 0.19129985570907593, "learning_rate": 3.412703397645247e-06, "loss": 0.0091, "step": 152190 }, { "epoch": 1.2851744738342024, "grad_norm": 0.290047824382782, "learning_rate": 3.4120046505290566e-06, "loss": 0.0041, "step": 152200 }, { "epoch": 1.2852589136813664, "grad_norm": 0.2349829226732254, "learning_rate": 3.411305937903802e-06, "loss": 0.0082, "step": 152210 }, { "epoch": 1.2853433535285301, "grad_norm": 0.006834017112851143, "learning_rate": 3.4106072597846555e-06, "loss": 0.0031, "step": 152220 }, { "epoch": 1.285427793375694, "grad_norm": 0.17989492416381836, "learning_rate": 3.4099086161867954e-06, "loss": 0.0078, "step": 152230 }, { "epoch": 1.285512233222858, "grad_norm": 0.2580026388168335, "learning_rate": 3.409210007125392e-06, "loss": 0.0046, "step": 152240 }, { "epoch": 1.2855966730700217, "grad_norm": 0.21046893298625946, "learning_rate": 3.4085114326156227e-06, "loss": 0.0055, "step": 152250 }, { "epoch": 1.2856811129171857, "grad_norm": 0.22063465416431427, "learning_rate": 3.4078128926726606e-06, "loss": 0.0076, "step": 152260 }, { "epoch": 1.2857655527643495, "grad_norm": 0.39357221126556396, "learning_rate": 3.407114387311673e-06, "loss": 0.0032, "step": 152270 }, { "epoch": 1.2858499926115134, "grad_norm": 0.15158629417419434, "learning_rate": 3.406415916547836e-06, "loss": 0.0055, "step": 152280 }, { "epoch": 1.2859344324586772, "grad_norm": 0.07592277228832245, "learning_rate": 3.4057174803963184e-06, "loss": 0.0064, "step": 152290 }, { "epoch": 1.2860188723058412, "grad_norm": 0.3442671000957489, "learning_rate": 3.4050190788722914e-06, "loss": 0.0038, "step": 152300 }, { "epoch": 1.286103312153005, "grad_norm": 0.18954478204250336, "learning_rate": 3.4043207119909206e-06, "loss": 0.005, "step": 152310 }, { "epoch": 1.2861877520001688, "grad_norm": 0.19228532910346985, "learning_rate": 3.4036223797673795e-06, "loss": 0.0044, "step": 152320 }, { "epoch": 1.2862721918473328, "grad_norm": 0.23297818005084991, "learning_rate": 3.402924082216832e-06, "loss": 0.014, "step": 152330 }, { "epoch": 1.2863566316944968, "grad_norm": 0.14320340752601624, "learning_rate": 3.402225819354447e-06, "loss": 0.0029, "step": 152340 }, { "epoch": 1.2864410715416605, "grad_norm": 0.1767001897096634, "learning_rate": 3.4015275911953883e-06, "loss": 0.008, "step": 152350 }, { "epoch": 1.2865255113888243, "grad_norm": 0.3051753640174866, "learning_rate": 3.400829397754824e-06, "loss": 0.0075, "step": 152360 }, { "epoch": 1.2866099512359883, "grad_norm": 0.17550291121006012, "learning_rate": 3.400131239047917e-06, "loss": 0.0044, "step": 152370 }, { "epoch": 1.286694391083152, "grad_norm": 0.27510911226272583, "learning_rate": 3.3994331150898325e-06, "loss": 0.007, "step": 152380 }, { "epoch": 1.286778830930316, "grad_norm": 0.2041187733411789, "learning_rate": 3.398735025895733e-06, "loss": 0.0102, "step": 152390 }, { "epoch": 1.2868632707774799, "grad_norm": 0.1786506623029709, "learning_rate": 3.3980369714807783e-06, "loss": 0.0135, "step": 152400 }, { "epoch": 1.2869477106246436, "grad_norm": 0.5806155800819397, "learning_rate": 3.397338951860134e-06, "loss": 0.0064, "step": 152410 }, { "epoch": 1.2870321504718076, "grad_norm": 0.30519944429397583, "learning_rate": 3.396640967048959e-06, "loss": 0.0045, "step": 152420 }, { "epoch": 1.2871165903189716, "grad_norm": 0.38529855012893677, "learning_rate": 3.395943017062414e-06, "loss": 0.0048, "step": 152430 }, { "epoch": 1.2872010301661354, "grad_norm": 0.19227421283721924, "learning_rate": 3.3952451019156563e-06, "loss": 0.0081, "step": 152440 }, { "epoch": 1.2872854700132992, "grad_norm": 0.06448739767074585, "learning_rate": 3.3945472216238486e-06, "loss": 0.0076, "step": 152450 }, { "epoch": 1.2873699098604632, "grad_norm": 0.04460010677576065, "learning_rate": 3.393849376202144e-06, "loss": 0.0045, "step": 152460 }, { "epoch": 1.287454349707627, "grad_norm": 0.27194491028785706, "learning_rate": 3.393151565665704e-06, "loss": 0.0103, "step": 152470 }, { "epoch": 1.287538789554791, "grad_norm": 0.012335469014942646, "learning_rate": 3.39245379002968e-06, "loss": 0.0076, "step": 152480 }, { "epoch": 1.2876232294019547, "grad_norm": 0.25076761841773987, "learning_rate": 3.3917560493092327e-06, "loss": 0.005, "step": 152490 }, { "epoch": 1.2877076692491187, "grad_norm": 0.45697152614593506, "learning_rate": 3.3910583435195143e-06, "loss": 0.0093, "step": 152500 }, { "epoch": 1.2877921090962825, "grad_norm": 0.18926629424095154, "learning_rate": 3.3903606726756776e-06, "loss": 0.0055, "step": 152510 }, { "epoch": 1.2878765489434465, "grad_norm": 0.3740360736846924, "learning_rate": 3.389663036792879e-06, "loss": 0.0075, "step": 152520 }, { "epoch": 1.2879609887906103, "grad_norm": 0.15928468108177185, "learning_rate": 3.388965435886267e-06, "loss": 0.008, "step": 152530 }, { "epoch": 1.288045428637774, "grad_norm": 0.037403058260679245, "learning_rate": 3.388267869970997e-06, "loss": 0.0102, "step": 152540 }, { "epoch": 1.288129868484938, "grad_norm": 0.1352992206811905, "learning_rate": 3.3875703390622184e-06, "loss": 0.0054, "step": 152550 }, { "epoch": 1.288214308332102, "grad_norm": 0.261599600315094, "learning_rate": 3.386872843175082e-06, "loss": 0.0093, "step": 152560 }, { "epoch": 1.2882987481792658, "grad_norm": 0.1839856505393982, "learning_rate": 3.3861753823247356e-06, "loss": 0.0097, "step": 152570 }, { "epoch": 1.2883831880264296, "grad_norm": 0.22317814826965332, "learning_rate": 3.385477956526332e-06, "loss": 0.0046, "step": 152580 }, { "epoch": 1.2884676278735936, "grad_norm": 0.0034927360247820616, "learning_rate": 3.3847805657950127e-06, "loss": 0.0073, "step": 152590 }, { "epoch": 1.2885520677207574, "grad_norm": 0.7507195472717285, "learning_rate": 3.384083210145931e-06, "loss": 0.0073, "step": 152600 }, { "epoch": 1.2886365075679214, "grad_norm": 0.36036497354507446, "learning_rate": 3.3833858895942296e-06, "loss": 0.0155, "step": 152610 }, { "epoch": 1.2887209474150851, "grad_norm": 0.15701799094676971, "learning_rate": 3.3826886041550566e-06, "loss": 0.0049, "step": 152620 }, { "epoch": 1.2888053872622491, "grad_norm": 0.29573723673820496, "learning_rate": 3.3819913538435557e-06, "loss": 0.0078, "step": 152630 }, { "epoch": 1.288889827109413, "grad_norm": 0.10924335569143295, "learning_rate": 3.3812941386748687e-06, "loss": 0.0128, "step": 152640 }, { "epoch": 1.288974266956577, "grad_norm": 0.4622870981693268, "learning_rate": 3.3805969586641433e-06, "loss": 0.0094, "step": 152650 }, { "epoch": 1.2890587068037407, "grad_norm": 0.20919817686080933, "learning_rate": 3.379899813826519e-06, "loss": 0.0045, "step": 152660 }, { "epoch": 1.2891431466509045, "grad_norm": 0.2360840141773224, "learning_rate": 3.3792027041771407e-06, "loss": 0.0081, "step": 152670 }, { "epoch": 1.2892275864980685, "grad_norm": 0.40678539872169495, "learning_rate": 3.3785056297311448e-06, "loss": 0.0042, "step": 152680 }, { "epoch": 1.2893120263452325, "grad_norm": 0.25798776745796204, "learning_rate": 3.3778085905036772e-06, "loss": 0.0061, "step": 152690 }, { "epoch": 1.2893964661923962, "grad_norm": 0.18651089072227478, "learning_rate": 3.3771115865098725e-06, "loss": 0.0033, "step": 152700 }, { "epoch": 1.28948090603956, "grad_norm": 0.05966905876994133, "learning_rate": 3.3764146177648732e-06, "loss": 0.0055, "step": 152710 }, { "epoch": 1.289565345886724, "grad_norm": 0.382144570350647, "learning_rate": 3.3757176842838135e-06, "loss": 0.0061, "step": 152720 }, { "epoch": 1.2896497857338878, "grad_norm": 0.2478494644165039, "learning_rate": 3.3750207860818345e-06, "loss": 0.0063, "step": 152730 }, { "epoch": 1.2897342255810518, "grad_norm": 0.6917672157287598, "learning_rate": 3.3743239231740717e-06, "loss": 0.0073, "step": 152740 }, { "epoch": 1.2898186654282155, "grad_norm": 0.15831641852855682, "learning_rate": 3.3736270955756587e-06, "loss": 0.0037, "step": 152750 }, { "epoch": 1.2899031052753793, "grad_norm": 0.40940096974372864, "learning_rate": 3.3729303033017337e-06, "loss": 0.0081, "step": 152760 }, { "epoch": 1.2899875451225433, "grad_norm": 0.7276325821876526, "learning_rate": 3.372233546367426e-06, "loss": 0.0142, "step": 152770 }, { "epoch": 1.2900719849697073, "grad_norm": 0.3064141869544983, "learning_rate": 3.3715368247878745e-06, "loss": 0.0093, "step": 152780 }, { "epoch": 1.290156424816871, "grad_norm": 0.06002477928996086, "learning_rate": 3.3708401385782085e-06, "loss": 0.0064, "step": 152790 }, { "epoch": 1.2902408646640349, "grad_norm": 0.22841760516166687, "learning_rate": 3.370143487753562e-06, "loss": 0.0225, "step": 152800 }, { "epoch": 1.2903253045111989, "grad_norm": 0.31579098105430603, "learning_rate": 3.3694468723290626e-06, "loss": 0.0092, "step": 152810 }, { "epoch": 1.2904097443583626, "grad_norm": 0.27919211983680725, "learning_rate": 3.368750292319845e-06, "loss": 0.0088, "step": 152820 }, { "epoch": 1.2904941842055266, "grad_norm": 0.018338941037654877, "learning_rate": 3.3680537477410357e-06, "loss": 0.005, "step": 152830 }, { "epoch": 1.2905786240526904, "grad_norm": 0.22212395071983337, "learning_rate": 3.3673572386077663e-06, "loss": 0.0059, "step": 152840 }, { "epoch": 1.2906630638998544, "grad_norm": 0.24346952140331268, "learning_rate": 3.3666607649351628e-06, "loss": 0.0059, "step": 152850 }, { "epoch": 1.2907475037470182, "grad_norm": 0.2880294620990753, "learning_rate": 3.3659643267383506e-06, "loss": 0.0063, "step": 152860 }, { "epoch": 1.2908319435941822, "grad_norm": 0.4367178976535797, "learning_rate": 3.3652679240324616e-06, "loss": 0.0113, "step": 152870 }, { "epoch": 1.290916383441346, "grad_norm": 0.3161364495754242, "learning_rate": 3.3645715568326153e-06, "loss": 0.0074, "step": 152880 }, { "epoch": 1.2910008232885097, "grad_norm": 0.5681677460670471, "learning_rate": 3.3638752251539414e-06, "loss": 0.0083, "step": 152890 }, { "epoch": 1.2910852631356737, "grad_norm": 0.12578032910823822, "learning_rate": 3.363178929011561e-06, "loss": 0.0118, "step": 152900 }, { "epoch": 1.2911697029828377, "grad_norm": 0.8719082474708557, "learning_rate": 3.362482668420601e-06, "loss": 0.0062, "step": 152910 }, { "epoch": 1.2912541428300015, "grad_norm": 0.3868964910507202, "learning_rate": 3.3617864433961787e-06, "loss": 0.0092, "step": 152920 }, { "epoch": 1.2913385826771653, "grad_norm": 0.35839784145355225, "learning_rate": 3.361090253953422e-06, "loss": 0.0074, "step": 152930 }, { "epoch": 1.2914230225243293, "grad_norm": 0.22621197998523712, "learning_rate": 3.3603941001074476e-06, "loss": 0.0071, "step": 152940 }, { "epoch": 1.291507462371493, "grad_norm": 0.40285176038742065, "learning_rate": 3.3596979818733786e-06, "loss": 0.0147, "step": 152950 }, { "epoch": 1.291591902218657, "grad_norm": 0.3913114368915558, "learning_rate": 3.359001899266331e-06, "loss": 0.0147, "step": 152960 }, { "epoch": 1.2916763420658208, "grad_norm": 0.17162716388702393, "learning_rate": 3.358305852301428e-06, "loss": 0.0032, "step": 152970 }, { "epoch": 1.2917607819129846, "grad_norm": 0.37164995074272156, "learning_rate": 3.3576098409937856e-06, "loss": 0.0043, "step": 152980 }, { "epoch": 1.2918452217601486, "grad_norm": 0.20878468453884125, "learning_rate": 3.3569138653585196e-06, "loss": 0.01, "step": 152990 }, { "epoch": 1.2919296616073126, "grad_norm": 0.3666335642337799, "learning_rate": 3.35621792541075e-06, "loss": 0.0159, "step": 153000 }, { "epoch": 1.2920141014544764, "grad_norm": 0.924217939376831, "learning_rate": 3.3555220211655865e-06, "loss": 0.0084, "step": 153010 }, { "epoch": 1.2920985413016401, "grad_norm": 0.16702581942081451, "learning_rate": 3.354826152638151e-06, "loss": 0.007, "step": 153020 }, { "epoch": 1.2921829811488041, "grad_norm": 0.2834702134132385, "learning_rate": 3.3541303198435527e-06, "loss": 0.0144, "step": 153030 }, { "epoch": 1.292267420995968, "grad_norm": 0.02421463094651699, "learning_rate": 3.3534345227969073e-06, "loss": 0.0056, "step": 153040 }, { "epoch": 1.292351860843132, "grad_norm": 0.25954896211624146, "learning_rate": 3.352738761513325e-06, "loss": 0.0073, "step": 153050 }, { "epoch": 1.2924363006902957, "grad_norm": 0.09394548833370209, "learning_rate": 3.3520430360079216e-06, "loss": 0.0052, "step": 153060 }, { "epoch": 1.2925207405374597, "grad_norm": 0.009073811583220959, "learning_rate": 3.3513473462958042e-06, "loss": 0.0041, "step": 153070 }, { "epoch": 1.2926051803846235, "grad_norm": 0.5085604786872864, "learning_rate": 3.3506516923920857e-06, "loss": 0.0106, "step": 153080 }, { "epoch": 1.2926896202317875, "grad_norm": 0.5261709690093994, "learning_rate": 3.3499560743118744e-06, "loss": 0.0095, "step": 153090 }, { "epoch": 1.2927740600789512, "grad_norm": 0.3732582926750183, "learning_rate": 3.3492604920702776e-06, "loss": 0.0058, "step": 153100 }, { "epoch": 1.292858499926115, "grad_norm": 0.008849065750837326, "learning_rate": 3.348564945682406e-06, "loss": 0.0073, "step": 153110 }, { "epoch": 1.292942939773279, "grad_norm": 0.32880112528800964, "learning_rate": 3.3478694351633646e-06, "loss": 0.0088, "step": 153120 }, { "epoch": 1.293027379620443, "grad_norm": 0.020954588428139687, "learning_rate": 3.3471739605282617e-06, "loss": 0.0073, "step": 153130 }, { "epoch": 1.2931118194676068, "grad_norm": 0.18908865749835968, "learning_rate": 3.346478521792199e-06, "loss": 0.0078, "step": 153140 }, { "epoch": 1.2931962593147706, "grad_norm": 1.2808012962341309, "learning_rate": 3.345783118970287e-06, "loss": 0.0077, "step": 153150 }, { "epoch": 1.2932806991619346, "grad_norm": 0.10539764910936356, "learning_rate": 3.3450877520776246e-06, "loss": 0.0051, "step": 153160 }, { "epoch": 1.2933651390090983, "grad_norm": 0.1298564225435257, "learning_rate": 3.3443924211293187e-06, "loss": 0.0031, "step": 153170 }, { "epoch": 1.2934495788562623, "grad_norm": 0.19815464317798615, "learning_rate": 3.3436971261404684e-06, "loss": 0.0054, "step": 153180 }, { "epoch": 1.293534018703426, "grad_norm": 0.14108876883983612, "learning_rate": 3.34300186712618e-06, "loss": 0.0053, "step": 153190 }, { "epoch": 1.29361845855059, "grad_norm": 0.3832067549228668, "learning_rate": 3.3423066441015484e-06, "loss": 0.0088, "step": 153200 }, { "epoch": 1.2937028983977539, "grad_norm": 0.23290467262268066, "learning_rate": 3.3416114570816784e-06, "loss": 0.0056, "step": 153210 }, { "epoch": 1.2937873382449179, "grad_norm": 0.3973410725593567, "learning_rate": 3.3409163060816685e-06, "loss": 0.0059, "step": 153220 }, { "epoch": 1.2938717780920816, "grad_norm": 0.32155176997184753, "learning_rate": 3.340221191116615e-06, "loss": 0.0049, "step": 153230 }, { "epoch": 1.2939562179392454, "grad_norm": 0.2508934438228607, "learning_rate": 3.339526112201618e-06, "loss": 0.0067, "step": 153240 }, { "epoch": 1.2940406577864094, "grad_norm": 0.4437810182571411, "learning_rate": 3.338831069351771e-06, "loss": 0.0063, "step": 153250 }, { "epoch": 1.2941250976335734, "grad_norm": 0.13737691938877106, "learning_rate": 3.3381360625821755e-06, "loss": 0.0045, "step": 153260 }, { "epoch": 1.2942095374807372, "grad_norm": 0.15284521877765656, "learning_rate": 3.3374410919079237e-06, "loss": 0.0068, "step": 153270 }, { "epoch": 1.294293977327901, "grad_norm": 0.2932490408420563, "learning_rate": 3.3367461573441107e-06, "loss": 0.0055, "step": 153280 }, { "epoch": 1.294378417175065, "grad_norm": 0.6908617615699768, "learning_rate": 3.336051258905829e-06, "loss": 0.0077, "step": 153290 }, { "epoch": 1.2944628570222287, "grad_norm": 0.10583395510911942, "learning_rate": 3.335356396608175e-06, "loss": 0.007, "step": 153300 }, { "epoch": 1.2945472968693927, "grad_norm": 0.4667300879955292, "learning_rate": 3.334661570466237e-06, "loss": 0.0059, "step": 153310 }, { "epoch": 1.2946317367165565, "grad_norm": 0.17912140488624573, "learning_rate": 3.33396678049511e-06, "loss": 0.0084, "step": 153320 }, { "epoch": 1.2947161765637203, "grad_norm": 0.07323397696018219, "learning_rate": 3.3332720267098838e-06, "loss": 0.0056, "step": 153330 }, { "epoch": 1.2948006164108843, "grad_norm": 0.40175458788871765, "learning_rate": 3.332577309125645e-06, "loss": 0.0082, "step": 153340 }, { "epoch": 1.2948850562580483, "grad_norm": 0.03702212870121002, "learning_rate": 3.331882627757487e-06, "loss": 0.0039, "step": 153350 }, { "epoch": 1.294969496105212, "grad_norm": 0.059858012944459915, "learning_rate": 3.3311879826204963e-06, "loss": 0.0059, "step": 153360 }, { "epoch": 1.2950539359523758, "grad_norm": 0.10629624128341675, "learning_rate": 3.3304933737297614e-06, "loss": 0.0071, "step": 153370 }, { "epoch": 1.2951383757995398, "grad_norm": 0.21146827936172485, "learning_rate": 3.329798801100366e-06, "loss": 0.0048, "step": 153380 }, { "epoch": 1.2952228156467036, "grad_norm": 0.8314550518989563, "learning_rate": 3.3291042647474005e-06, "loss": 0.0179, "step": 153390 }, { "epoch": 1.2953072554938676, "grad_norm": 0.35242006182670593, "learning_rate": 3.3284097646859472e-06, "loss": 0.0077, "step": 153400 }, { "epoch": 1.2953916953410314, "grad_norm": 0.09251894056797028, "learning_rate": 3.327715300931092e-06, "loss": 0.0045, "step": 153410 }, { "epoch": 1.2954761351881954, "grad_norm": 0.3469882607460022, "learning_rate": 3.327020873497916e-06, "loss": 0.0041, "step": 153420 }, { "epoch": 1.2955605750353592, "grad_norm": 0.4197399318218231, "learning_rate": 3.326326482401505e-06, "loss": 0.0063, "step": 153430 }, { "epoch": 1.2956450148825231, "grad_norm": 0.14853765070438385, "learning_rate": 3.32563212765694e-06, "loss": 0.0061, "step": 153440 }, { "epoch": 1.295729454729687, "grad_norm": 0.16256199777126312, "learning_rate": 3.3249378092793027e-06, "loss": 0.0077, "step": 153450 }, { "epoch": 1.2958138945768507, "grad_norm": 0.14606203138828278, "learning_rate": 3.3242435272836725e-06, "loss": 0.0115, "step": 153460 }, { "epoch": 1.2958983344240147, "grad_norm": 0.4001910090446472, "learning_rate": 3.3235492816851276e-06, "loss": 0.0052, "step": 153470 }, { "epoch": 1.2959827742711787, "grad_norm": 0.42819008231163025, "learning_rate": 3.3228550724987517e-06, "loss": 0.0127, "step": 153480 }, { "epoch": 1.2960672141183425, "grad_norm": 0.24894504249095917, "learning_rate": 3.3221608997396176e-06, "loss": 0.0081, "step": 153490 }, { "epoch": 1.2961516539655062, "grad_norm": 0.09018359333276749, "learning_rate": 3.3214667634228064e-06, "loss": 0.003, "step": 153500 }, { "epoch": 1.2962360938126702, "grad_norm": 0.1606818288564682, "learning_rate": 3.3207726635633926e-06, "loss": 0.006, "step": 153510 }, { "epoch": 1.296320533659834, "grad_norm": 0.11098510026931763, "learning_rate": 3.3200786001764527e-06, "loss": 0.0053, "step": 153520 }, { "epoch": 1.296404973506998, "grad_norm": 0.7251542210578918, "learning_rate": 3.31938457327706e-06, "loss": 0.0069, "step": 153530 }, { "epoch": 1.2964894133541618, "grad_norm": 0.08675260841846466, "learning_rate": 3.3186905828802917e-06, "loss": 0.005, "step": 153540 }, { "epoch": 1.2965738532013258, "grad_norm": 0.15798209607601166, "learning_rate": 3.317996629001219e-06, "loss": 0.0059, "step": 153550 }, { "epoch": 1.2966582930484896, "grad_norm": 0.28996336460113525, "learning_rate": 3.317302711654915e-06, "loss": 0.0123, "step": 153560 }, { "epoch": 1.2967427328956536, "grad_norm": 0.09214448928833008, "learning_rate": 3.3166088308564513e-06, "loss": 0.0096, "step": 153570 }, { "epoch": 1.2968271727428173, "grad_norm": 0.2608672082424164, "learning_rate": 3.3159149866208974e-06, "loss": 0.0105, "step": 153580 }, { "epoch": 1.296911612589981, "grad_norm": 0.2100355476140976, "learning_rate": 3.3152211789633263e-06, "loss": 0.0074, "step": 153590 }, { "epoch": 1.296996052437145, "grad_norm": 0.0007762331515550613, "learning_rate": 3.3145274078988055e-06, "loss": 0.01, "step": 153600 }, { "epoch": 1.2970804922843089, "grad_norm": 0.04307703673839569, "learning_rate": 3.313833673442405e-06, "loss": 0.006, "step": 153610 }, { "epoch": 1.2971649321314729, "grad_norm": 0.41612765192985535, "learning_rate": 3.3131399756091886e-06, "loss": 0.0101, "step": 153620 }, { "epoch": 1.2972493719786367, "grad_norm": 0.01600225456058979, "learning_rate": 3.3124463144142295e-06, "loss": 0.01, "step": 153630 }, { "epoch": 1.2973338118258007, "grad_norm": 1.1119740009307861, "learning_rate": 3.311752689872589e-06, "loss": 0.0107, "step": 153640 }, { "epoch": 1.2974182516729644, "grad_norm": 0.063559889793396, "learning_rate": 3.3110591019993355e-06, "loss": 0.0081, "step": 153650 }, { "epoch": 1.2975026915201284, "grad_norm": 0.7257144451141357, "learning_rate": 3.31036555080953e-06, "loss": 0.0121, "step": 153660 }, { "epoch": 1.2975871313672922, "grad_norm": 0.44398602843284607, "learning_rate": 3.3096720363182405e-06, "loss": 0.0042, "step": 153670 }, { "epoch": 1.297671571214456, "grad_norm": 0.2517105042934418, "learning_rate": 3.3089785585405277e-06, "loss": 0.0055, "step": 153680 }, { "epoch": 1.29775601106162, "grad_norm": 0.2546830177307129, "learning_rate": 3.308285117491453e-06, "loss": 0.0136, "step": 153690 }, { "epoch": 1.297840450908784, "grad_norm": 0.023033270612359047, "learning_rate": 3.3075917131860807e-06, "loss": 0.0031, "step": 153700 }, { "epoch": 1.2979248907559477, "grad_norm": 0.1753055602312088, "learning_rate": 3.306898345639467e-06, "loss": 0.0086, "step": 153710 }, { "epoch": 1.2980093306031115, "grad_norm": 0.2222280353307724, "learning_rate": 3.306205014866677e-06, "loss": 0.0091, "step": 153720 }, { "epoch": 1.2980937704502755, "grad_norm": 0.08140452206134796, "learning_rate": 3.3055117208827647e-06, "loss": 0.0053, "step": 153730 }, { "epoch": 1.2981782102974393, "grad_norm": 0.4487590491771698, "learning_rate": 3.3048184637027924e-06, "loss": 0.0089, "step": 153740 }, { "epoch": 1.2982626501446033, "grad_norm": 0.23768627643585205, "learning_rate": 3.3041252433418134e-06, "loss": 0.0055, "step": 153750 }, { "epoch": 1.298347089991767, "grad_norm": 0.32301971316337585, "learning_rate": 3.3034320598148893e-06, "loss": 0.0086, "step": 153760 }, { "epoch": 1.298431529838931, "grad_norm": 0.17687156796455383, "learning_rate": 3.3027389131370713e-06, "loss": 0.005, "step": 153770 }, { "epoch": 1.2985159696860948, "grad_norm": 0.4373484253883362, "learning_rate": 3.302045803323418e-06, "loss": 0.0103, "step": 153780 }, { "epoch": 1.2986004095332588, "grad_norm": 0.32240593433380127, "learning_rate": 3.3013527303889792e-06, "loss": 0.0076, "step": 153790 }, { "epoch": 1.2986848493804226, "grad_norm": 0.03798815235495567, "learning_rate": 3.3006596943488146e-06, "loss": 0.0056, "step": 153800 }, { "epoch": 1.2987692892275864, "grad_norm": 0.16528362035751343, "learning_rate": 3.2999666952179722e-06, "loss": 0.0136, "step": 153810 }, { "epoch": 1.2988537290747504, "grad_norm": 0.021846536546945572, "learning_rate": 3.299273733011502e-06, "loss": 0.0038, "step": 153820 }, { "epoch": 1.2989381689219144, "grad_norm": 0.09872125834226608, "learning_rate": 3.298580807744461e-06, "loss": 0.0049, "step": 153830 }, { "epoch": 1.2990226087690782, "grad_norm": 0.0002595199039205909, "learning_rate": 3.2978879194318946e-06, "loss": 0.0056, "step": 153840 }, { "epoch": 1.299107048616242, "grad_norm": 0.3593078553676605, "learning_rate": 3.297195068088855e-06, "loss": 0.009, "step": 153850 }, { "epoch": 1.299191488463406, "grad_norm": 0.1861027628183365, "learning_rate": 3.296502253730387e-06, "loss": 0.0034, "step": 153860 }, { "epoch": 1.2992759283105697, "grad_norm": 0.17906352877616882, "learning_rate": 3.2958094763715433e-06, "loss": 0.0104, "step": 153870 }, { "epoch": 1.2993603681577337, "grad_norm": 0.07558248937129974, "learning_rate": 3.295116736027368e-06, "loss": 0.0067, "step": 153880 }, { "epoch": 1.2994448080048975, "grad_norm": 0.19835108518600464, "learning_rate": 3.2944240327129083e-06, "loss": 0.0112, "step": 153890 }, { "epoch": 1.2995292478520613, "grad_norm": 0.03286371007561684, "learning_rate": 3.293731366443207e-06, "loss": 0.0073, "step": 153900 }, { "epoch": 1.2996136876992253, "grad_norm": 0.3205113708972931, "learning_rate": 3.293038737233313e-06, "loss": 0.0046, "step": 153910 }, { "epoch": 1.2996981275463892, "grad_norm": 0.17082735896110535, "learning_rate": 3.292346145098268e-06, "loss": 0.0048, "step": 153920 }, { "epoch": 1.299782567393553, "grad_norm": 0.2217244952917099, "learning_rate": 3.291653590053114e-06, "loss": 0.0084, "step": 153930 }, { "epoch": 1.2998670072407168, "grad_norm": 0.32553356885910034, "learning_rate": 3.2909610721128944e-06, "loss": 0.0048, "step": 153940 }, { "epoch": 1.2999514470878808, "grad_norm": 0.5700196623802185, "learning_rate": 3.290268591292648e-06, "loss": 0.0047, "step": 153950 }, { "epoch": 1.3000358869350446, "grad_norm": 0.2979585528373718, "learning_rate": 3.2895761476074202e-06, "loss": 0.0094, "step": 153960 }, { "epoch": 1.3001203267822086, "grad_norm": 0.13082897663116455, "learning_rate": 3.288883741072246e-06, "loss": 0.0062, "step": 153970 }, { "epoch": 1.3002047666293723, "grad_norm": 0.1580682396888733, "learning_rate": 3.2881913717021686e-06, "loss": 0.0073, "step": 153980 }, { "epoch": 1.3002892064765363, "grad_norm": 0.25361356139183044, "learning_rate": 3.287499039512221e-06, "loss": 0.0105, "step": 153990 }, { "epoch": 1.3003736463237001, "grad_norm": 0.29384082555770874, "learning_rate": 3.286806744517445e-06, "loss": 0.0079, "step": 154000 }, { "epoch": 1.3004580861708641, "grad_norm": 0.17013218998908997, "learning_rate": 3.286114486732875e-06, "loss": 0.0076, "step": 154010 }, { "epoch": 1.300542526018028, "grad_norm": 0.2805885672569275, "learning_rate": 3.285422266173548e-06, "loss": 0.0039, "step": 154020 }, { "epoch": 1.3006269658651917, "grad_norm": 0.116019107401371, "learning_rate": 3.284730082854496e-06, "loss": 0.0076, "step": 154030 }, { "epoch": 1.3007114057123557, "grad_norm": 0.2599129378795624, "learning_rate": 3.284037936790757e-06, "loss": 0.0117, "step": 154040 }, { "epoch": 1.3007958455595197, "grad_norm": 0.1387718766927719, "learning_rate": 3.2833458279973625e-06, "loss": 0.0169, "step": 154050 }, { "epoch": 1.3008802854066834, "grad_norm": 0.05629963427782059, "learning_rate": 3.2826537564893435e-06, "loss": 0.0044, "step": 154060 }, { "epoch": 1.3009647252538472, "grad_norm": 0.6727670431137085, "learning_rate": 3.2819617222817345e-06, "loss": 0.0115, "step": 154070 }, { "epoch": 1.3010491651010112, "grad_norm": 0.08285842835903168, "learning_rate": 3.2812697253895627e-06, "loss": 0.0077, "step": 154080 }, { "epoch": 1.301133604948175, "grad_norm": 0.25552263855934143, "learning_rate": 3.280577765827863e-06, "loss": 0.0054, "step": 154090 }, { "epoch": 1.301218044795339, "grad_norm": 0.23204420506954193, "learning_rate": 3.279885843611659e-06, "loss": 0.007, "step": 154100 }, { "epoch": 1.3013024846425028, "grad_norm": 0.14697016775608063, "learning_rate": 3.279193958755984e-06, "loss": 0.0052, "step": 154110 }, { "epoch": 1.3013869244896668, "grad_norm": 0.018180061131715775, "learning_rate": 3.278502111275863e-06, "loss": 0.0071, "step": 154120 }, { "epoch": 1.3014713643368305, "grad_norm": 0.15896084904670715, "learning_rate": 3.2778103011863237e-06, "loss": 0.0094, "step": 154130 }, { "epoch": 1.3015558041839945, "grad_norm": 0.20863847434520721, "learning_rate": 3.2771185285023903e-06, "loss": 0.0083, "step": 154140 }, { "epoch": 1.3016402440311583, "grad_norm": 0.29339495301246643, "learning_rate": 3.2764267932390913e-06, "loss": 0.0039, "step": 154150 }, { "epoch": 1.301724683878322, "grad_norm": 0.07989992201328278, "learning_rate": 3.275735095411449e-06, "loss": 0.0042, "step": 154160 }, { "epoch": 1.301809123725486, "grad_norm": 0.9554206132888794, "learning_rate": 3.275043435034486e-06, "loss": 0.0096, "step": 154170 }, { "epoch": 1.30189356357265, "grad_norm": 0.0398128405213356, "learning_rate": 3.2743518121232277e-06, "loss": 0.0041, "step": 154180 }, { "epoch": 1.3019780034198138, "grad_norm": 0.408799946308136, "learning_rate": 3.273660226692692e-06, "loss": 0.0121, "step": 154190 }, { "epoch": 1.3020624432669776, "grad_norm": 0.39207923412323, "learning_rate": 3.272968678757904e-06, "loss": 0.0092, "step": 154200 }, { "epoch": 1.3021468831141416, "grad_norm": 0.1444895714521408, "learning_rate": 3.2722771683338824e-06, "loss": 0.008, "step": 154210 }, { "epoch": 1.3022313229613054, "grad_norm": 0.4749937653541565, "learning_rate": 3.271585695435647e-06, "loss": 0.0092, "step": 154220 }, { "epoch": 1.3023157628084694, "grad_norm": 0.13277822732925415, "learning_rate": 3.270894260078214e-06, "loss": 0.0102, "step": 154230 }, { "epoch": 1.3024002026556332, "grad_norm": 0.3318091630935669, "learning_rate": 3.270202862276606e-06, "loss": 0.007, "step": 154240 }, { "epoch": 1.302484642502797, "grad_norm": 0.13466337323188782, "learning_rate": 3.2695115020458357e-06, "loss": 0.008, "step": 154250 }, { "epoch": 1.302569082349961, "grad_norm": 0.8904100656509399, "learning_rate": 3.268820179400922e-06, "loss": 0.0074, "step": 154260 }, { "epoch": 1.302653522197125, "grad_norm": 0.3004767894744873, "learning_rate": 3.268128894356879e-06, "loss": 0.0085, "step": 154270 }, { "epoch": 1.3027379620442887, "grad_norm": 0.2913416624069214, "learning_rate": 3.26743764692872e-06, "loss": 0.0049, "step": 154280 }, { "epoch": 1.3028224018914525, "grad_norm": 0.27547505497932434, "learning_rate": 3.2667464371314623e-06, "loss": 0.0104, "step": 154290 }, { "epoch": 1.3029068417386165, "grad_norm": 0.022348711267113686, "learning_rate": 3.2660552649801157e-06, "loss": 0.0102, "step": 154300 }, { "epoch": 1.3029912815857803, "grad_norm": 0.30470648407936096, "learning_rate": 3.2653641304896945e-06, "loss": 0.0095, "step": 154310 }, { "epoch": 1.3030757214329443, "grad_norm": 0.553912341594696, "learning_rate": 3.2646730336752066e-06, "loss": 0.0094, "step": 154320 }, { "epoch": 1.303160161280108, "grad_norm": 0.2230587601661682, "learning_rate": 3.263981974551667e-06, "loss": 0.0066, "step": 154330 }, { "epoch": 1.303244601127272, "grad_norm": 0.10095353424549103, "learning_rate": 3.263290953134082e-06, "loss": 0.0097, "step": 154340 }, { "epoch": 1.3033290409744358, "grad_norm": 0.24492260813713074, "learning_rate": 3.2625999694374634e-06, "loss": 0.0063, "step": 154350 }, { "epoch": 1.3034134808215998, "grad_norm": 1.0276943445205688, "learning_rate": 3.2619090234768146e-06, "loss": 0.014, "step": 154360 }, { "epoch": 1.3034979206687636, "grad_norm": 0.2162054479122162, "learning_rate": 3.261218115267148e-06, "loss": 0.0073, "step": 154370 }, { "epoch": 1.3035823605159274, "grad_norm": 0.16092561185359955, "learning_rate": 3.2605272448234665e-06, "loss": 0.0093, "step": 154380 }, { "epoch": 1.3036668003630913, "grad_norm": 0.6168458461761475, "learning_rate": 3.259836412160778e-06, "loss": 0.0053, "step": 154390 }, { "epoch": 1.3037512402102553, "grad_norm": 0.038777925074100494, "learning_rate": 3.259145617294086e-06, "loss": 0.0023, "step": 154400 }, { "epoch": 1.3038356800574191, "grad_norm": 0.1362074762582779, "learning_rate": 3.258454860238393e-06, "loss": 0.007, "step": 154410 }, { "epoch": 1.303920119904583, "grad_norm": 0.1939375400543213, "learning_rate": 3.2577641410087048e-06, "loss": 0.0084, "step": 154420 }, { "epoch": 1.304004559751747, "grad_norm": 0.14655041694641113, "learning_rate": 3.2570734596200206e-06, "loss": 0.0062, "step": 154430 }, { "epoch": 1.3040889995989107, "grad_norm": 0.001787543180398643, "learning_rate": 3.256382816087346e-06, "loss": 0.0071, "step": 154440 }, { "epoch": 1.3041734394460747, "grad_norm": 0.36357057094573975, "learning_rate": 3.2556922104256776e-06, "loss": 0.0141, "step": 154450 }, { "epoch": 1.3042578792932384, "grad_norm": 0.2713960111141205, "learning_rate": 3.2550016426500186e-06, "loss": 0.006, "step": 154460 }, { "epoch": 1.3043423191404022, "grad_norm": 0.021847225725650787, "learning_rate": 3.2543111127753634e-06, "loss": 0.0081, "step": 154470 }, { "epoch": 1.3044267589875662, "grad_norm": 0.44072139263153076, "learning_rate": 3.2536206208167163e-06, "loss": 0.0087, "step": 154480 }, { "epoch": 1.3045111988347302, "grad_norm": 0.6983363032341003, "learning_rate": 3.2529301667890702e-06, "loss": 0.0123, "step": 154490 }, { "epoch": 1.304595638681894, "grad_norm": 0.16156406700611115, "learning_rate": 3.2522397507074232e-06, "loss": 0.0067, "step": 154500 }, { "epoch": 1.3046800785290578, "grad_norm": 0.2538803517818451, "learning_rate": 3.2515493725867707e-06, "loss": 0.0114, "step": 154510 }, { "epoch": 1.3047645183762218, "grad_norm": 0.30200308561325073, "learning_rate": 3.2508590324421064e-06, "loss": 0.0091, "step": 154520 }, { "epoch": 1.3048489582233855, "grad_norm": 0.1049213632941246, "learning_rate": 3.2501687302884267e-06, "loss": 0.0036, "step": 154530 }, { "epoch": 1.3049333980705495, "grad_norm": 0.16425488889217377, "learning_rate": 3.249478466140723e-06, "loss": 0.0077, "step": 154540 }, { "epoch": 1.3050178379177133, "grad_norm": 0.15796178579330444, "learning_rate": 3.248788240013989e-06, "loss": 0.0086, "step": 154550 }, { "epoch": 1.3051022777648773, "grad_norm": 0.39895516633987427, "learning_rate": 3.2480980519232135e-06, "loss": 0.0086, "step": 154560 }, { "epoch": 1.305186717612041, "grad_norm": 0.2428392469882965, "learning_rate": 3.2474079018833916e-06, "loss": 0.007, "step": 154570 }, { "epoch": 1.305271157459205, "grad_norm": 0.7937660217285156, "learning_rate": 3.24671778990951e-06, "loss": 0.0061, "step": 154580 }, { "epoch": 1.3053555973063689, "grad_norm": 0.24190692603588104, "learning_rate": 3.24602771601656e-06, "loss": 0.0034, "step": 154590 }, { "epoch": 1.3054400371535326, "grad_norm": 0.061432331800460815, "learning_rate": 3.245337680219526e-06, "loss": 0.005, "step": 154600 }, { "epoch": 1.3055244770006966, "grad_norm": 0.2704319357872009, "learning_rate": 3.2446476825333993e-06, "loss": 0.0091, "step": 154610 }, { "epoch": 1.3056089168478606, "grad_norm": 0.32870325446128845, "learning_rate": 3.2439577229731654e-06, "loss": 0.0106, "step": 154620 }, { "epoch": 1.3056933566950244, "grad_norm": 0.1940990835428238, "learning_rate": 3.2432678015538112e-06, "loss": 0.0064, "step": 154630 }, { "epoch": 1.3057777965421882, "grad_norm": 0.23996569216251373, "learning_rate": 3.2425779182903195e-06, "loss": 0.0056, "step": 154640 }, { "epoch": 1.3058622363893522, "grad_norm": 0.18232479691505432, "learning_rate": 3.241888073197673e-06, "loss": 0.0055, "step": 154650 }, { "epoch": 1.305946676236516, "grad_norm": 0.2298777550458908, "learning_rate": 3.2411982662908604e-06, "loss": 0.0077, "step": 154660 }, { "epoch": 1.30603111608368, "grad_norm": 0.40905436873435974, "learning_rate": 3.2405084975848593e-06, "loss": 0.0089, "step": 154670 }, { "epoch": 1.3061155559308437, "grad_norm": 0.10462851077318192, "learning_rate": 3.2398187670946545e-06, "loss": 0.003, "step": 154680 }, { "epoch": 1.3061999957780077, "grad_norm": 0.20282568037509918, "learning_rate": 3.2391290748352235e-06, "loss": 0.0097, "step": 154690 }, { "epoch": 1.3062844356251715, "grad_norm": 0.16639716923236847, "learning_rate": 3.2384394208215504e-06, "loss": 0.0072, "step": 154700 }, { "epoch": 1.3063688754723355, "grad_norm": 0.2731895446777344, "learning_rate": 3.2377498050686097e-06, "loss": 0.0076, "step": 154710 }, { "epoch": 1.3064533153194993, "grad_norm": 0.24805569648742676, "learning_rate": 3.2370602275913837e-06, "loss": 0.0108, "step": 154720 }, { "epoch": 1.306537755166663, "grad_norm": 0.08380734175443649, "learning_rate": 3.2363706884048475e-06, "loss": 0.0065, "step": 154730 }, { "epoch": 1.306622195013827, "grad_norm": 0.37060844898223877, "learning_rate": 3.2356811875239795e-06, "loss": 0.0062, "step": 154740 }, { "epoch": 1.306706634860991, "grad_norm": 0.2732112407684326, "learning_rate": 3.2349917249637545e-06, "loss": 0.0051, "step": 154750 }, { "epoch": 1.3067910747081548, "grad_norm": 0.13917607069015503, "learning_rate": 3.234302300739146e-06, "loss": 0.0054, "step": 154760 }, { "epoch": 1.3068755145553186, "grad_norm": 0.22538204491138458, "learning_rate": 3.2336129148651314e-06, "loss": 0.0072, "step": 154770 }, { "epoch": 1.3069599544024826, "grad_norm": 0.23818321526050568, "learning_rate": 3.232923567356681e-06, "loss": 0.0112, "step": 154780 }, { "epoch": 1.3070443942496464, "grad_norm": 0.3358599543571472, "learning_rate": 3.23223425822877e-06, "loss": 0.0113, "step": 154790 }, { "epoch": 1.3071288340968104, "grad_norm": 0.42027509212493896, "learning_rate": 3.2315449874963665e-06, "loss": 0.0087, "step": 154800 }, { "epoch": 1.3072132739439741, "grad_norm": 0.265415221452713, "learning_rate": 3.2308557551744456e-06, "loss": 0.0047, "step": 154810 }, { "epoch": 1.307297713791138, "grad_norm": 0.37123623490333557, "learning_rate": 3.2301665612779743e-06, "loss": 0.0082, "step": 154820 }, { "epoch": 1.307382153638302, "grad_norm": 0.31377682089805603, "learning_rate": 3.2294774058219238e-06, "loss": 0.0043, "step": 154830 }, { "epoch": 1.307466593485466, "grad_norm": 0.29160287976264954, "learning_rate": 3.2287882888212583e-06, "loss": 0.0111, "step": 154840 }, { "epoch": 1.3075510333326297, "grad_norm": 0.2696615755558014, "learning_rate": 3.2280992102909507e-06, "loss": 0.0101, "step": 154850 }, { "epoch": 1.3076354731797935, "grad_norm": 0.15425953269004822, "learning_rate": 3.2274101702459657e-06, "loss": 0.0101, "step": 154860 }, { "epoch": 1.3077199130269574, "grad_norm": 0.39782288670539856, "learning_rate": 3.2267211687012667e-06, "loss": 0.0059, "step": 154870 }, { "epoch": 1.3078043528741212, "grad_norm": 0.28875064849853516, "learning_rate": 3.226032205671822e-06, "loss": 0.0116, "step": 154880 }, { "epoch": 1.3078887927212852, "grad_norm": 0.21941356360912323, "learning_rate": 3.2253432811725927e-06, "loss": 0.0038, "step": 154890 }, { "epoch": 1.307973232568449, "grad_norm": 0.5229865908622742, "learning_rate": 3.224654395218545e-06, "loss": 0.0061, "step": 154900 }, { "epoch": 1.308057672415613, "grad_norm": 0.5910682082176208, "learning_rate": 3.223965547824639e-06, "loss": 0.0056, "step": 154910 }, { "epoch": 1.3081421122627768, "grad_norm": 0.27376478910446167, "learning_rate": 3.2232767390058383e-06, "loss": 0.009, "step": 154920 }, { "epoch": 1.3082265521099408, "grad_norm": 0.3442683815956116, "learning_rate": 3.2225879687771013e-06, "loss": 0.0066, "step": 154930 }, { "epoch": 1.3083109919571045, "grad_norm": 0.650906503200531, "learning_rate": 3.221899237153391e-06, "loss": 0.015, "step": 154940 }, { "epoch": 1.3083954318042683, "grad_norm": 0.4005192220211029, "learning_rate": 3.2212105441496643e-06, "loss": 0.0086, "step": 154950 }, { "epoch": 1.3084798716514323, "grad_norm": 0.39784303307533264, "learning_rate": 3.220521889780881e-06, "loss": 0.0076, "step": 154960 }, { "epoch": 1.3085643114985963, "grad_norm": 0.1464805155992508, "learning_rate": 3.2198332740619955e-06, "loss": 0.0067, "step": 154970 }, { "epoch": 1.30864875134576, "grad_norm": 0.5737406015396118, "learning_rate": 3.2191446970079677e-06, "loss": 0.0053, "step": 154980 }, { "epoch": 1.3087331911929239, "grad_norm": 0.26018843054771423, "learning_rate": 3.2184561586337536e-06, "loss": 0.0085, "step": 154990 }, { "epoch": 1.3088176310400879, "grad_norm": 0.4121463894844055, "learning_rate": 3.2177676589543043e-06, "loss": 0.0064, "step": 155000 }, { "epoch": 1.3089020708872516, "grad_norm": 0.5396912097930908, "learning_rate": 3.217079197984578e-06, "loss": 0.0113, "step": 155010 }, { "epoch": 1.3089865107344156, "grad_norm": 0.4461640417575836, "learning_rate": 3.2163907757395253e-06, "loss": 0.007, "step": 155020 }, { "epoch": 1.3090709505815794, "grad_norm": 0.6141575574874878, "learning_rate": 3.2157023922340998e-06, "loss": 0.0116, "step": 155030 }, { "epoch": 1.3091553904287434, "grad_norm": 0.06894383579492569, "learning_rate": 3.215014047483251e-06, "loss": 0.0077, "step": 155040 }, { "epoch": 1.3092398302759072, "grad_norm": 0.16758355498313904, "learning_rate": 3.2143257415019335e-06, "loss": 0.006, "step": 155050 }, { "epoch": 1.3093242701230712, "grad_norm": 0.13059410452842712, "learning_rate": 3.2136374743050935e-06, "loss": 0.0042, "step": 155060 }, { "epoch": 1.309408709970235, "grad_norm": 0.30554643273353577, "learning_rate": 3.2129492459076826e-06, "loss": 0.0069, "step": 155070 }, { "epoch": 1.3094931498173987, "grad_norm": 0.2060178965330124, "learning_rate": 3.2122610563246465e-06, "loss": 0.0078, "step": 155080 }, { "epoch": 1.3095775896645627, "grad_norm": 0.10285776108503342, "learning_rate": 3.2115729055709355e-06, "loss": 0.0074, "step": 155090 }, { "epoch": 1.3096620295117267, "grad_norm": 0.2445748895406723, "learning_rate": 3.2108847936614944e-06, "loss": 0.0059, "step": 155100 }, { "epoch": 1.3097464693588905, "grad_norm": 0.3480364978313446, "learning_rate": 3.2101967206112683e-06, "loss": 0.0087, "step": 155110 }, { "epoch": 1.3098309092060543, "grad_norm": 0.2804443836212158, "learning_rate": 3.2095086864352033e-06, "loss": 0.006, "step": 155120 }, { "epoch": 1.3099153490532183, "grad_norm": 0.24313893914222717, "learning_rate": 3.2088206911482412e-06, "loss": 0.0102, "step": 155130 }, { "epoch": 1.309999788900382, "grad_norm": 0.307100385427475, "learning_rate": 3.208132734765328e-06, "loss": 0.006, "step": 155140 }, { "epoch": 1.310084228747546, "grad_norm": 0.7021904587745667, "learning_rate": 3.207444817301404e-06, "loss": 0.0052, "step": 155150 }, { "epoch": 1.3101686685947098, "grad_norm": 0.030193911865353584, "learning_rate": 3.2067569387714126e-06, "loss": 0.0035, "step": 155160 }, { "epoch": 1.3102531084418736, "grad_norm": 0.18555110692977905, "learning_rate": 3.2060690991902897e-06, "loss": 0.0117, "step": 155170 }, { "epoch": 1.3103375482890376, "grad_norm": 0.07000710070133209, "learning_rate": 3.2053812985729813e-06, "loss": 0.0075, "step": 155180 }, { "epoch": 1.3104219881362016, "grad_norm": 0.4753206968307495, "learning_rate": 3.2046935369344224e-06, "loss": 0.0062, "step": 155190 }, { "epoch": 1.3105064279833654, "grad_norm": 0.4588419497013092, "learning_rate": 3.204005814289553e-06, "loss": 0.0038, "step": 155200 }, { "epoch": 1.3105908678305291, "grad_norm": 0.3016031086444855, "learning_rate": 3.2033181306533074e-06, "loss": 0.0042, "step": 155210 }, { "epoch": 1.3106753076776931, "grad_norm": 0.0970572680234909, "learning_rate": 3.2026304860406253e-06, "loss": 0.0038, "step": 155220 }, { "epoch": 1.310759747524857, "grad_norm": 0.2371700555086136, "learning_rate": 3.201942880466441e-06, "loss": 0.0061, "step": 155230 }, { "epoch": 1.310844187372021, "grad_norm": 0.1694459319114685, "learning_rate": 3.2012553139456883e-06, "loss": 0.0044, "step": 155240 }, { "epoch": 1.3109286272191847, "grad_norm": 0.15675704181194305, "learning_rate": 3.2005677864933015e-06, "loss": 0.0063, "step": 155250 }, { "epoch": 1.3110130670663487, "grad_norm": 0.49216485023498535, "learning_rate": 3.1998802981242127e-06, "loss": 0.0086, "step": 155260 }, { "epoch": 1.3110975069135125, "grad_norm": 0.25759848952293396, "learning_rate": 3.199192848853356e-06, "loss": 0.0063, "step": 155270 }, { "epoch": 1.3111819467606765, "grad_norm": 0.40582942962646484, "learning_rate": 3.1985054386956614e-06, "loss": 0.0076, "step": 155280 }, { "epoch": 1.3112663866078402, "grad_norm": 0.17130817472934723, "learning_rate": 3.1978180676660607e-06, "loss": 0.0045, "step": 155290 }, { "epoch": 1.311350826455004, "grad_norm": 0.03986912965774536, "learning_rate": 3.1971307357794796e-06, "loss": 0.0091, "step": 155300 }, { "epoch": 1.311435266302168, "grad_norm": 0.3348422050476074, "learning_rate": 3.196443443050853e-06, "loss": 0.007, "step": 155310 }, { "epoch": 1.311519706149332, "grad_norm": 0.29296770691871643, "learning_rate": 3.1957561894951015e-06, "loss": 0.0046, "step": 155320 }, { "epoch": 1.3116041459964958, "grad_norm": 0.14338240027427673, "learning_rate": 3.1950689751271586e-06, "loss": 0.0074, "step": 155330 }, { "epoch": 1.3116885858436595, "grad_norm": 0.26320478320121765, "learning_rate": 3.1943817999619474e-06, "loss": 0.0041, "step": 155340 }, { "epoch": 1.3117730256908235, "grad_norm": 0.01449305284768343, "learning_rate": 3.1936946640143925e-06, "loss": 0.0103, "step": 155350 }, { "epoch": 1.3118574655379873, "grad_norm": 0.13055293262004852, "learning_rate": 3.19300756729942e-06, "loss": 0.0072, "step": 155360 }, { "epoch": 1.3119419053851513, "grad_norm": 0.3083471357822418, "learning_rate": 3.1923205098319508e-06, "loss": 0.0066, "step": 155370 }, { "epoch": 1.312026345232315, "grad_norm": 0.062350716441869736, "learning_rate": 3.1916334916269125e-06, "loss": 0.0068, "step": 155380 }, { "epoch": 1.3121107850794789, "grad_norm": 0.146010160446167, "learning_rate": 3.1909465126992227e-06, "loss": 0.0062, "step": 155390 }, { "epoch": 1.3121952249266429, "grad_norm": 0.2297356128692627, "learning_rate": 3.1902595730638053e-06, "loss": 0.0073, "step": 155400 }, { "epoch": 1.3122796647738069, "grad_norm": 0.4516758620738983, "learning_rate": 3.189572672735577e-06, "loss": 0.0054, "step": 155410 }, { "epoch": 1.3123641046209706, "grad_norm": 0.33178430795669556, "learning_rate": 3.188885811729461e-06, "loss": 0.0062, "step": 155420 }, { "epoch": 1.3124485444681344, "grad_norm": 0.22680293023586273, "learning_rate": 3.188198990060373e-06, "loss": 0.0037, "step": 155430 }, { "epoch": 1.3125329843152984, "grad_norm": 0.3708173632621765, "learning_rate": 3.187512207743233e-06, "loss": 0.0052, "step": 155440 }, { "epoch": 1.3126174241624622, "grad_norm": 0.4393548369407654, "learning_rate": 3.1868254647929543e-06, "loss": 0.0068, "step": 155450 }, { "epoch": 1.3127018640096262, "grad_norm": 0.11099930852651596, "learning_rate": 3.1861387612244567e-06, "loss": 0.007, "step": 155460 }, { "epoch": 1.31278630385679, "grad_norm": 0.5045728087425232, "learning_rate": 3.1854520970526537e-06, "loss": 0.0068, "step": 155470 }, { "epoch": 1.312870743703954, "grad_norm": 0.22042050957679749, "learning_rate": 3.184765472292458e-06, "loss": 0.0058, "step": 155480 }, { "epoch": 1.3129551835511177, "grad_norm": 0.036607690155506134, "learning_rate": 3.184078886958786e-06, "loss": 0.0096, "step": 155490 }, { "epoch": 1.3130396233982817, "grad_norm": 0.11718494445085526, "learning_rate": 3.183392341066546e-06, "loss": 0.0066, "step": 155500 }, { "epoch": 1.3131240632454455, "grad_norm": 0.23176956176757812, "learning_rate": 3.182705834630654e-06, "loss": 0.0065, "step": 155510 }, { "epoch": 1.3132085030926093, "grad_norm": 0.21660122275352478, "learning_rate": 3.1820193676660176e-06, "loss": 0.0052, "step": 155520 }, { "epoch": 1.3132929429397733, "grad_norm": 0.19195298850536346, "learning_rate": 3.181332940187549e-06, "loss": 0.0085, "step": 155530 }, { "epoch": 1.3133773827869373, "grad_norm": 0.19563455879688263, "learning_rate": 3.1806465522101548e-06, "loss": 0.0052, "step": 155540 }, { "epoch": 1.313461822634101, "grad_norm": 0.6346328258514404, "learning_rate": 3.1799602037487464e-06, "loss": 0.0128, "step": 155550 }, { "epoch": 1.3135462624812648, "grad_norm": 0.18313325941562653, "learning_rate": 3.179273894818229e-06, "loss": 0.0084, "step": 155560 }, { "epoch": 1.3136307023284288, "grad_norm": 0.689820408821106, "learning_rate": 3.17858762543351e-06, "loss": 0.012, "step": 155570 }, { "epoch": 1.3137151421755926, "grad_norm": 0.1308087706565857, "learning_rate": 3.1779013956094938e-06, "loss": 0.0079, "step": 155580 }, { "epoch": 1.3137995820227566, "grad_norm": 0.3898727595806122, "learning_rate": 3.177215205361085e-06, "loss": 0.0069, "step": 155590 }, { "epoch": 1.3138840218699204, "grad_norm": 0.17850811779499054, "learning_rate": 3.1765290547031912e-06, "loss": 0.0127, "step": 155600 }, { "epoch": 1.3139684617170844, "grad_norm": 0.10745137929916382, "learning_rate": 3.17584294365071e-06, "loss": 0.0081, "step": 155610 }, { "epoch": 1.3140529015642481, "grad_norm": 0.29679375886917114, "learning_rate": 3.1751568722185476e-06, "loss": 0.0072, "step": 155620 }, { "epoch": 1.3141373414114121, "grad_norm": 0.29799795150756836, "learning_rate": 3.174470840421603e-06, "loss": 0.0044, "step": 155630 }, { "epoch": 1.314221781258576, "grad_norm": 0.10533371567726135, "learning_rate": 3.173784848274779e-06, "loss": 0.0059, "step": 155640 }, { "epoch": 1.3143062211057397, "grad_norm": 0.5488598346710205, "learning_rate": 3.1730988957929716e-06, "loss": 0.006, "step": 155650 }, { "epoch": 1.3143906609529037, "grad_norm": 0.14432169497013092, "learning_rate": 3.172412982991084e-06, "loss": 0.0099, "step": 155660 }, { "epoch": 1.3144751008000677, "grad_norm": 0.13708385825157166, "learning_rate": 3.1717271098840106e-06, "loss": 0.0092, "step": 155670 }, { "epoch": 1.3145595406472315, "grad_norm": 0.9648259282112122, "learning_rate": 3.171041276486651e-06, "loss": 0.0078, "step": 155680 }, { "epoch": 1.3146439804943952, "grad_norm": 0.20627757906913757, "learning_rate": 3.1703554828139e-06, "loss": 0.0063, "step": 155690 }, { "epoch": 1.3147284203415592, "grad_norm": 0.116673544049263, "learning_rate": 3.169669728880651e-06, "loss": 0.0046, "step": 155700 }, { "epoch": 1.314812860188723, "grad_norm": 0.20997409522533417, "learning_rate": 3.1689840147018027e-06, "loss": 0.004, "step": 155710 }, { "epoch": 1.314897300035887, "grad_norm": 0.39026206731796265, "learning_rate": 3.168298340292245e-06, "loss": 0.0085, "step": 155720 }, { "epoch": 1.3149817398830508, "grad_norm": 0.22548367083072662, "learning_rate": 3.1676127056668726e-06, "loss": 0.0056, "step": 155730 }, { "epoch": 1.3150661797302146, "grad_norm": 0.43649905920028687, "learning_rate": 3.166927110840575e-06, "loss": 0.012, "step": 155740 }, { "epoch": 1.3151506195773786, "grad_norm": 0.48798009753227234, "learning_rate": 3.166241555828248e-06, "loss": 0.0054, "step": 155750 }, { "epoch": 1.3152350594245426, "grad_norm": 0.10238775610923767, "learning_rate": 3.1655560406447767e-06, "loss": 0.0054, "step": 155760 }, { "epoch": 1.3153194992717063, "grad_norm": 0.2576594650745392, "learning_rate": 3.1648705653050532e-06, "loss": 0.0076, "step": 155770 }, { "epoch": 1.31540393911887, "grad_norm": 0.3769018054008484, "learning_rate": 3.1641851298239634e-06, "loss": 0.0093, "step": 155780 }, { "epoch": 1.315488378966034, "grad_norm": 0.06102209538221359, "learning_rate": 3.1634997342163982e-06, "loss": 0.0033, "step": 155790 }, { "epoch": 1.3155728188131979, "grad_norm": 0.22305171191692352, "learning_rate": 3.162814378497241e-06, "loss": 0.0089, "step": 155800 }, { "epoch": 1.3156572586603619, "grad_norm": 0.4995258152484894, "learning_rate": 3.1621290626813803e-06, "loss": 0.008, "step": 155810 }, { "epoch": 1.3157416985075256, "grad_norm": 0.41846874356269836, "learning_rate": 3.1614437867837e-06, "loss": 0.0063, "step": 155820 }, { "epoch": 1.3158261383546896, "grad_norm": 0.07364000380039215, "learning_rate": 3.1607585508190818e-06, "loss": 0.0044, "step": 155830 }, { "epoch": 1.3159105782018534, "grad_norm": 0.1647711545228958, "learning_rate": 3.160073354802413e-06, "loss": 0.0063, "step": 155840 }, { "epoch": 1.3159950180490174, "grad_norm": 0.4591328501701355, "learning_rate": 3.159388198748573e-06, "loss": 0.0067, "step": 155850 }, { "epoch": 1.3160794578961812, "grad_norm": 0.18707963824272156, "learning_rate": 3.1587030826724454e-06, "loss": 0.0105, "step": 155860 }, { "epoch": 1.316163897743345, "grad_norm": 0.5133984684944153, "learning_rate": 3.1580180065889064e-06, "loss": 0.0059, "step": 155870 }, { "epoch": 1.316248337590509, "grad_norm": 0.8807622194290161, "learning_rate": 3.157332970512842e-06, "loss": 0.0049, "step": 155880 }, { "epoch": 1.316332777437673, "grad_norm": 0.6410484910011292, "learning_rate": 3.156647974459126e-06, "loss": 0.0056, "step": 155890 }, { "epoch": 1.3164172172848367, "grad_norm": 0.195358544588089, "learning_rate": 3.1559630184426398e-06, "loss": 0.0091, "step": 155900 }, { "epoch": 1.3165016571320005, "grad_norm": 0.23933301866054535, "learning_rate": 3.1552781024782565e-06, "loss": 0.0103, "step": 155910 }, { "epoch": 1.3165860969791645, "grad_norm": 0.34081733226776123, "learning_rate": 3.154593226580859e-06, "loss": 0.005, "step": 155920 }, { "epoch": 1.3166705368263283, "grad_norm": 0.2942364513874054, "learning_rate": 3.153908390765316e-06, "loss": 0.0101, "step": 155930 }, { "epoch": 1.3167549766734923, "grad_norm": 0.19021441042423248, "learning_rate": 3.153223595046503e-06, "loss": 0.0058, "step": 155940 }, { "epoch": 1.316839416520656, "grad_norm": 0.1346215158700943, "learning_rate": 3.1525388394392974e-06, "loss": 0.0085, "step": 155950 }, { "epoch": 1.31692385636782, "grad_norm": 0.10200509428977966, "learning_rate": 3.151854123958568e-06, "loss": 0.0084, "step": 155960 }, { "epoch": 1.3170082962149838, "grad_norm": 0.5411510467529297, "learning_rate": 3.15116944861919e-06, "loss": 0.0106, "step": 155970 }, { "epoch": 1.3170927360621478, "grad_norm": 0.1723555028438568, "learning_rate": 3.15048481343603e-06, "loss": 0.0077, "step": 155980 }, { "epoch": 1.3171771759093116, "grad_norm": 0.24055597186088562, "learning_rate": 3.1498002184239633e-06, "loss": 0.0056, "step": 155990 }, { "epoch": 1.3172616157564754, "grad_norm": 0.28295546770095825, "learning_rate": 3.149115663597856e-06, "loss": 0.009, "step": 156000 }, { "epoch": 1.3173460556036394, "grad_norm": 0.11490803956985474, "learning_rate": 3.1484311489725773e-06, "loss": 0.0039, "step": 156010 }, { "epoch": 1.3174304954508032, "grad_norm": 0.5306759476661682, "learning_rate": 3.1477466745629936e-06, "loss": 0.0059, "step": 156020 }, { "epoch": 1.3175149352979671, "grad_norm": 0.3730834722518921, "learning_rate": 3.147062240383974e-06, "loss": 0.0056, "step": 156030 }, { "epoch": 1.317599375145131, "grad_norm": 0.2556738555431366, "learning_rate": 3.1463778464503818e-06, "loss": 0.0049, "step": 156040 }, { "epoch": 1.317683814992295, "grad_norm": 0.6907045245170593, "learning_rate": 3.1456934927770845e-06, "loss": 0.0063, "step": 156050 }, { "epoch": 1.3177682548394587, "grad_norm": 0.3087150752544403, "learning_rate": 3.145009179378944e-06, "loss": 0.0078, "step": 156060 }, { "epoch": 1.3178526946866227, "grad_norm": 0.16107434034347534, "learning_rate": 3.144324906270822e-06, "loss": 0.0065, "step": 156070 }, { "epoch": 1.3179371345337865, "grad_norm": 0.19465690851211548, "learning_rate": 3.143640673467585e-06, "loss": 0.0046, "step": 156080 }, { "epoch": 1.3180215743809502, "grad_norm": 0.35147303342819214, "learning_rate": 3.142956480984091e-06, "loss": 0.0062, "step": 156090 }, { "epoch": 1.3181060142281142, "grad_norm": 0.1841423511505127, "learning_rate": 3.1422723288352032e-06, "loss": 0.0102, "step": 156100 }, { "epoch": 1.3181904540752782, "grad_norm": 0.42554354667663574, "learning_rate": 3.141588217035777e-06, "loss": 0.005, "step": 156110 }, { "epoch": 1.318274893922442, "grad_norm": 0.1913246065378189, "learning_rate": 3.140904145600676e-06, "loss": 0.0045, "step": 156120 }, { "epoch": 1.3183593337696058, "grad_norm": 0.058759067207574844, "learning_rate": 3.140220114544755e-06, "loss": 0.0075, "step": 156130 }, { "epoch": 1.3184437736167698, "grad_norm": 0.3913937211036682, "learning_rate": 3.1395361238828735e-06, "loss": 0.0084, "step": 156140 }, { "epoch": 1.3185282134639336, "grad_norm": 0.31269747018814087, "learning_rate": 3.138852173629884e-06, "loss": 0.0069, "step": 156150 }, { "epoch": 1.3186126533110976, "grad_norm": 0.2623676359653473, "learning_rate": 3.1381682638006457e-06, "loss": 0.006, "step": 156160 }, { "epoch": 1.3186970931582613, "grad_norm": 0.37796828150749207, "learning_rate": 3.137484394410011e-06, "loss": 0.0186, "step": 156170 }, { "epoch": 1.3187815330054253, "grad_norm": 0.16296449303627014, "learning_rate": 3.136800565472833e-06, "loss": 0.0041, "step": 156180 }, { "epoch": 1.318865972852589, "grad_norm": 0.29622283577919006, "learning_rate": 3.136116777003966e-06, "loss": 0.0074, "step": 156190 }, { "epoch": 1.318950412699753, "grad_norm": 0.17077766358852386, "learning_rate": 3.135433029018259e-06, "loss": 0.0065, "step": 156200 }, { "epoch": 1.3190348525469169, "grad_norm": 0.4117179811000824, "learning_rate": 3.1347493215305657e-06, "loss": 0.007, "step": 156210 }, { "epoch": 1.3191192923940807, "grad_norm": 0.4767002761363983, "learning_rate": 3.1340656545557336e-06, "loss": 0.0128, "step": 156220 }, { "epoch": 1.3192037322412447, "grad_norm": 0.20567987859249115, "learning_rate": 3.1333820281086146e-06, "loss": 0.0049, "step": 156230 }, { "epoch": 1.3192881720884087, "grad_norm": 0.2742098569869995, "learning_rate": 3.132698442204054e-06, "loss": 0.0069, "step": 156240 }, { "epoch": 1.3193726119355724, "grad_norm": 0.18812677264213562, "learning_rate": 3.1320148968569027e-06, "loss": 0.0028, "step": 156250 }, { "epoch": 1.3194570517827362, "grad_norm": 0.15537795424461365, "learning_rate": 3.131331392082002e-06, "loss": 0.0059, "step": 156260 }, { "epoch": 1.3195414916299002, "grad_norm": 0.4893494248390198, "learning_rate": 3.130647927894203e-06, "loss": 0.0125, "step": 156270 }, { "epoch": 1.319625931477064, "grad_norm": 0.23673558235168457, "learning_rate": 3.129964504308348e-06, "loss": 0.0052, "step": 156280 }, { "epoch": 1.319710371324228, "grad_norm": 0.04147329926490784, "learning_rate": 3.1292811213392798e-06, "loss": 0.0087, "step": 156290 }, { "epoch": 1.3197948111713917, "grad_norm": 0.25577986240386963, "learning_rate": 3.128597779001843e-06, "loss": 0.0043, "step": 156300 }, { "epoch": 1.3198792510185555, "grad_norm": 0.37486881017684937, "learning_rate": 3.1279144773108772e-06, "loss": 0.008, "step": 156310 }, { "epoch": 1.3199636908657195, "grad_norm": 0.08663606643676758, "learning_rate": 3.1272312162812273e-06, "loss": 0.0048, "step": 156320 }, { "epoch": 1.3200481307128835, "grad_norm": 0.007264552637934685, "learning_rate": 3.126547995927731e-06, "loss": 0.0217, "step": 156330 }, { "epoch": 1.3201325705600473, "grad_norm": 0.11835575103759766, "learning_rate": 3.1258648162652283e-06, "loss": 0.008, "step": 156340 }, { "epoch": 1.320217010407211, "grad_norm": 0.270806223154068, "learning_rate": 3.1251816773085564e-06, "loss": 0.0064, "step": 156350 }, { "epoch": 1.320301450254375, "grad_norm": 0.03300707787275314, "learning_rate": 3.1244985790725564e-06, "loss": 0.0049, "step": 156360 }, { "epoch": 1.3203858901015388, "grad_norm": 0.17940939962863922, "learning_rate": 3.123815521572062e-06, "loss": 0.0115, "step": 156370 }, { "epoch": 1.3204703299487028, "grad_norm": 0.6664982438087463, "learning_rate": 3.1231325048219108e-06, "loss": 0.0089, "step": 156380 }, { "epoch": 1.3205547697958666, "grad_norm": 0.08067009598016739, "learning_rate": 3.1224495288369342e-06, "loss": 0.0081, "step": 156390 }, { "epoch": 1.3206392096430306, "grad_norm": 0.27714163064956665, "learning_rate": 3.121766593631972e-06, "loss": 0.0103, "step": 156400 }, { "epoch": 1.3207236494901944, "grad_norm": 0.19057418406009674, "learning_rate": 3.1210836992218548e-06, "loss": 0.0092, "step": 156410 }, { "epoch": 1.3208080893373584, "grad_norm": 0.1632154881954193, "learning_rate": 3.120400845621413e-06, "loss": 0.0079, "step": 156420 }, { "epoch": 1.3208925291845222, "grad_norm": 0.033112023025751114, "learning_rate": 3.1197180328454805e-06, "loss": 0.0075, "step": 156430 }, { "epoch": 1.320976969031686, "grad_norm": 0.4042030870914459, "learning_rate": 3.119035260908886e-06, "loss": 0.0066, "step": 156440 }, { "epoch": 1.32106140887885, "grad_norm": 0.02323083020746708, "learning_rate": 3.118352529826461e-06, "loss": 0.0048, "step": 156450 }, { "epoch": 1.321145848726014, "grad_norm": 0.1688380241394043, "learning_rate": 3.1176698396130335e-06, "loss": 0.0039, "step": 156460 }, { "epoch": 1.3212302885731777, "grad_norm": 0.15947356820106506, "learning_rate": 3.1169871902834315e-06, "loss": 0.0088, "step": 156470 }, { "epoch": 1.3213147284203415, "grad_norm": 0.26118016242980957, "learning_rate": 3.11630458185248e-06, "loss": 0.0058, "step": 156480 }, { "epoch": 1.3213991682675055, "grad_norm": 0.13915617763996124, "learning_rate": 3.1156220143350103e-06, "loss": 0.0102, "step": 156490 }, { "epoch": 1.3214836081146693, "grad_norm": 0.2492266446352005, "learning_rate": 3.114939487745841e-06, "loss": 0.0042, "step": 156500 }, { "epoch": 1.3215680479618332, "grad_norm": 0.17999723553657532, "learning_rate": 3.114257002099802e-06, "loss": 0.0073, "step": 156510 }, { "epoch": 1.321652487808997, "grad_norm": 0.09303902089595795, "learning_rate": 3.113574557411715e-06, "loss": 0.0046, "step": 156520 }, { "epoch": 1.321736927656161, "grad_norm": 0.37454062700271606, "learning_rate": 3.1128921536963997e-06, "loss": 0.0096, "step": 156530 }, { "epoch": 1.3218213675033248, "grad_norm": 0.15859204530715942, "learning_rate": 3.1122097909686823e-06, "loss": 0.0113, "step": 156540 }, { "epoch": 1.3219058073504888, "grad_norm": 0.1993405520915985, "learning_rate": 3.111527469243379e-06, "loss": 0.005, "step": 156550 }, { "epoch": 1.3219902471976526, "grad_norm": 0.48153501749038696, "learning_rate": 3.110845188535314e-06, "loss": 0.0077, "step": 156560 }, { "epoch": 1.3220746870448163, "grad_norm": 0.15815672278404236, "learning_rate": 3.1101629488593032e-06, "loss": 0.0058, "step": 156570 }, { "epoch": 1.3221591268919803, "grad_norm": 0.1924344003200531, "learning_rate": 3.109480750230167e-06, "loss": 0.0063, "step": 156580 }, { "epoch": 1.3222435667391443, "grad_norm": 0.2114124596118927, "learning_rate": 3.1087985926627196e-06, "loss": 0.0116, "step": 156590 }, { "epoch": 1.3223280065863081, "grad_norm": 0.2597401440143585, "learning_rate": 3.108116476171782e-06, "loss": 0.0046, "step": 156600 }, { "epoch": 1.3224124464334719, "grad_norm": 0.059961993247270584, "learning_rate": 3.1074344007721645e-06, "loss": 0.0056, "step": 156610 }, { "epoch": 1.3224968862806359, "grad_norm": 0.11778008192777634, "learning_rate": 3.1067523664786857e-06, "loss": 0.0054, "step": 156620 }, { "epoch": 1.3225813261277997, "grad_norm": 0.5139746069908142, "learning_rate": 3.106070373306155e-06, "loss": 0.0081, "step": 156630 }, { "epoch": 1.3226657659749637, "grad_norm": 0.8222165703773499, "learning_rate": 3.105388421269391e-06, "loss": 0.0073, "step": 156640 }, { "epoch": 1.3227502058221274, "grad_norm": 0.48811131715774536, "learning_rate": 3.104706510383201e-06, "loss": 0.0091, "step": 156650 }, { "epoch": 1.3228346456692912, "grad_norm": 0.09317121654748917, "learning_rate": 3.1040246406623966e-06, "loss": 0.0058, "step": 156660 }, { "epoch": 1.3229190855164552, "grad_norm": 0.2901712954044342, "learning_rate": 3.1033428121217897e-06, "loss": 0.0073, "step": 156670 }, { "epoch": 1.3230035253636192, "grad_norm": 0.10804186761379242, "learning_rate": 3.102661024776186e-06, "loss": 0.0061, "step": 156680 }, { "epoch": 1.323087965210783, "grad_norm": 0.17545096576213837, "learning_rate": 3.101979278640398e-06, "loss": 0.0082, "step": 156690 }, { "epoch": 1.3231724050579468, "grad_norm": 0.1607530564069748, "learning_rate": 3.10129757372923e-06, "loss": 0.0108, "step": 156700 }, { "epoch": 1.3232568449051108, "grad_norm": 0.38539519906044006, "learning_rate": 3.1006159100574906e-06, "loss": 0.0054, "step": 156710 }, { "epoch": 1.3233412847522745, "grad_norm": 0.2470243275165558, "learning_rate": 3.099934287639983e-06, "loss": 0.0067, "step": 156720 }, { "epoch": 1.3234257245994385, "grad_norm": 0.3460536301136017, "learning_rate": 3.0992527064915145e-06, "loss": 0.0039, "step": 156730 }, { "epoch": 1.3235101644466023, "grad_norm": 0.3169253170490265, "learning_rate": 3.0985711666268875e-06, "loss": 0.0077, "step": 156740 }, { "epoch": 1.3235946042937663, "grad_norm": 1.1206111907958984, "learning_rate": 3.097889668060906e-06, "loss": 0.0087, "step": 156750 }, { "epoch": 1.32367904414093, "grad_norm": 0.4091480076313019, "learning_rate": 3.0972082108083707e-06, "loss": 0.0066, "step": 156760 }, { "epoch": 1.323763483988094, "grad_norm": 0.37709030508995056, "learning_rate": 3.0965267948840817e-06, "loss": 0.0068, "step": 156770 }, { "epoch": 1.3238479238352578, "grad_norm": 0.15171988308429718, "learning_rate": 3.095845420302843e-06, "loss": 0.0078, "step": 156780 }, { "epoch": 1.3239323636824216, "grad_norm": 0.14015722274780273, "learning_rate": 3.0951640870794486e-06, "loss": 0.0091, "step": 156790 }, { "epoch": 1.3240168035295856, "grad_norm": 0.22528041899204254, "learning_rate": 3.094482795228703e-06, "loss": 0.0055, "step": 156800 }, { "epoch": 1.3241012433767496, "grad_norm": 0.23549267649650574, "learning_rate": 3.093801544765398e-06, "loss": 0.0032, "step": 156810 }, { "epoch": 1.3241856832239134, "grad_norm": 0.043096207082271576, "learning_rate": 3.093120335704335e-06, "loss": 0.0057, "step": 156820 }, { "epoch": 1.3242701230710772, "grad_norm": 0.21381884813308716, "learning_rate": 3.092439168060305e-06, "loss": 0.0116, "step": 156830 }, { "epoch": 1.3243545629182412, "grad_norm": 0.22228290140628815, "learning_rate": 3.0917580418481073e-06, "loss": 0.0048, "step": 156840 }, { "epoch": 1.324439002765405, "grad_norm": 0.17837265133857727, "learning_rate": 3.0910769570825333e-06, "loss": 0.008, "step": 156850 }, { "epoch": 1.324523442612569, "grad_norm": 0.30994683504104614, "learning_rate": 3.0903959137783775e-06, "loss": 0.0042, "step": 156860 }, { "epoch": 1.3246078824597327, "grad_norm": 0.30779266357421875, "learning_rate": 3.089714911950431e-06, "loss": 0.0028, "step": 156870 }, { "epoch": 1.3246923223068965, "grad_norm": 0.43743038177490234, "learning_rate": 3.089033951613483e-06, "loss": 0.0089, "step": 156880 }, { "epoch": 1.3247767621540605, "grad_norm": 0.26109814643859863, "learning_rate": 3.0883530327823288e-06, "loss": 0.0124, "step": 156890 }, { "epoch": 1.3248612020012245, "grad_norm": 0.0100742531940341, "learning_rate": 3.0876721554717525e-06, "loss": 0.0066, "step": 156900 }, { "epoch": 1.3249456418483883, "grad_norm": 0.267590194940567, "learning_rate": 3.086991319696547e-06, "loss": 0.0048, "step": 156910 }, { "epoch": 1.325030081695552, "grad_norm": 0.13041728734970093, "learning_rate": 3.0863105254714966e-06, "loss": 0.0054, "step": 156920 }, { "epoch": 1.325114521542716, "grad_norm": 0.31478604674339294, "learning_rate": 3.0856297728113904e-06, "loss": 0.0074, "step": 156930 }, { "epoch": 1.3251989613898798, "grad_norm": 0.15076926350593567, "learning_rate": 3.084949061731013e-06, "loss": 0.0077, "step": 156940 }, { "epoch": 1.3252834012370438, "grad_norm": 0.0019210012396797538, "learning_rate": 3.0842683922451504e-06, "loss": 0.005, "step": 156950 }, { "epoch": 1.3253678410842076, "grad_norm": 0.013366332277655602, "learning_rate": 3.0835877643685835e-06, "loss": 0.0075, "step": 156960 }, { "epoch": 1.3254522809313716, "grad_norm": 0.29096123576164246, "learning_rate": 3.0829071781161e-06, "loss": 0.0069, "step": 156970 }, { "epoch": 1.3255367207785353, "grad_norm": 0.26829993724823, "learning_rate": 3.082226633502479e-06, "loss": 0.0116, "step": 156980 }, { "epoch": 1.3256211606256993, "grad_norm": 0.38340893387794495, "learning_rate": 3.081546130542502e-06, "loss": 0.0097, "step": 156990 }, { "epoch": 1.3257056004728631, "grad_norm": 0.36715760827064514, "learning_rate": 3.0808656692509516e-06, "loss": 0.0132, "step": 157000 }, { "epoch": 1.325790040320027, "grad_norm": 0.2437056601047516, "learning_rate": 3.0801852496426027e-06, "loss": 0.0084, "step": 157010 }, { "epoch": 1.325874480167191, "grad_norm": 0.11589761078357697, "learning_rate": 3.079504871732239e-06, "loss": 0.0028, "step": 157020 }, { "epoch": 1.325958920014355, "grad_norm": 0.04852794110774994, "learning_rate": 3.0788245355346347e-06, "loss": 0.0086, "step": 157030 }, { "epoch": 1.3260433598615187, "grad_norm": 0.3362716734409332, "learning_rate": 3.078144241064569e-06, "loss": 0.0051, "step": 157040 }, { "epoch": 1.3261277997086824, "grad_norm": 0.008497104980051517, "learning_rate": 3.077463988336815e-06, "loss": 0.0141, "step": 157050 }, { "epoch": 1.3262122395558464, "grad_norm": 0.6391342878341675, "learning_rate": 3.0767837773661506e-06, "loss": 0.0076, "step": 157060 }, { "epoch": 1.3262966794030102, "grad_norm": 0.1456356644630432, "learning_rate": 3.076103608167348e-06, "loss": 0.004, "step": 157070 }, { "epoch": 1.3263811192501742, "grad_norm": 0.020662715658545494, "learning_rate": 3.0754234807551814e-06, "loss": 0.0097, "step": 157080 }, { "epoch": 1.326465559097338, "grad_norm": 0.10892368108034134, "learning_rate": 3.0747433951444208e-06, "loss": 0.0079, "step": 157090 }, { "epoch": 1.326549998944502, "grad_norm": 0.1942809373140335, "learning_rate": 3.074063351349841e-06, "loss": 0.0077, "step": 157100 }, { "epoch": 1.3266344387916658, "grad_norm": 0.13471584022045135, "learning_rate": 3.0733833493862107e-06, "loss": 0.0054, "step": 157110 }, { "epoch": 1.3267188786388298, "grad_norm": 0.20490798354148865, "learning_rate": 3.0727033892682966e-06, "loss": 0.0056, "step": 157120 }, { "epoch": 1.3268033184859935, "grad_norm": 0.1686171442270279, "learning_rate": 3.0720234710108726e-06, "loss": 0.0041, "step": 157130 }, { "epoch": 1.3268877583331573, "grad_norm": 0.25692474842071533, "learning_rate": 3.0713435946287023e-06, "loss": 0.0042, "step": 157140 }, { "epoch": 1.3269721981803213, "grad_norm": 0.16732154786586761, "learning_rate": 3.070663760136555e-06, "loss": 0.0059, "step": 157150 }, { "epoch": 1.3270566380274853, "grad_norm": 0.2259076088666916, "learning_rate": 3.069983967549194e-06, "loss": 0.0061, "step": 157160 }, { "epoch": 1.327141077874649, "grad_norm": 0.1851896196603775, "learning_rate": 3.069304216881387e-06, "loss": 0.0096, "step": 157170 }, { "epoch": 1.3272255177218129, "grad_norm": 0.2515014410018921, "learning_rate": 3.068624508147896e-06, "loss": 0.0058, "step": 157180 }, { "epoch": 1.3273099575689769, "grad_norm": 0.5209041833877563, "learning_rate": 3.0679448413634854e-06, "loss": 0.0102, "step": 157190 }, { "epoch": 1.3273943974161406, "grad_norm": 0.3829008638858795, "learning_rate": 3.067265216542916e-06, "loss": 0.0079, "step": 157200 }, { "epoch": 1.3274788372633046, "grad_norm": 0.2817947566509247, "learning_rate": 3.0665856337009514e-06, "loss": 0.0074, "step": 157210 }, { "epoch": 1.3275632771104684, "grad_norm": 0.48847493529319763, "learning_rate": 3.065906092852349e-06, "loss": 0.0068, "step": 157220 }, { "epoch": 1.3276477169576322, "grad_norm": 0.1429738849401474, "learning_rate": 3.065226594011872e-06, "loss": 0.0043, "step": 157230 }, { "epoch": 1.3277321568047962, "grad_norm": 0.01754039153456688, "learning_rate": 3.0645471371942766e-06, "loss": 0.0044, "step": 157240 }, { "epoch": 1.3278165966519602, "grad_norm": 0.22717608511447906, "learning_rate": 3.0638677224143186e-06, "loss": 0.0095, "step": 157250 }, { "epoch": 1.327901036499124, "grad_norm": 0.14194004237651825, "learning_rate": 3.063188349686759e-06, "loss": 0.0056, "step": 157260 }, { "epoch": 1.3279854763462877, "grad_norm": 0.1337825059890747, "learning_rate": 3.0625090190263505e-06, "loss": 0.0088, "step": 157270 }, { "epoch": 1.3280699161934517, "grad_norm": 0.11614122241735458, "learning_rate": 3.06182973044785e-06, "loss": 0.0051, "step": 157280 }, { "epoch": 1.3281543560406155, "grad_norm": 0.050746556371450424, "learning_rate": 3.061150483966009e-06, "loss": 0.005, "step": 157290 }, { "epoch": 1.3282387958877795, "grad_norm": 0.16703417897224426, "learning_rate": 3.060471279595584e-06, "loss": 0.0083, "step": 157300 }, { "epoch": 1.3283232357349433, "grad_norm": 0.35431739687919617, "learning_rate": 3.059792117351324e-06, "loss": 0.0067, "step": 157310 }, { "epoch": 1.3284076755821073, "grad_norm": 0.011628890410065651, "learning_rate": 3.0591129972479825e-06, "loss": 0.0072, "step": 157320 }, { "epoch": 1.328492115429271, "grad_norm": 0.11882951110601425, "learning_rate": 3.0584339193003076e-06, "loss": 0.0065, "step": 157330 }, { "epoch": 1.328576555276435, "grad_norm": 0.26541879773139954, "learning_rate": 3.057754883523052e-06, "loss": 0.0052, "step": 157340 }, { "epoch": 1.3286609951235988, "grad_norm": 0.4438679814338684, "learning_rate": 3.057075889930962e-06, "loss": 0.0098, "step": 157350 }, { "epoch": 1.3287454349707626, "grad_norm": 0.08005552738904953, "learning_rate": 3.0563969385387847e-06, "loss": 0.0124, "step": 157360 }, { "epoch": 1.3288298748179266, "grad_norm": 0.23254182934761047, "learning_rate": 3.0557180293612688e-06, "loss": 0.0084, "step": 157370 }, { "epoch": 1.3289143146650906, "grad_norm": 0.23694434762001038, "learning_rate": 3.055039162413157e-06, "loss": 0.0053, "step": 157380 }, { "epoch": 1.3289987545122544, "grad_norm": 0.1165727972984314, "learning_rate": 3.0543603377091986e-06, "loss": 0.0038, "step": 157390 }, { "epoch": 1.3290831943594181, "grad_norm": 0.5421671271324158, "learning_rate": 3.0536815552641318e-06, "loss": 0.0095, "step": 157400 }, { "epoch": 1.3291676342065821, "grad_norm": 0.14705121517181396, "learning_rate": 3.0530028150927048e-06, "loss": 0.0053, "step": 157410 }, { "epoch": 1.329252074053746, "grad_norm": 0.1734248548746109, "learning_rate": 3.0523241172096573e-06, "loss": 0.0055, "step": 157420 }, { "epoch": 1.32933651390091, "grad_norm": 0.5399754643440247, "learning_rate": 3.051645461629732e-06, "loss": 0.0104, "step": 157430 }, { "epoch": 1.3294209537480737, "grad_norm": 0.1784331500530243, "learning_rate": 3.0509668483676654e-06, "loss": 0.0115, "step": 157440 }, { "epoch": 1.3295053935952377, "grad_norm": 0.010587356053292751, "learning_rate": 3.0502882774382015e-06, "loss": 0.0074, "step": 157450 }, { "epoch": 1.3295898334424014, "grad_norm": 0.3195880055427551, "learning_rate": 3.0496097488560765e-06, "loss": 0.0082, "step": 157460 }, { "epoch": 1.3296742732895654, "grad_norm": 0.3901365101337433, "learning_rate": 3.048931262636028e-06, "loss": 0.0086, "step": 157470 }, { "epoch": 1.3297587131367292, "grad_norm": 0.39318132400512695, "learning_rate": 3.048252818792793e-06, "loss": 0.0072, "step": 157480 }, { "epoch": 1.329843152983893, "grad_norm": 0.24139146506786346, "learning_rate": 3.047574417341105e-06, "loss": 0.005, "step": 157490 }, { "epoch": 1.329927592831057, "grad_norm": 0.3748851418495178, "learning_rate": 3.046896058295702e-06, "loss": 0.0065, "step": 157500 }, { "epoch": 1.330012032678221, "grad_norm": 0.2826632261276245, "learning_rate": 3.0462177416713156e-06, "loss": 0.0052, "step": 157510 }, { "epoch": 1.3300964725253848, "grad_norm": 0.5188543200492859, "learning_rate": 3.045539467482681e-06, "loss": 0.0121, "step": 157520 }, { "epoch": 1.3301809123725485, "grad_norm": 0.2936588525772095, "learning_rate": 3.0448612357445262e-06, "loss": 0.0112, "step": 157530 }, { "epoch": 1.3302653522197125, "grad_norm": 1.0577304363250732, "learning_rate": 3.044183046471587e-06, "loss": 0.0081, "step": 157540 }, { "epoch": 1.3303497920668763, "grad_norm": 0.1856907308101654, "learning_rate": 3.0435048996785897e-06, "loss": 0.006, "step": 157550 }, { "epoch": 1.3304342319140403, "grad_norm": 0.0004167277365922928, "learning_rate": 3.042826795380267e-06, "loss": 0.0068, "step": 157560 }, { "epoch": 1.330518671761204, "grad_norm": 0.04454129934310913, "learning_rate": 3.0421487335913426e-06, "loss": 0.003, "step": 157570 }, { "epoch": 1.3306031116083679, "grad_norm": 0.33208155632019043, "learning_rate": 3.041470714326549e-06, "loss": 0.0053, "step": 157580 }, { "epoch": 1.3306875514555319, "grad_norm": 0.45630738139152527, "learning_rate": 3.040792737600609e-06, "loss": 0.0136, "step": 157590 }, { "epoch": 1.3307719913026959, "grad_norm": 0.2710637152194977, "learning_rate": 3.040114803428249e-06, "loss": 0.0057, "step": 157600 }, { "epoch": 1.3308564311498596, "grad_norm": 0.37237977981567383, "learning_rate": 3.039436911824195e-06, "loss": 0.0077, "step": 157610 }, { "epoch": 1.3309408709970234, "grad_norm": 0.04414491727948189, "learning_rate": 3.038759062803167e-06, "loss": 0.0046, "step": 157620 }, { "epoch": 1.3310253108441874, "grad_norm": 0.2095838189125061, "learning_rate": 3.0380812563798934e-06, "loss": 0.0051, "step": 157630 }, { "epoch": 1.3311097506913512, "grad_norm": 0.08332831412553787, "learning_rate": 3.037403492569091e-06, "loss": 0.0101, "step": 157640 }, { "epoch": 1.3311941905385152, "grad_norm": 0.33259090781211853, "learning_rate": 3.0367257713854835e-06, "loss": 0.0048, "step": 157650 }, { "epoch": 1.331278630385679, "grad_norm": 0.2721911072731018, "learning_rate": 3.036048092843788e-06, "loss": 0.0124, "step": 157660 }, { "epoch": 1.331363070232843, "grad_norm": 0.3905082941055298, "learning_rate": 3.035370456958727e-06, "loss": 0.005, "step": 157670 }, { "epoch": 1.3314475100800067, "grad_norm": 0.027445640414953232, "learning_rate": 3.0346928637450176e-06, "loss": 0.0047, "step": 157680 }, { "epoch": 1.3315319499271707, "grad_norm": 0.16168443858623505, "learning_rate": 3.0340153132173767e-06, "loss": 0.0084, "step": 157690 }, { "epoch": 1.3316163897743345, "grad_norm": 0.05239274725317955, "learning_rate": 3.0333378053905195e-06, "loss": 0.0089, "step": 157700 }, { "epoch": 1.3317008296214983, "grad_norm": 0.24860356748104095, "learning_rate": 3.032660340279162e-06, "loss": 0.005, "step": 157710 }, { "epoch": 1.3317852694686623, "grad_norm": 0.01971426047384739, "learning_rate": 3.03198291789802e-06, "loss": 0.0115, "step": 157720 }, { "epoch": 1.3318697093158263, "grad_norm": 0.5200756788253784, "learning_rate": 3.0313055382618027e-06, "loss": 0.0174, "step": 157730 }, { "epoch": 1.33195414916299, "grad_norm": 0.6821765899658203, "learning_rate": 3.0306282013852284e-06, "loss": 0.0077, "step": 157740 }, { "epoch": 1.3320385890101538, "grad_norm": 0.36001530289649963, "learning_rate": 3.0299509072830053e-06, "loss": 0.0071, "step": 157750 }, { "epoch": 1.3321230288573178, "grad_norm": 0.11105340719223022, "learning_rate": 3.0292736559698453e-06, "loss": 0.0067, "step": 157760 }, { "epoch": 1.3322074687044816, "grad_norm": 0.20771022140979767, "learning_rate": 3.0285964474604556e-06, "loss": 0.0095, "step": 157770 }, { "epoch": 1.3322919085516456, "grad_norm": 0.1549716740846634, "learning_rate": 3.027919281769549e-06, "loss": 0.0091, "step": 157780 }, { "epoch": 1.3323763483988094, "grad_norm": 0.14057858288288116, "learning_rate": 3.0272421589118312e-06, "loss": 0.0056, "step": 157790 }, { "epoch": 1.3324607882459731, "grad_norm": 0.32829833030700684, "learning_rate": 3.0265650789020097e-06, "loss": 0.0061, "step": 157800 }, { "epoch": 1.3325452280931371, "grad_norm": 0.16073815524578094, "learning_rate": 3.0258880417547888e-06, "loss": 0.0062, "step": 157810 }, { "epoch": 1.3326296679403011, "grad_norm": 0.11770939081907272, "learning_rate": 3.0252110474848766e-06, "loss": 0.0094, "step": 157820 }, { "epoch": 1.332714107787465, "grad_norm": 0.050907425582408905, "learning_rate": 3.0245340961069765e-06, "loss": 0.0045, "step": 157830 }, { "epoch": 1.3327985476346287, "grad_norm": 0.5512654781341553, "learning_rate": 3.023857187635789e-06, "loss": 0.0086, "step": 157840 }, { "epoch": 1.3328829874817927, "grad_norm": 0.00623352313414216, "learning_rate": 3.02318032208602e-06, "loss": 0.007, "step": 157850 }, { "epoch": 1.3329674273289565, "grad_norm": 0.27041390538215637, "learning_rate": 3.0225034994723678e-06, "loss": 0.0104, "step": 157860 }, { "epoch": 1.3330518671761205, "grad_norm": 0.06836773455142975, "learning_rate": 3.0218267198095357e-06, "loss": 0.0112, "step": 157870 }, { "epoch": 1.3331363070232842, "grad_norm": 0.3203120827674866, "learning_rate": 3.0211499831122208e-06, "loss": 0.007, "step": 157880 }, { "epoch": 1.3332207468704482, "grad_norm": 0.17754186689853668, "learning_rate": 3.0204732893951245e-06, "loss": 0.0065, "step": 157890 }, { "epoch": 1.333305186717612, "grad_norm": 0.5077663064002991, "learning_rate": 3.01979663867294e-06, "loss": 0.0063, "step": 157900 }, { "epoch": 1.333389626564776, "grad_norm": 0.13749390840530396, "learning_rate": 3.0191200309603686e-06, "loss": 0.0073, "step": 157910 }, { "epoch": 1.3334740664119398, "grad_norm": 0.3392031192779541, "learning_rate": 3.018443466272104e-06, "loss": 0.0047, "step": 157920 }, { "epoch": 1.3335585062591035, "grad_norm": 0.2074122279882431, "learning_rate": 3.017766944622842e-06, "loss": 0.009, "step": 157930 }, { "epoch": 1.3336429461062675, "grad_norm": 0.042642757296562195, "learning_rate": 3.0170904660272755e-06, "loss": 0.0067, "step": 157940 }, { "epoch": 1.3337273859534315, "grad_norm": 0.6412942409515381, "learning_rate": 3.0164140305000953e-06, "loss": 0.0089, "step": 157950 }, { "epoch": 1.3338118258005953, "grad_norm": 0.05036015808582306, "learning_rate": 3.0157376380559988e-06, "loss": 0.0056, "step": 157960 }, { "epoch": 1.333896265647759, "grad_norm": 0.31672197580337524, "learning_rate": 3.015061288709673e-06, "loss": 0.0069, "step": 157970 }, { "epoch": 1.333980705494923, "grad_norm": 0.15842802822589874, "learning_rate": 3.01438498247581e-06, "loss": 0.0031, "step": 157980 }, { "epoch": 1.3340651453420869, "grad_norm": 0.25499093532562256, "learning_rate": 3.0137087193690963e-06, "loss": 0.0113, "step": 157990 }, { "epoch": 1.3341495851892509, "grad_norm": 0.10733766853809357, "learning_rate": 3.013032499404225e-06, "loss": 0.0059, "step": 158000 }, { "epoch": 1.3342340250364146, "grad_norm": 0.30202072858810425, "learning_rate": 3.012356322595878e-06, "loss": 0.0099, "step": 158010 }, { "epoch": 1.3343184648835786, "grad_norm": 0.01631603203713894, "learning_rate": 3.011680188958746e-06, "loss": 0.008, "step": 158020 }, { "epoch": 1.3344029047307424, "grad_norm": 0.031035898253321648, "learning_rate": 3.0110040985075124e-06, "loss": 0.0052, "step": 158030 }, { "epoch": 1.3344873445779064, "grad_norm": 0.14227618277072906, "learning_rate": 3.0103280512568627e-06, "loss": 0.0028, "step": 158040 }, { "epoch": 1.3345717844250702, "grad_norm": 0.32483237981796265, "learning_rate": 3.0096520472214783e-06, "loss": 0.0076, "step": 158050 }, { "epoch": 1.334656224272234, "grad_norm": 0.22719787061214447, "learning_rate": 3.0089760864160454e-06, "loss": 0.0059, "step": 158060 }, { "epoch": 1.334740664119398, "grad_norm": 0.6409861445426941, "learning_rate": 3.008300168855244e-06, "loss": 0.0096, "step": 158070 }, { "epoch": 1.334825103966562, "grad_norm": 0.2764381468296051, "learning_rate": 3.0076242945537538e-06, "loss": 0.0068, "step": 158080 }, { "epoch": 1.3349095438137257, "grad_norm": 0.7430617809295654, "learning_rate": 3.006948463526257e-06, "loss": 0.0069, "step": 158090 }, { "epoch": 1.3349939836608895, "grad_norm": 0.27612119913101196, "learning_rate": 3.0062726757874285e-06, "loss": 0.0054, "step": 158100 }, { "epoch": 1.3350784235080535, "grad_norm": 0.2641017436981201, "learning_rate": 3.0055969313519507e-06, "loss": 0.0093, "step": 158110 }, { "epoch": 1.3351628633552173, "grad_norm": 0.1936837136745453, "learning_rate": 3.004921230234499e-06, "loss": 0.0095, "step": 158120 }, { "epoch": 1.3352473032023813, "grad_norm": 0.3344701826572418, "learning_rate": 3.004245572449749e-06, "loss": 0.004, "step": 158130 }, { "epoch": 1.335331743049545, "grad_norm": 0.00931238941848278, "learning_rate": 3.003569958012375e-06, "loss": 0.0072, "step": 158140 }, { "epoch": 1.3354161828967088, "grad_norm": 0.11856964975595474, "learning_rate": 3.002894386937054e-06, "loss": 0.004, "step": 158150 }, { "epoch": 1.3355006227438728, "grad_norm": 0.26072779297828674, "learning_rate": 3.0022188592384573e-06, "loss": 0.0045, "step": 158160 }, { "epoch": 1.3355850625910368, "grad_norm": 0.41799765825271606, "learning_rate": 3.0015433749312585e-06, "loss": 0.0064, "step": 158170 }, { "epoch": 1.3356695024382006, "grad_norm": 0.4252939224243164, "learning_rate": 3.000867934030128e-06, "loss": 0.0051, "step": 158180 }, { "epoch": 1.3357539422853644, "grad_norm": 0.24797503650188446, "learning_rate": 3.0001925365497342e-06, "loss": 0.0117, "step": 158190 }, { "epoch": 1.3358383821325284, "grad_norm": 0.12288393825292587, "learning_rate": 2.9995171825047504e-06, "loss": 0.0077, "step": 158200 }, { "epoch": 1.3359228219796921, "grad_norm": 0.0456809401512146, "learning_rate": 2.998841871909843e-06, "loss": 0.005, "step": 158210 }, { "epoch": 1.3360072618268561, "grad_norm": 0.2430800050497055, "learning_rate": 2.99816660477968e-06, "loss": 0.0039, "step": 158220 }, { "epoch": 1.33609170167402, "grad_norm": 0.20094913244247437, "learning_rate": 2.9974913811289274e-06, "loss": 0.0103, "step": 158230 }, { "epoch": 1.336176141521184, "grad_norm": 0.07740311324596405, "learning_rate": 2.9968162009722534e-06, "loss": 0.0036, "step": 158240 }, { "epoch": 1.3362605813683477, "grad_norm": 0.24857214093208313, "learning_rate": 2.9961410643243195e-06, "loss": 0.0056, "step": 158250 }, { "epoch": 1.3363450212155117, "grad_norm": 0.2950950264930725, "learning_rate": 2.9954659711997925e-06, "loss": 0.0088, "step": 158260 }, { "epoch": 1.3364294610626755, "grad_norm": 0.3439927101135254, "learning_rate": 2.9947909216133316e-06, "loss": 0.0127, "step": 158270 }, { "epoch": 1.3365139009098392, "grad_norm": 0.2665746212005615, "learning_rate": 2.9941159155796027e-06, "loss": 0.007, "step": 158280 }, { "epoch": 1.3365983407570032, "grad_norm": 0.26391828060150146, "learning_rate": 2.9934409531132667e-06, "loss": 0.0062, "step": 158290 }, { "epoch": 1.3366827806041672, "grad_norm": 0.26593029499053955, "learning_rate": 2.9927660342289786e-06, "loss": 0.0028, "step": 158300 }, { "epoch": 1.336767220451331, "grad_norm": 1.092623233795166, "learning_rate": 2.992091158941403e-06, "loss": 0.0091, "step": 158310 }, { "epoch": 1.3368516602984948, "grad_norm": 0.060171619057655334, "learning_rate": 2.9914163272651953e-06, "loss": 0.0067, "step": 158320 }, { "epoch": 1.3369361001456588, "grad_norm": 0.36654132604599, "learning_rate": 2.9907415392150143e-06, "loss": 0.0063, "step": 158330 }, { "epoch": 1.3370205399928226, "grad_norm": 0.2065698355436325, "learning_rate": 2.9900667948055127e-06, "loss": 0.0087, "step": 158340 }, { "epoch": 1.3371049798399866, "grad_norm": 0.16297632455825806, "learning_rate": 2.989392094051351e-06, "loss": 0.0042, "step": 158350 }, { "epoch": 1.3371894196871503, "grad_norm": 0.3213680684566498, "learning_rate": 2.98871743696718e-06, "loss": 0.0056, "step": 158360 }, { "epoch": 1.3372738595343143, "grad_norm": 0.10579608380794525, "learning_rate": 2.9880428235676553e-06, "loss": 0.0068, "step": 158370 }, { "epoch": 1.337358299381478, "grad_norm": 0.20570364594459534, "learning_rate": 2.987368253867425e-06, "loss": 0.0074, "step": 158380 }, { "epoch": 1.337442739228642, "grad_norm": 0.28105059266090393, "learning_rate": 2.986693727881147e-06, "loss": 0.0023, "step": 158390 }, { "epoch": 1.3375271790758059, "grad_norm": 0.19321340322494507, "learning_rate": 2.986019245623467e-06, "loss": 0.0054, "step": 158400 }, { "epoch": 1.3376116189229696, "grad_norm": 0.16507354378700256, "learning_rate": 2.9853448071090377e-06, "loss": 0.0053, "step": 158410 }, { "epoch": 1.3376960587701336, "grad_norm": 0.24606475234031677, "learning_rate": 2.9846704123525065e-06, "loss": 0.007, "step": 158420 }, { "epoch": 1.3377804986172974, "grad_norm": 0.37138333916664124, "learning_rate": 2.9839960613685188e-06, "loss": 0.0046, "step": 158430 }, { "epoch": 1.3378649384644614, "grad_norm": 0.09074905514717102, "learning_rate": 2.983321754171725e-06, "loss": 0.0096, "step": 158440 }, { "epoch": 1.3379493783116252, "grad_norm": 0.41138729453086853, "learning_rate": 2.9826474907767696e-06, "loss": 0.0056, "step": 158450 }, { "epoch": 1.3380338181587892, "grad_norm": 0.0008455059723928571, "learning_rate": 2.9819732711982973e-06, "loss": 0.0111, "step": 158460 }, { "epoch": 1.338118258005953, "grad_norm": 0.32739678025245667, "learning_rate": 2.98129909545095e-06, "loss": 0.009, "step": 158470 }, { "epoch": 1.338202697853117, "grad_norm": 0.015014028176665306, "learning_rate": 2.9806249635493757e-06, "loss": 0.007, "step": 158480 }, { "epoch": 1.3382871377002807, "grad_norm": 0.2922045588493347, "learning_rate": 2.9799508755082113e-06, "loss": 0.007, "step": 158490 }, { "epoch": 1.3383715775474445, "grad_norm": 0.38634252548217773, "learning_rate": 2.9792768313421017e-06, "loss": 0.0055, "step": 158500 }, { "epoch": 1.3384560173946085, "grad_norm": 0.24947498738765717, "learning_rate": 2.9786028310656823e-06, "loss": 0.0132, "step": 158510 }, { "epoch": 1.3385404572417725, "grad_norm": 0.11222835630178452, "learning_rate": 2.9779288746935974e-06, "loss": 0.0084, "step": 158520 }, { "epoch": 1.3386248970889363, "grad_norm": 0.24697032570838928, "learning_rate": 2.977254962240484e-06, "loss": 0.0054, "step": 158530 }, { "epoch": 1.3387093369361, "grad_norm": 0.4421866834163666, "learning_rate": 2.9765810937209766e-06, "loss": 0.009, "step": 158540 }, { "epoch": 1.338793776783264, "grad_norm": 0.3179864287376404, "learning_rate": 2.9759072691497137e-06, "loss": 0.0075, "step": 158550 }, { "epoch": 1.3388782166304278, "grad_norm": 0.5539879202842712, "learning_rate": 2.9752334885413293e-06, "loss": 0.0073, "step": 158560 }, { "epoch": 1.3389626564775918, "grad_norm": 0.48517754673957825, "learning_rate": 2.9745597519104597e-06, "loss": 0.0079, "step": 158570 }, { "epoch": 1.3390470963247556, "grad_norm": 0.2742321193218231, "learning_rate": 2.9738860592717366e-06, "loss": 0.0141, "step": 158580 }, { "epoch": 1.3391315361719196, "grad_norm": 0.4008966088294983, "learning_rate": 2.973212410639794e-06, "loss": 0.0074, "step": 158590 }, { "epoch": 1.3392159760190834, "grad_norm": 0.5791236162185669, "learning_rate": 2.97253880602926e-06, "loss": 0.0109, "step": 158600 }, { "epoch": 1.3393004158662474, "grad_norm": 0.22089695930480957, "learning_rate": 2.971865245454772e-06, "loss": 0.0067, "step": 158610 }, { "epoch": 1.3393848557134111, "grad_norm": 0.0017877635546028614, "learning_rate": 2.971191728930951e-06, "loss": 0.0099, "step": 158620 }, { "epoch": 1.339469295560575, "grad_norm": 0.45597997307777405, "learning_rate": 2.9705182564724325e-06, "loss": 0.0126, "step": 158630 }, { "epoch": 1.339553735407739, "grad_norm": 0.2166384905576706, "learning_rate": 2.9698448280938406e-06, "loss": 0.0037, "step": 158640 }, { "epoch": 1.339638175254903, "grad_norm": 0.11843452602624893, "learning_rate": 2.969171443809804e-06, "loss": 0.0029, "step": 158650 }, { "epoch": 1.3397226151020667, "grad_norm": 0.16433964669704437, "learning_rate": 2.968498103634947e-06, "loss": 0.0054, "step": 158660 }, { "epoch": 1.3398070549492305, "grad_norm": 0.00044101799721829593, "learning_rate": 2.9678248075838934e-06, "loss": 0.0049, "step": 158670 }, { "epoch": 1.3398914947963945, "grad_norm": 0.04246026277542114, "learning_rate": 2.9671515556712705e-06, "loss": 0.0056, "step": 158680 }, { "epoch": 1.3399759346435582, "grad_norm": 0.09751345217227936, "learning_rate": 2.966478347911698e-06, "loss": 0.0044, "step": 158690 }, { "epoch": 1.3400603744907222, "grad_norm": 0.2640756070613861, "learning_rate": 2.9658051843198005e-06, "loss": 0.0082, "step": 158700 }, { "epoch": 1.340144814337886, "grad_norm": 0.4987187385559082, "learning_rate": 2.9651320649101954e-06, "loss": 0.0063, "step": 158710 }, { "epoch": 1.3402292541850498, "grad_norm": 0.22531312704086304, "learning_rate": 2.964458989697506e-06, "loss": 0.0046, "step": 158720 }, { "epoch": 1.3403136940322138, "grad_norm": 0.1632733941078186, "learning_rate": 2.96378595869635e-06, "loss": 0.0068, "step": 158730 }, { "epoch": 1.3403981338793778, "grad_norm": 0.6167070269584656, "learning_rate": 2.9631129719213467e-06, "loss": 0.0049, "step": 158740 }, { "epoch": 1.3404825737265416, "grad_norm": 0.7561842799186707, "learning_rate": 2.962440029387111e-06, "loss": 0.0073, "step": 158750 }, { "epoch": 1.3405670135737053, "grad_norm": 0.4120629131793976, "learning_rate": 2.9617671311082615e-06, "loss": 0.0089, "step": 158760 }, { "epoch": 1.3406514534208693, "grad_norm": 0.1606469750404358, "learning_rate": 2.9610942770994122e-06, "loss": 0.0113, "step": 158770 }, { "epoch": 1.340735893268033, "grad_norm": 0.22785910964012146, "learning_rate": 2.9604214673751764e-06, "loss": 0.0067, "step": 158780 }, { "epoch": 1.340820333115197, "grad_norm": 0.4354684352874756, "learning_rate": 2.959748701950169e-06, "loss": 0.0102, "step": 158790 }, { "epoch": 1.3409047729623609, "grad_norm": 0.21073409914970398, "learning_rate": 2.959075980839001e-06, "loss": 0.0057, "step": 158800 }, { "epoch": 1.3409892128095249, "grad_norm": 0.37031951546669006, "learning_rate": 2.9584033040562854e-06, "loss": 0.0098, "step": 158810 }, { "epoch": 1.3410736526566887, "grad_norm": 0.40330564975738525, "learning_rate": 2.957730671616631e-06, "loss": 0.0064, "step": 158820 }, { "epoch": 1.3411580925038527, "grad_norm": 0.03709294646978378, "learning_rate": 2.9570580835346486e-06, "loss": 0.0095, "step": 158830 }, { "epoch": 1.3412425323510164, "grad_norm": 0.1402282416820526, "learning_rate": 2.956385539824944e-06, "loss": 0.0056, "step": 158840 }, { "epoch": 1.3413269721981802, "grad_norm": 0.08534149825572968, "learning_rate": 2.9557130405021294e-06, "loss": 0.0081, "step": 158850 }, { "epoch": 1.3414114120453442, "grad_norm": 0.03640326112508774, "learning_rate": 2.9550405855808073e-06, "loss": 0.0054, "step": 158860 }, { "epoch": 1.3414958518925082, "grad_norm": 0.09715095162391663, "learning_rate": 2.954368175075585e-06, "loss": 0.0059, "step": 158870 }, { "epoch": 1.341580291739672, "grad_norm": 0.27550604939460754, "learning_rate": 2.9536958090010674e-06, "loss": 0.0048, "step": 158880 }, { "epoch": 1.3416647315868357, "grad_norm": 0.4007920026779175, "learning_rate": 2.9530234873718556e-06, "loss": 0.0065, "step": 158890 }, { "epoch": 1.3417491714339997, "grad_norm": 0.38323837518692017, "learning_rate": 2.9523512102025566e-06, "loss": 0.0067, "step": 158900 }, { "epoch": 1.3418336112811635, "grad_norm": 0.3722354769706726, "learning_rate": 2.951678977507767e-06, "loss": 0.0041, "step": 158910 }, { "epoch": 1.3419180511283275, "grad_norm": 0.19270087778568268, "learning_rate": 2.9510067893020923e-06, "loss": 0.0131, "step": 158920 }, { "epoch": 1.3420024909754913, "grad_norm": 0.4346173107624054, "learning_rate": 2.9503346456001293e-06, "loss": 0.0077, "step": 158930 }, { "epoch": 1.3420869308226553, "grad_norm": 0.8835578560829163, "learning_rate": 2.9496625464164787e-06, "loss": 0.0121, "step": 158940 }, { "epoch": 1.342171370669819, "grad_norm": 0.0019190331222489476, "learning_rate": 2.948990491765735e-06, "loss": 0.004, "step": 158950 }, { "epoch": 1.342255810516983, "grad_norm": 0.2487243115901947, "learning_rate": 2.948318481662501e-06, "loss": 0.0068, "step": 158960 }, { "epoch": 1.3423402503641468, "grad_norm": 0.3775911331176758, "learning_rate": 2.947646516121367e-06, "loss": 0.0105, "step": 158970 }, { "epoch": 1.3424246902113106, "grad_norm": 0.20265927910804749, "learning_rate": 2.9469745951569312e-06, "loss": 0.0042, "step": 158980 }, { "epoch": 1.3425091300584746, "grad_norm": 0.12825638055801392, "learning_rate": 2.9463027187837845e-06, "loss": 0.0088, "step": 158990 }, { "epoch": 1.3425935699056386, "grad_norm": 0.3728887140750885, "learning_rate": 2.945630887016524e-06, "loss": 0.0092, "step": 159000 }, { "epoch": 1.3426780097528024, "grad_norm": 0.22253894805908203, "learning_rate": 2.9449590998697395e-06, "loss": 0.0058, "step": 159010 }, { "epoch": 1.3427624495999662, "grad_norm": 0.6485963463783264, "learning_rate": 2.944287357358021e-06, "loss": 0.0074, "step": 159020 }, { "epoch": 1.3428468894471302, "grad_norm": 0.15245658159255981, "learning_rate": 2.943615659495962e-06, "loss": 0.0077, "step": 159030 }, { "epoch": 1.342931329294294, "grad_norm": 0.792725145816803, "learning_rate": 2.9429440062981463e-06, "loss": 0.0086, "step": 159040 }, { "epoch": 1.343015769141458, "grad_norm": 0.8273516297340393, "learning_rate": 2.9422723977791677e-06, "loss": 0.0125, "step": 159050 }, { "epoch": 1.3431002089886217, "grad_norm": 0.21875853836536407, "learning_rate": 2.9416008339536095e-06, "loss": 0.0057, "step": 159060 }, { "epoch": 1.3431846488357855, "grad_norm": 0.16850145161151886, "learning_rate": 2.940929314836061e-06, "loss": 0.0065, "step": 159070 }, { "epoch": 1.3432690886829495, "grad_norm": 0.35760852694511414, "learning_rate": 2.940257840441104e-06, "loss": 0.0142, "step": 159080 }, { "epoch": 1.3433535285301135, "grad_norm": 0.08325949311256409, "learning_rate": 2.939586410783326e-06, "loss": 0.0101, "step": 159090 }, { "epoch": 1.3434379683772772, "grad_norm": 0.31172430515289307, "learning_rate": 2.938915025877308e-06, "loss": 0.0107, "step": 159100 }, { "epoch": 1.343522408224441, "grad_norm": 0.18977321684360504, "learning_rate": 2.9382436857376345e-06, "loss": 0.0064, "step": 159110 }, { "epoch": 1.343606848071605, "grad_norm": 0.1989326775074005, "learning_rate": 2.9375723903788855e-06, "loss": 0.0063, "step": 159120 }, { "epoch": 1.3436912879187688, "grad_norm": 0.006885794922709465, "learning_rate": 2.93690113981564e-06, "loss": 0.0077, "step": 159130 }, { "epoch": 1.3437757277659328, "grad_norm": 0.00965824443846941, "learning_rate": 2.93622993406248e-06, "loss": 0.009, "step": 159140 }, { "epoch": 1.3438601676130966, "grad_norm": 0.6818742752075195, "learning_rate": 2.935558773133983e-06, "loss": 0.0079, "step": 159150 }, { "epoch": 1.3439446074602606, "grad_norm": 0.3812335729598999, "learning_rate": 2.9348876570447272e-06, "loss": 0.012, "step": 159160 }, { "epoch": 1.3440290473074243, "grad_norm": 0.7960553765296936, "learning_rate": 2.934216585809285e-06, "loss": 0.0107, "step": 159170 }, { "epoch": 1.3441134871545883, "grad_norm": 0.19398233294487, "learning_rate": 2.933545559442239e-06, "loss": 0.0044, "step": 159180 }, { "epoch": 1.3441979270017521, "grad_norm": 0.20789460837841034, "learning_rate": 2.932874577958158e-06, "loss": 0.0079, "step": 159190 }, { "epoch": 1.3442823668489159, "grad_norm": 0.03696407750248909, "learning_rate": 2.9322036413716192e-06, "loss": 0.0062, "step": 159200 }, { "epoch": 1.3443668066960799, "grad_norm": 0.26726043224334717, "learning_rate": 2.9315327496971915e-06, "loss": 0.0051, "step": 159210 }, { "epoch": 1.3444512465432439, "grad_norm": 0.354402631521225, "learning_rate": 2.9308619029494513e-06, "loss": 0.0084, "step": 159220 }, { "epoch": 1.3445356863904077, "grad_norm": 0.07865989953279495, "learning_rate": 2.9301911011429628e-06, "loss": 0.0065, "step": 159230 }, { "epoch": 1.3446201262375714, "grad_norm": 0.3905118703842163, "learning_rate": 2.9295203442923028e-06, "loss": 0.005, "step": 159240 }, { "epoch": 1.3447045660847354, "grad_norm": 0.26686999201774597, "learning_rate": 2.9288496324120365e-06, "loss": 0.01, "step": 159250 }, { "epoch": 1.3447890059318992, "grad_norm": 0.08705712109804153, "learning_rate": 2.92817896551673e-06, "loss": 0.0069, "step": 159260 }, { "epoch": 1.3448734457790632, "grad_norm": 0.29299670457839966, "learning_rate": 2.927508343620953e-06, "loss": 0.0134, "step": 159270 }, { "epoch": 1.344957885626227, "grad_norm": 0.4993634819984436, "learning_rate": 2.926837766739268e-06, "loss": 0.007, "step": 159280 }, { "epoch": 1.345042325473391, "grad_norm": 0.14761526882648468, "learning_rate": 2.9261672348862446e-06, "loss": 0.0047, "step": 159290 }, { "epoch": 1.3451267653205548, "grad_norm": 0.30946847796440125, "learning_rate": 2.9254967480764416e-06, "loss": 0.0082, "step": 159300 }, { "epoch": 1.3452112051677187, "grad_norm": 0.1248384341597557, "learning_rate": 2.9248263063244266e-06, "loss": 0.0108, "step": 159310 }, { "epoch": 1.3452956450148825, "grad_norm": 0.16859543323516846, "learning_rate": 2.924155909644756e-06, "loss": 0.0042, "step": 159320 }, { "epoch": 1.3453800848620463, "grad_norm": 0.44928812980651855, "learning_rate": 2.923485558051996e-06, "loss": 0.0101, "step": 159330 }, { "epoch": 1.3454645247092103, "grad_norm": 0.37295979261398315, "learning_rate": 2.9228152515607034e-06, "loss": 0.0045, "step": 159340 }, { "epoch": 1.345548964556374, "grad_norm": 0.16583624482154846, "learning_rate": 2.9221449901854383e-06, "loss": 0.0063, "step": 159350 }, { "epoch": 1.345633404403538, "grad_norm": 0.0723089799284935, "learning_rate": 2.921474773940759e-06, "loss": 0.0062, "step": 159360 }, { "epoch": 1.3457178442507018, "grad_norm": 0.24050654470920563, "learning_rate": 2.920804602841219e-06, "loss": 0.0099, "step": 159370 }, { "epoch": 1.3458022840978658, "grad_norm": 0.4860328733921051, "learning_rate": 2.920134476901379e-06, "loss": 0.0113, "step": 159380 }, { "epoch": 1.3458867239450296, "grad_norm": 0.001508791116066277, "learning_rate": 2.9194643961357914e-06, "loss": 0.0075, "step": 159390 }, { "epoch": 1.3459711637921936, "grad_norm": 0.21513600647449493, "learning_rate": 2.91879436055901e-06, "loss": 0.0048, "step": 159400 }, { "epoch": 1.3460556036393574, "grad_norm": 0.3346964716911316, "learning_rate": 2.9181243701855906e-06, "loss": 0.0051, "step": 159410 }, { "epoch": 1.3461400434865212, "grad_norm": 0.5969848036766052, "learning_rate": 2.917454425030083e-06, "loss": 0.0083, "step": 159420 }, { "epoch": 1.3462244833336852, "grad_norm": 0.31907400488853455, "learning_rate": 2.916784525107036e-06, "loss": 0.0035, "step": 159430 }, { "epoch": 1.3463089231808492, "grad_norm": 0.10745546221733093, "learning_rate": 2.9161146704310046e-06, "loss": 0.0056, "step": 159440 }, { "epoch": 1.346393363028013, "grad_norm": 0.3283081650733948, "learning_rate": 2.9154448610165333e-06, "loss": 0.0045, "step": 159450 }, { "epoch": 1.3464778028751767, "grad_norm": 0.7745784521102905, "learning_rate": 2.9147750968781744e-06, "loss": 0.0071, "step": 159460 }, { "epoch": 1.3465622427223407, "grad_norm": 0.15066012740135193, "learning_rate": 2.9141053780304733e-06, "loss": 0.0041, "step": 159470 }, { "epoch": 1.3466466825695045, "grad_norm": 0.6925479769706726, "learning_rate": 2.9134357044879743e-06, "loss": 0.0078, "step": 159480 }, { "epoch": 1.3467311224166685, "grad_norm": 0.356318861246109, "learning_rate": 2.9127660762652256e-06, "loss": 0.0102, "step": 159490 }, { "epoch": 1.3468155622638323, "grad_norm": 0.5152115225791931, "learning_rate": 2.9120964933767703e-06, "loss": 0.0079, "step": 159500 }, { "epoch": 1.3469000021109963, "grad_norm": 0.12456461042165756, "learning_rate": 2.911426955837151e-06, "loss": 0.003, "step": 159510 }, { "epoch": 1.34698444195816, "grad_norm": 0.06102743372321129, "learning_rate": 2.9107574636609094e-06, "loss": 0.0026, "step": 159520 }, { "epoch": 1.347068881805324, "grad_norm": 0.12317851185798645, "learning_rate": 2.9100880168625883e-06, "loss": 0.0037, "step": 159530 }, { "epoch": 1.3471533216524878, "grad_norm": 0.30458372831344604, "learning_rate": 2.9094186154567256e-06, "loss": 0.0049, "step": 159540 }, { "epoch": 1.3472377614996516, "grad_norm": 0.2773963212966919, "learning_rate": 2.9087492594578644e-06, "loss": 0.0079, "step": 159550 }, { "epoch": 1.3473222013468156, "grad_norm": 0.23635932803153992, "learning_rate": 2.90807994888054e-06, "loss": 0.0067, "step": 159560 }, { "epoch": 1.3474066411939796, "grad_norm": 0.18310031294822693, "learning_rate": 2.9074106837392914e-06, "loss": 0.0104, "step": 159570 }, { "epoch": 1.3474910810411433, "grad_norm": 0.528592050075531, "learning_rate": 2.906741464048651e-06, "loss": 0.0064, "step": 159580 }, { "epoch": 1.3475755208883071, "grad_norm": 0.1800151765346527, "learning_rate": 2.9060722898231595e-06, "loss": 0.0085, "step": 159590 }, { "epoch": 1.3476599607354711, "grad_norm": 0.10536655783653259, "learning_rate": 2.905403161077349e-06, "loss": 0.0121, "step": 159600 }, { "epoch": 1.347744400582635, "grad_norm": 0.1962367296218872, "learning_rate": 2.9047340778257506e-06, "loss": 0.0063, "step": 159610 }, { "epoch": 1.347828840429799, "grad_norm": 0.04837745800614357, "learning_rate": 2.9040650400829005e-06, "loss": 0.0064, "step": 159620 }, { "epoch": 1.3479132802769627, "grad_norm": 0.18321789801120758, "learning_rate": 2.9033960478633262e-06, "loss": 0.0069, "step": 159630 }, { "epoch": 1.3479977201241264, "grad_norm": 0.08395758271217346, "learning_rate": 2.902727101181562e-06, "loss": 0.0075, "step": 159640 }, { "epoch": 1.3480821599712904, "grad_norm": 0.2808399796485901, "learning_rate": 2.902058200052136e-06, "loss": 0.0073, "step": 159650 }, { "epoch": 1.3481665998184544, "grad_norm": 0.1512186974287033, "learning_rate": 2.901389344489575e-06, "loss": 0.0058, "step": 159660 }, { "epoch": 1.3482510396656182, "grad_norm": 0.18275174498558044, "learning_rate": 2.900720534508406e-06, "loss": 0.0066, "step": 159670 }, { "epoch": 1.348335479512782, "grad_norm": 0.30187490582466125, "learning_rate": 2.900051770123159e-06, "loss": 0.0074, "step": 159680 }, { "epoch": 1.348419919359946, "grad_norm": 0.9778198599815369, "learning_rate": 2.8993830513483543e-06, "loss": 0.0074, "step": 159690 }, { "epoch": 1.3485043592071098, "grad_norm": 0.013923252001404762, "learning_rate": 2.8987143781985227e-06, "loss": 0.0036, "step": 159700 }, { "epoch": 1.3485887990542738, "grad_norm": 0.14545026421546936, "learning_rate": 2.898045750688183e-06, "loss": 0.0121, "step": 159710 }, { "epoch": 1.3486732389014375, "grad_norm": 0.22884783148765564, "learning_rate": 2.8973771688318577e-06, "loss": 0.0067, "step": 159720 }, { "epoch": 1.3487576787486015, "grad_norm": 0.290072500705719, "learning_rate": 2.896708632644071e-06, "loss": 0.0074, "step": 159730 }, { "epoch": 1.3488421185957653, "grad_norm": 0.2575913369655609, "learning_rate": 2.896040142139341e-06, "loss": 0.0089, "step": 159740 }, { "epoch": 1.3489265584429293, "grad_norm": 0.43098676204681396, "learning_rate": 2.8953716973321894e-06, "loss": 0.0063, "step": 159750 }, { "epoch": 1.349010998290093, "grad_norm": 0.3427032232284546, "learning_rate": 2.89470329823713e-06, "loss": 0.0051, "step": 159760 }, { "epoch": 1.3490954381372569, "grad_norm": 0.19993235170841217, "learning_rate": 2.8940349448686868e-06, "loss": 0.0045, "step": 159770 }, { "epoch": 1.3491798779844209, "grad_norm": 0.07275997847318649, "learning_rate": 2.8933666372413706e-06, "loss": 0.0129, "step": 159780 }, { "epoch": 1.3492643178315848, "grad_norm": 0.47321316599845886, "learning_rate": 2.8926983753697015e-06, "loss": 0.0085, "step": 159790 }, { "epoch": 1.3493487576787486, "grad_norm": 0.30548587441444397, "learning_rate": 2.8920301592681887e-06, "loss": 0.0063, "step": 159800 }, { "epoch": 1.3494331975259124, "grad_norm": 0.5441100597381592, "learning_rate": 2.8913619889513545e-06, "loss": 0.0088, "step": 159810 }, { "epoch": 1.3495176373730764, "grad_norm": 0.020987875759601593, "learning_rate": 2.8906938644337007e-06, "loss": 0.0093, "step": 159820 }, { "epoch": 1.3496020772202402, "grad_norm": 0.32060375809669495, "learning_rate": 2.8900257857297465e-06, "loss": 0.0043, "step": 159830 }, { "epoch": 1.3496865170674042, "grad_norm": 0.09541905671358109, "learning_rate": 2.8893577528539997e-06, "loss": 0.0074, "step": 159840 }, { "epoch": 1.349770956914568, "grad_norm": 0.2852546274662018, "learning_rate": 2.8886897658209676e-06, "loss": 0.0042, "step": 159850 }, { "epoch": 1.349855396761732, "grad_norm": 0.4816819727420807, "learning_rate": 2.888021824645163e-06, "loss": 0.0081, "step": 159860 }, { "epoch": 1.3499398366088957, "grad_norm": 1.0517592430114746, "learning_rate": 2.88735392934109e-06, "loss": 0.0078, "step": 159870 }, { "epoch": 1.3500242764560597, "grad_norm": 0.808560848236084, "learning_rate": 2.886686079923259e-06, "loss": 0.0118, "step": 159880 }, { "epoch": 1.3501087163032235, "grad_norm": 0.03446632996201515, "learning_rate": 2.886018276406173e-06, "loss": 0.009, "step": 159890 }, { "epoch": 1.3501931561503873, "grad_norm": 0.26526519656181335, "learning_rate": 2.885350518804337e-06, "loss": 0.0096, "step": 159900 }, { "epoch": 1.3502775959975513, "grad_norm": 0.4498988389968872, "learning_rate": 2.884682807132252e-06, "loss": 0.0093, "step": 159910 }, { "epoch": 1.3503620358447153, "grad_norm": 0.24394099414348602, "learning_rate": 2.8840151414044252e-06, "loss": 0.007, "step": 159920 }, { "epoch": 1.350446475691879, "grad_norm": 0.263746440410614, "learning_rate": 2.8833475216353547e-06, "loss": 0.0053, "step": 159930 }, { "epoch": 1.3505309155390428, "grad_norm": 0.614942193031311, "learning_rate": 2.882679947839543e-06, "loss": 0.0061, "step": 159940 }, { "epoch": 1.3506153553862068, "grad_norm": 0.15099728107452393, "learning_rate": 2.882012420031489e-06, "loss": 0.006, "step": 159950 }, { "epoch": 1.3506997952333706, "grad_norm": 0.19791781902313232, "learning_rate": 2.8813449382256902e-06, "loss": 0.0056, "step": 159960 }, { "epoch": 1.3507842350805346, "grad_norm": 0.23879708349704742, "learning_rate": 2.880677502436647e-06, "loss": 0.0081, "step": 159970 }, { "epoch": 1.3508686749276984, "grad_norm": 0.26504331827163696, "learning_rate": 2.880010112678854e-06, "loss": 0.0123, "step": 159980 }, { "epoch": 1.3509531147748621, "grad_norm": 0.07129599153995514, "learning_rate": 2.879342768966806e-06, "loss": 0.003, "step": 159990 }, { "epoch": 1.3510375546220261, "grad_norm": 0.346047043800354, "learning_rate": 2.8786754713149977e-06, "loss": 0.0097, "step": 160000 }, { "epoch": 1.3511219944691901, "grad_norm": 0.1375838667154312, "learning_rate": 2.878008219737925e-06, "loss": 0.005, "step": 160010 }, { "epoch": 1.351206434316354, "grad_norm": 0.9561718702316284, "learning_rate": 2.877341014250077e-06, "loss": 0.0075, "step": 160020 }, { "epoch": 1.3512908741635177, "grad_norm": 0.36226123571395874, "learning_rate": 2.876673854865949e-06, "loss": 0.0067, "step": 160030 }, { "epoch": 1.3513753140106817, "grad_norm": 0.26951077580451965, "learning_rate": 2.8760067416000283e-06, "loss": 0.0056, "step": 160040 }, { "epoch": 1.3514597538578454, "grad_norm": 0.41236943006515503, "learning_rate": 2.875339674466807e-06, "loss": 0.0067, "step": 160050 }, { "epoch": 1.3515441937050094, "grad_norm": 0.4123656749725342, "learning_rate": 2.8746726534807733e-06, "loss": 0.0089, "step": 160060 }, { "epoch": 1.3516286335521732, "grad_norm": 0.4048345983028412, "learning_rate": 2.874005678656414e-06, "loss": 0.0078, "step": 160070 }, { "epoch": 1.3517130733993372, "grad_norm": 0.3029075562953949, "learning_rate": 2.8733387500082156e-06, "loss": 0.0091, "step": 160080 }, { "epoch": 1.351797513246501, "grad_norm": 0.3599647283554077, "learning_rate": 2.8726718675506614e-06, "loss": 0.0081, "step": 160090 }, { "epoch": 1.351881953093665, "grad_norm": 0.32470399141311646, "learning_rate": 2.872005031298241e-06, "loss": 0.009, "step": 160100 }, { "epoch": 1.3519663929408288, "grad_norm": 0.17540161311626434, "learning_rate": 2.8713382412654335e-06, "loss": 0.0075, "step": 160110 }, { "epoch": 1.3520508327879925, "grad_norm": 0.05735218524932861, "learning_rate": 2.8706714974667238e-06, "loss": 0.009, "step": 160120 }, { "epoch": 1.3521352726351565, "grad_norm": 0.3138875365257263, "learning_rate": 2.8700047999165936e-06, "loss": 0.0099, "step": 160130 }, { "epoch": 1.3522197124823205, "grad_norm": 0.14850573241710663, "learning_rate": 2.869338148629523e-06, "loss": 0.0077, "step": 160140 }, { "epoch": 1.3523041523294843, "grad_norm": 0.2869589030742645, "learning_rate": 2.8686715436199887e-06, "loss": 0.0067, "step": 160150 }, { "epoch": 1.352388592176648, "grad_norm": 0.2363753318786621, "learning_rate": 2.868004984902474e-06, "loss": 0.0088, "step": 160160 }, { "epoch": 1.352473032023812, "grad_norm": 0.42673739790916443, "learning_rate": 2.867338472491452e-06, "loss": 0.0089, "step": 160170 }, { "epoch": 1.3525574718709759, "grad_norm": 0.5503128170967102, "learning_rate": 2.8666720064014037e-06, "loss": 0.0043, "step": 160180 }, { "epoch": 1.3526419117181399, "grad_norm": 0.12908343970775604, "learning_rate": 2.866005586646802e-06, "loss": 0.0082, "step": 160190 }, { "epoch": 1.3527263515653036, "grad_norm": 0.26030433177948, "learning_rate": 2.86533921324212e-06, "loss": 0.0083, "step": 160200 }, { "epoch": 1.3528107914124674, "grad_norm": 0.5095206499099731, "learning_rate": 2.8646728862018335e-06, "loss": 0.013, "step": 160210 }, { "epoch": 1.3528952312596314, "grad_norm": 0.14336667954921722, "learning_rate": 2.864006605540416e-06, "loss": 0.0043, "step": 160220 }, { "epoch": 1.3529796711067954, "grad_norm": 0.24124494194984436, "learning_rate": 2.863340371272336e-06, "loss": 0.0043, "step": 160230 }, { "epoch": 1.3530641109539592, "grad_norm": 0.6865530610084534, "learning_rate": 2.8626741834120644e-06, "loss": 0.015, "step": 160240 }, { "epoch": 1.353148550801123, "grad_norm": 0.5714998245239258, "learning_rate": 2.862008041974073e-06, "loss": 0.0081, "step": 160250 }, { "epoch": 1.353232990648287, "grad_norm": 0.46059805154800415, "learning_rate": 2.861341946972827e-06, "loss": 0.0126, "step": 160260 }, { "epoch": 1.3533174304954507, "grad_norm": 0.1920350193977356, "learning_rate": 2.8606758984227978e-06, "loss": 0.0064, "step": 160270 }, { "epoch": 1.3534018703426147, "grad_norm": 0.35849517583847046, "learning_rate": 2.860009896338447e-06, "loss": 0.0081, "step": 160280 }, { "epoch": 1.3534863101897785, "grad_norm": 0.04395556077361107, "learning_rate": 2.8593439407342455e-06, "loss": 0.0038, "step": 160290 }, { "epoch": 1.3535707500369425, "grad_norm": 0.4211843013763428, "learning_rate": 2.8586780316246545e-06, "loss": 0.0046, "step": 160300 }, { "epoch": 1.3536551898841063, "grad_norm": 0.09972330927848816, "learning_rate": 2.858012169024138e-06, "loss": 0.0041, "step": 160310 }, { "epoch": 1.3537396297312703, "grad_norm": 1.1796692609786987, "learning_rate": 2.8573463529471583e-06, "loss": 0.0093, "step": 160320 }, { "epoch": 1.353824069578434, "grad_norm": 0.14907513558864594, "learning_rate": 2.8566805834081747e-06, "loss": 0.0055, "step": 160330 }, { "epoch": 1.3539085094255978, "grad_norm": 0.12020943313837051, "learning_rate": 2.856014860421652e-06, "loss": 0.0059, "step": 160340 }, { "epoch": 1.3539929492727618, "grad_norm": 0.19680695235729218, "learning_rate": 2.855349184002044e-06, "loss": 0.0093, "step": 160350 }, { "epoch": 1.3540773891199258, "grad_norm": 0.41716787219047546, "learning_rate": 2.854683554163815e-06, "loss": 0.0075, "step": 160360 }, { "epoch": 1.3541618289670896, "grad_norm": 0.2532118260860443, "learning_rate": 2.8540179709214176e-06, "loss": 0.0048, "step": 160370 }, { "epoch": 1.3542462688142534, "grad_norm": 0.44920945167541504, "learning_rate": 2.8533524342893127e-06, "loss": 0.0101, "step": 160380 }, { "epoch": 1.3543307086614174, "grad_norm": 0.254029780626297, "learning_rate": 2.8526869442819526e-06, "loss": 0.0071, "step": 160390 }, { "epoch": 1.3544151485085811, "grad_norm": 0.5018427968025208, "learning_rate": 2.852021500913792e-06, "loss": 0.0065, "step": 160400 }, { "epoch": 1.3544995883557451, "grad_norm": 0.49431169033050537, "learning_rate": 2.8513561041992822e-06, "loss": 0.0073, "step": 160410 }, { "epoch": 1.354584028202909, "grad_norm": 1.42036771774292, "learning_rate": 2.8506907541528795e-06, "loss": 0.0068, "step": 160420 }, { "epoch": 1.354668468050073, "grad_norm": 0.2875106930732727, "learning_rate": 2.850025450789033e-06, "loss": 0.0062, "step": 160430 }, { "epoch": 1.3547529078972367, "grad_norm": 0.07088973373174667, "learning_rate": 2.8493601941221917e-06, "loss": 0.0036, "step": 160440 }, { "epoch": 1.3548373477444007, "grad_norm": 0.3779091536998749, "learning_rate": 2.848694984166808e-06, "loss": 0.0095, "step": 160450 }, { "epoch": 1.3549217875915645, "grad_norm": 0.13789165019989014, "learning_rate": 2.848029820937328e-06, "loss": 0.0051, "step": 160460 }, { "epoch": 1.3550062274387282, "grad_norm": 0.868462085723877, "learning_rate": 2.847364704448199e-06, "loss": 0.0131, "step": 160470 }, { "epoch": 1.3550906672858922, "grad_norm": 0.2952720820903778, "learning_rate": 2.8466996347138666e-06, "loss": 0.005, "step": 160480 }, { "epoch": 1.3551751071330562, "grad_norm": 0.17366155982017517, "learning_rate": 2.846034611748778e-06, "loss": 0.0041, "step": 160490 }, { "epoch": 1.35525954698022, "grad_norm": 0.09189625084400177, "learning_rate": 2.8453696355673744e-06, "loss": 0.0084, "step": 160500 }, { "epoch": 1.3553439868273838, "grad_norm": 0.029919452965259552, "learning_rate": 2.844704706184103e-06, "loss": 0.007, "step": 160510 }, { "epoch": 1.3554284266745478, "grad_norm": 0.6653876304626465, "learning_rate": 2.844039823613402e-06, "loss": 0.0065, "step": 160520 }, { "epoch": 1.3555128665217115, "grad_norm": 1.0784136056900024, "learning_rate": 2.843374987869716e-06, "loss": 0.0074, "step": 160530 }, { "epoch": 1.3555973063688755, "grad_norm": 0.22001975774765015, "learning_rate": 2.8427101989674834e-06, "loss": 0.0039, "step": 160540 }, { "epoch": 1.3556817462160393, "grad_norm": 0.08930838853120804, "learning_rate": 2.842045456921143e-06, "loss": 0.0058, "step": 160550 }, { "epoch": 1.355766186063203, "grad_norm": 0.5922790765762329, "learning_rate": 2.8413807617451328e-06, "loss": 0.017, "step": 160560 }, { "epoch": 1.355850625910367, "grad_norm": 0.0215720497071743, "learning_rate": 2.8407161134538886e-06, "loss": 0.0092, "step": 160570 }, { "epoch": 1.355935065757531, "grad_norm": 0.3643553853034973, "learning_rate": 2.84005151206185e-06, "loss": 0.008, "step": 160580 }, { "epoch": 1.3560195056046949, "grad_norm": 0.3257039785385132, "learning_rate": 2.8393869575834476e-06, "loss": 0.0087, "step": 160590 }, { "epoch": 1.3561039454518586, "grad_norm": 0.2690720856189728, "learning_rate": 2.83872245003312e-06, "loss": 0.0064, "step": 160600 }, { "epoch": 1.3561883852990226, "grad_norm": 0.16466671228408813, "learning_rate": 2.8380579894252956e-06, "loss": 0.0047, "step": 160610 }, { "epoch": 1.3562728251461864, "grad_norm": 0.03582088649272919, "learning_rate": 2.8373935757744107e-06, "loss": 0.0117, "step": 160620 }, { "epoch": 1.3563572649933504, "grad_norm": 0.20380300283432007, "learning_rate": 2.836729209094895e-06, "loss": 0.0054, "step": 160630 }, { "epoch": 1.3564417048405142, "grad_norm": 0.09547761082649231, "learning_rate": 2.8360648894011767e-06, "loss": 0.0074, "step": 160640 }, { "epoch": 1.3565261446876782, "grad_norm": 0.25205540657043457, "learning_rate": 2.8354006167076834e-06, "loss": 0.0077, "step": 160650 }, { "epoch": 1.356610584534842, "grad_norm": 0.4180782437324524, "learning_rate": 2.834736391028848e-06, "loss": 0.0117, "step": 160660 }, { "epoch": 1.356695024382006, "grad_norm": 0.2571500241756439, "learning_rate": 2.8340722123790935e-06, "loss": 0.0063, "step": 160670 }, { "epoch": 1.3567794642291697, "grad_norm": 0.0730871707201004, "learning_rate": 2.8334080807728455e-06, "loss": 0.0044, "step": 160680 }, { "epoch": 1.3568639040763335, "grad_norm": 0.2852243185043335, "learning_rate": 2.832743996224532e-06, "loss": 0.0127, "step": 160690 }, { "epoch": 1.3569483439234975, "grad_norm": 0.41423434019088745, "learning_rate": 2.832079958748572e-06, "loss": 0.0085, "step": 160700 }, { "epoch": 1.3570327837706615, "grad_norm": 0.16865260899066925, "learning_rate": 2.8314159683593957e-06, "loss": 0.0046, "step": 160710 }, { "epoch": 1.3571172236178253, "grad_norm": 0.27016374468803406, "learning_rate": 2.830752025071416e-06, "loss": 0.0036, "step": 160720 }, { "epoch": 1.357201663464989, "grad_norm": 0.003572643268853426, "learning_rate": 2.83008812889906e-06, "loss": 0.0082, "step": 160730 }, { "epoch": 1.357286103312153, "grad_norm": 0.37546151876449585, "learning_rate": 2.829424279856743e-06, "loss": 0.0072, "step": 160740 }, { "epoch": 1.3573705431593168, "grad_norm": 0.40855666995048523, "learning_rate": 2.8287604779588875e-06, "loss": 0.007, "step": 160750 }, { "epoch": 1.3574549830064808, "grad_norm": 0.12010830640792847, "learning_rate": 2.8280967232199073e-06, "loss": 0.0052, "step": 160760 }, { "epoch": 1.3575394228536446, "grad_norm": 0.38463953137397766, "learning_rate": 2.827433015654224e-06, "loss": 0.0079, "step": 160770 }, { "epoch": 1.3576238627008086, "grad_norm": 0.45564255118370056, "learning_rate": 2.8267693552762506e-06, "loss": 0.0085, "step": 160780 }, { "epoch": 1.3577083025479724, "grad_norm": 0.01602625474333763, "learning_rate": 2.8261057421004007e-06, "loss": 0.0018, "step": 160790 }, { "epoch": 1.3577927423951364, "grad_norm": 0.058511488139629364, "learning_rate": 2.8254421761410888e-06, "loss": 0.01, "step": 160800 }, { "epoch": 1.3578771822423001, "grad_norm": 0.3209459185600281, "learning_rate": 2.8247786574127257e-06, "loss": 0.0109, "step": 160810 }, { "epoch": 1.357961622089464, "grad_norm": 5.596005916595459, "learning_rate": 2.8241151859297255e-06, "loss": 0.016, "step": 160820 }, { "epoch": 1.358046061936628, "grad_norm": 0.066908098757267, "learning_rate": 2.823451761706496e-06, "loss": 0.0079, "step": 160830 }, { "epoch": 1.3581305017837917, "grad_norm": 0.12175414711236954, "learning_rate": 2.8227883847574513e-06, "loss": 0.0063, "step": 160840 }, { "epoch": 1.3582149416309557, "grad_norm": 0.2972108721733093, "learning_rate": 2.8221250550969937e-06, "loss": 0.0059, "step": 160850 }, { "epoch": 1.3582993814781195, "grad_norm": 0.2478334605693817, "learning_rate": 2.8214617727395354e-06, "loss": 0.0062, "step": 160860 }, { "epoch": 1.3583838213252835, "grad_norm": 0.6866435408592224, "learning_rate": 2.8207985376994813e-06, "loss": 0.0062, "step": 160870 }, { "epoch": 1.3584682611724472, "grad_norm": 0.4075837731361389, "learning_rate": 2.8201353499912366e-06, "loss": 0.0063, "step": 160880 }, { "epoch": 1.3585527010196112, "grad_norm": 0.033570319414138794, "learning_rate": 2.819472209629205e-06, "loss": 0.008, "step": 160890 }, { "epoch": 1.358637140866775, "grad_norm": 0.1222507506608963, "learning_rate": 2.8188091166277883e-06, "loss": 0.008, "step": 160900 }, { "epoch": 1.3587215807139388, "grad_norm": 0.16227160394191742, "learning_rate": 2.8181460710013926e-06, "loss": 0.005, "step": 160910 }, { "epoch": 1.3588060205611028, "grad_norm": 0.3172532320022583, "learning_rate": 2.817483072764415e-06, "loss": 0.0052, "step": 160920 }, { "epoch": 1.3588904604082668, "grad_norm": 0.3066761791706085, "learning_rate": 2.8168201219312597e-06, "loss": 0.007, "step": 160930 }, { "epoch": 1.3589749002554306, "grad_norm": 0.25969791412353516, "learning_rate": 2.8161572185163223e-06, "loss": 0.0037, "step": 160940 }, { "epoch": 1.3590593401025943, "grad_norm": 0.32279813289642334, "learning_rate": 2.815494362534005e-06, "loss": 0.0042, "step": 160950 }, { "epoch": 1.3591437799497583, "grad_norm": 0.1341160535812378, "learning_rate": 2.814831553998702e-06, "loss": 0.0076, "step": 160960 }, { "epoch": 1.359228219796922, "grad_norm": 0.570546567440033, "learning_rate": 2.8141687929248096e-06, "loss": 0.0085, "step": 160970 }, { "epoch": 1.359312659644086, "grad_norm": 0.15468575060367584, "learning_rate": 2.8135060793267217e-06, "loss": 0.006, "step": 160980 }, { "epoch": 1.3593970994912499, "grad_norm": 0.03165110573172569, "learning_rate": 2.8128434132188354e-06, "loss": 0.0082, "step": 160990 }, { "epoch": 1.3594815393384139, "grad_norm": 0.14693443477153778, "learning_rate": 2.81218079461554e-06, "loss": 0.0055, "step": 161000 }, { "epoch": 1.3595659791855776, "grad_norm": 0.48922258615493774, "learning_rate": 2.811518223531231e-06, "loss": 0.0093, "step": 161010 }, { "epoch": 1.3596504190327416, "grad_norm": 0.08071880787611008, "learning_rate": 2.8108556999802983e-06, "loss": 0.0113, "step": 161020 }, { "epoch": 1.3597348588799054, "grad_norm": 0.286432683467865, "learning_rate": 2.810193223977131e-06, "loss": 0.0048, "step": 161030 }, { "epoch": 1.3598192987270692, "grad_norm": 0.5378720760345459, "learning_rate": 2.8095307955361185e-06, "loss": 0.0096, "step": 161040 }, { "epoch": 1.3599037385742332, "grad_norm": 0.34438356757164, "learning_rate": 2.808868414671646e-06, "loss": 0.0064, "step": 161050 }, { "epoch": 1.3599881784213972, "grad_norm": 0.5488584041595459, "learning_rate": 2.808206081398104e-06, "loss": 0.0103, "step": 161060 }, { "epoch": 1.360072618268561, "grad_norm": 0.2915600836277008, "learning_rate": 2.8075437957298755e-06, "loss": 0.0066, "step": 161070 }, { "epoch": 1.3601570581157247, "grad_norm": 0.543001651763916, "learning_rate": 2.8068815576813483e-06, "loss": 0.0093, "step": 161080 }, { "epoch": 1.3602414979628887, "grad_norm": 0.6603832244873047, "learning_rate": 2.8062193672669026e-06, "loss": 0.0084, "step": 161090 }, { "epoch": 1.3603259378100525, "grad_norm": 1.063611626625061, "learning_rate": 2.8055572245009244e-06, "loss": 0.0107, "step": 161100 }, { "epoch": 1.3604103776572165, "grad_norm": 0.19526880979537964, "learning_rate": 2.8048951293977934e-06, "loss": 0.011, "step": 161110 }, { "epoch": 1.3604948175043803, "grad_norm": 0.30284789204597473, "learning_rate": 2.80423308197189e-06, "loss": 0.0059, "step": 161120 }, { "epoch": 1.360579257351544, "grad_norm": 0.14862069487571716, "learning_rate": 2.803571082237595e-06, "loss": 0.009, "step": 161130 }, { "epoch": 1.360663697198708, "grad_norm": 0.11673944443464279, "learning_rate": 2.802909130209283e-06, "loss": 0.0086, "step": 161140 }, { "epoch": 1.360748137045872, "grad_norm": 0.2247777283191681, "learning_rate": 2.8022472259013372e-06, "loss": 0.0034, "step": 161150 }, { "epoch": 1.3608325768930358, "grad_norm": 0.834998369216919, "learning_rate": 2.8015853693281293e-06, "loss": 0.012, "step": 161160 }, { "epoch": 1.3609170167401996, "grad_norm": 0.17314830422401428, "learning_rate": 2.8009235605040385e-06, "loss": 0.0039, "step": 161170 }, { "epoch": 1.3610014565873636, "grad_norm": 0.4086928367614746, "learning_rate": 2.800261799443435e-06, "loss": 0.0079, "step": 161180 }, { "epoch": 1.3610858964345274, "grad_norm": 0.17411883175373077, "learning_rate": 2.799600086160697e-06, "loss": 0.0047, "step": 161190 }, { "epoch": 1.3611703362816914, "grad_norm": 0.47786879539489746, "learning_rate": 2.798938420670194e-06, "loss": 0.0069, "step": 161200 }, { "epoch": 1.3612547761288551, "grad_norm": 0.6079053282737732, "learning_rate": 2.7982768029862973e-06, "loss": 0.0101, "step": 161210 }, { "epoch": 1.3613392159760191, "grad_norm": 0.009586428292095661, "learning_rate": 2.7976152331233752e-06, "loss": 0.005, "step": 161220 }, { "epoch": 1.361423655823183, "grad_norm": 0.3516504168510437, "learning_rate": 2.796953711095801e-06, "loss": 0.0067, "step": 161230 }, { "epoch": 1.361508095670347, "grad_norm": 0.3347325026988983, "learning_rate": 2.796292236917939e-06, "loss": 0.0127, "step": 161240 }, { "epoch": 1.3615925355175107, "grad_norm": 0.532425582408905, "learning_rate": 2.7956308106041604e-06, "loss": 0.0072, "step": 161250 }, { "epoch": 1.3616769753646745, "grad_norm": 0.37642917037010193, "learning_rate": 2.794969432168828e-06, "loss": 0.0067, "step": 161260 }, { "epoch": 1.3617614152118385, "grad_norm": 0.22405406832695007, "learning_rate": 2.7943081016263065e-06, "loss": 0.0033, "step": 161270 }, { "epoch": 1.3618458550590025, "grad_norm": 0.39759549498558044, "learning_rate": 2.7936468189909625e-06, "loss": 0.0093, "step": 161280 }, { "epoch": 1.3619302949061662, "grad_norm": 0.296597421169281, "learning_rate": 2.7929855842771576e-06, "loss": 0.0087, "step": 161290 }, { "epoch": 1.36201473475333, "grad_norm": 0.3955860137939453, "learning_rate": 2.7923243974992533e-06, "loss": 0.0064, "step": 161300 }, { "epoch": 1.362099174600494, "grad_norm": 0.6506901383399963, "learning_rate": 2.7916632586716084e-06, "loss": 0.0087, "step": 161310 }, { "epoch": 1.3621836144476578, "grad_norm": 0.2451777458190918, "learning_rate": 2.791002167808588e-06, "loss": 0.0066, "step": 161320 }, { "epoch": 1.3622680542948218, "grad_norm": 0.22225196659564972, "learning_rate": 2.7903411249245457e-06, "loss": 0.0065, "step": 161330 }, { "epoch": 1.3623524941419856, "grad_norm": 0.30260467529296875, "learning_rate": 2.7896801300338426e-06, "loss": 0.0057, "step": 161340 }, { "epoch": 1.3624369339891496, "grad_norm": 0.34898969531059265, "learning_rate": 2.789019183150835e-06, "loss": 0.004, "step": 161350 }, { "epoch": 1.3625213738363133, "grad_norm": 0.5853236317634583, "learning_rate": 2.7883582842898773e-06, "loss": 0.0086, "step": 161360 }, { "epoch": 1.3626058136834773, "grad_norm": 0.04310399666428566, "learning_rate": 2.787697433465324e-06, "loss": 0.0055, "step": 161370 }, { "epoch": 1.362690253530641, "grad_norm": 0.04757268354296684, "learning_rate": 2.7870366306915277e-06, "loss": 0.004, "step": 161380 }, { "epoch": 1.3627746933778049, "grad_norm": 0.10991615802049637, "learning_rate": 2.786375875982844e-06, "loss": 0.0077, "step": 161390 }, { "epoch": 1.3628591332249689, "grad_norm": 0.4971681535243988, "learning_rate": 2.785715169353621e-06, "loss": 0.0065, "step": 161400 }, { "epoch": 1.3629435730721329, "grad_norm": 0.1148795485496521, "learning_rate": 2.7850545108182124e-06, "loss": 0.0095, "step": 161410 }, { "epoch": 1.3630280129192966, "grad_norm": 0.26477837562561035, "learning_rate": 2.7843939003909636e-06, "loss": 0.0057, "step": 161420 }, { "epoch": 1.3631124527664604, "grad_norm": 0.1556544154882431, "learning_rate": 2.783733338086228e-06, "loss": 0.0072, "step": 161430 }, { "epoch": 1.3631968926136244, "grad_norm": 0.2834022045135498, "learning_rate": 2.78307282391835e-06, "loss": 0.0085, "step": 161440 }, { "epoch": 1.3632813324607882, "grad_norm": 0.16122834384441376, "learning_rate": 2.782412357901676e-06, "loss": 0.0084, "step": 161450 }, { "epoch": 1.3633657723079522, "grad_norm": 0.002128930762410164, "learning_rate": 2.7817519400505487e-06, "loss": 0.0065, "step": 161460 }, { "epoch": 1.363450212155116, "grad_norm": 0.27516019344329834, "learning_rate": 2.7810915703793175e-06, "loss": 0.0042, "step": 161470 }, { "epoch": 1.3635346520022797, "grad_norm": 0.6520510315895081, "learning_rate": 2.7804312489023217e-06, "loss": 0.0089, "step": 161480 }, { "epoch": 1.3636190918494437, "grad_norm": 0.2474137544631958, "learning_rate": 2.7797709756339033e-06, "loss": 0.0076, "step": 161490 }, { "epoch": 1.3637035316966077, "grad_norm": 0.2726878523826599, "learning_rate": 2.779110750588405e-06, "loss": 0.0066, "step": 161500 }, { "epoch": 1.3637879715437715, "grad_norm": 0.1611856371164322, "learning_rate": 2.7784505737801655e-06, "loss": 0.0059, "step": 161510 }, { "epoch": 1.3638724113909353, "grad_norm": 0.30134159326553345, "learning_rate": 2.777790445223526e-06, "loss": 0.0124, "step": 161520 }, { "epoch": 1.3639568512380993, "grad_norm": 0.28373536467552185, "learning_rate": 2.777130364932822e-06, "loss": 0.0056, "step": 161530 }, { "epoch": 1.364041291085263, "grad_norm": 0.26764896512031555, "learning_rate": 2.7764703329223913e-06, "loss": 0.0079, "step": 161540 }, { "epoch": 1.364125730932427, "grad_norm": 0.22748754918575287, "learning_rate": 2.7758103492065675e-06, "loss": 0.0058, "step": 161550 }, { "epoch": 1.3642101707795908, "grad_norm": 0.42700788378715515, "learning_rate": 2.7751504137996886e-06, "loss": 0.0075, "step": 161560 }, { "epoch": 1.3642946106267548, "grad_norm": 0.2752417027950287, "learning_rate": 2.774490526716086e-06, "loss": 0.0089, "step": 161570 }, { "epoch": 1.3643790504739186, "grad_norm": 0.583164393901825, "learning_rate": 2.7738306879700938e-06, "loss": 0.011, "step": 161580 }, { "epoch": 1.3644634903210826, "grad_norm": 0.2135777473449707, "learning_rate": 2.773170897576042e-06, "loss": 0.0029, "step": 161590 }, { "epoch": 1.3645479301682464, "grad_norm": 0.16437630355358124, "learning_rate": 2.7725111555482633e-06, "loss": 0.0077, "step": 161600 }, { "epoch": 1.3646323700154102, "grad_norm": 0.003382431110367179, "learning_rate": 2.7718514619010877e-06, "loss": 0.0055, "step": 161610 }, { "epoch": 1.3647168098625742, "grad_norm": 0.0033226190134882927, "learning_rate": 2.7711918166488385e-06, "loss": 0.0063, "step": 161620 }, { "epoch": 1.3648012497097382, "grad_norm": 0.05410720035433769, "learning_rate": 2.770532219805847e-06, "loss": 0.005, "step": 161630 }, { "epoch": 1.364885689556902, "grad_norm": 0.0665217861533165, "learning_rate": 2.769872671386438e-06, "loss": 0.0053, "step": 161640 }, { "epoch": 1.3649701294040657, "grad_norm": 0.455692857503891, "learning_rate": 2.76921317140494e-06, "loss": 0.0075, "step": 161650 }, { "epoch": 1.3650545692512297, "grad_norm": 0.2850009799003601, "learning_rate": 2.768553719875671e-06, "loss": 0.0063, "step": 161660 }, { "epoch": 1.3651390090983935, "grad_norm": 0.15898139774799347, "learning_rate": 2.7678943168129614e-06, "loss": 0.0059, "step": 161670 }, { "epoch": 1.3652234489455575, "grad_norm": 0.23833000659942627, "learning_rate": 2.767234962231129e-06, "loss": 0.0042, "step": 161680 }, { "epoch": 1.3653078887927212, "grad_norm": 0.9096785187721252, "learning_rate": 2.7665756561444967e-06, "loss": 0.0062, "step": 161690 }, { "epoch": 1.3653923286398852, "grad_norm": 0.014360584318637848, "learning_rate": 2.765916398567381e-06, "loss": 0.0057, "step": 161700 }, { "epoch": 1.365476768487049, "grad_norm": 0.0032275007106363773, "learning_rate": 2.7652571895141046e-06, "loss": 0.0051, "step": 161710 }, { "epoch": 1.365561208334213, "grad_norm": 0.2650129497051239, "learning_rate": 2.764598028998985e-06, "loss": 0.0048, "step": 161720 }, { "epoch": 1.3656456481813768, "grad_norm": 0.24633438885211945, "learning_rate": 2.7639389170363355e-06, "loss": 0.0079, "step": 161730 }, { "epoch": 1.3657300880285406, "grad_norm": 0.406674861907959, "learning_rate": 2.763279853640477e-06, "loss": 0.0097, "step": 161740 }, { "epoch": 1.3658145278757046, "grad_norm": 0.0004092757881153375, "learning_rate": 2.762620838825719e-06, "loss": 0.0086, "step": 161750 }, { "epoch": 1.3658989677228683, "grad_norm": 0.10672902315855026, "learning_rate": 2.7619618726063813e-06, "loss": 0.0137, "step": 161760 }, { "epoch": 1.3659834075700323, "grad_norm": 0.1007583737373352, "learning_rate": 2.761302954996772e-06, "loss": 0.0066, "step": 161770 }, { "epoch": 1.3660678474171961, "grad_norm": 0.39058414101600647, "learning_rate": 2.7606440860112036e-06, "loss": 0.0075, "step": 161780 }, { "epoch": 1.36615228726436, "grad_norm": 0.22088895738124847, "learning_rate": 2.759985265663986e-06, "loss": 0.0043, "step": 161790 }, { "epoch": 1.3662367271115239, "grad_norm": 0.5439561605453491, "learning_rate": 2.75932649396943e-06, "loss": 0.0107, "step": 161800 }, { "epoch": 1.3663211669586879, "grad_norm": 0.11298879981040955, "learning_rate": 2.758667770941842e-06, "loss": 0.0125, "step": 161810 }, { "epoch": 1.3664056068058517, "grad_norm": 0.3015684187412262, "learning_rate": 2.7580090965955333e-06, "loss": 0.0081, "step": 161820 }, { "epoch": 1.3664900466530154, "grad_norm": 0.23902785778045654, "learning_rate": 2.757350470944804e-06, "loss": 0.0074, "step": 161830 }, { "epoch": 1.3665744865001794, "grad_norm": 0.05624161660671234, "learning_rate": 2.7566918940039665e-06, "loss": 0.0043, "step": 161840 }, { "epoch": 1.3666589263473434, "grad_norm": 0.5286251902580261, "learning_rate": 2.7560333657873207e-06, "loss": 0.0093, "step": 161850 }, { "epoch": 1.3667433661945072, "grad_norm": 0.012426510453224182, "learning_rate": 2.75537488630917e-06, "loss": 0.0076, "step": 161860 }, { "epoch": 1.366827806041671, "grad_norm": 0.06965257227420807, "learning_rate": 2.754716455583818e-06, "loss": 0.0069, "step": 161870 }, { "epoch": 1.366912245888835, "grad_norm": 0.24457384645938873, "learning_rate": 2.7540580736255612e-06, "loss": 0.0058, "step": 161880 }, { "epoch": 1.3669966857359988, "grad_norm": 0.0628664568066597, "learning_rate": 2.7533997404487056e-06, "loss": 0.0034, "step": 161890 }, { "epoch": 1.3670811255831627, "grad_norm": 0.2621224522590637, "learning_rate": 2.752741456067545e-06, "loss": 0.0103, "step": 161900 }, { "epoch": 1.3671655654303265, "grad_norm": 0.3180049955844879, "learning_rate": 2.7520832204963808e-06, "loss": 0.0052, "step": 161910 }, { "epoch": 1.3672500052774905, "grad_norm": 0.1584634631872177, "learning_rate": 2.7514250337495074e-06, "loss": 0.0049, "step": 161920 }, { "epoch": 1.3673344451246543, "grad_norm": 0.08689024299383163, "learning_rate": 2.7507668958412256e-06, "loss": 0.0054, "step": 161930 }, { "epoch": 1.3674188849718183, "grad_norm": 0.07997675985097885, "learning_rate": 2.750108806785821e-06, "loss": 0.018, "step": 161940 }, { "epoch": 1.367503324818982, "grad_norm": 0.37843430042266846, "learning_rate": 2.7494507665975945e-06, "loss": 0.0086, "step": 161950 }, { "epoch": 1.3675877646661458, "grad_norm": 0.08778953552246094, "learning_rate": 2.748792775290836e-06, "loss": 0.0078, "step": 161960 }, { "epoch": 1.3676722045133098, "grad_norm": 0.15008445084095, "learning_rate": 2.7481348328798357e-06, "loss": 0.0051, "step": 161970 }, { "epoch": 1.3677566443604738, "grad_norm": 0.37639883160591125, "learning_rate": 2.747476939378887e-06, "loss": 0.009, "step": 161980 }, { "epoch": 1.3678410842076376, "grad_norm": 0.06605349481105804, "learning_rate": 2.7468190948022754e-06, "loss": 0.0094, "step": 161990 }, { "epoch": 1.3679255240548014, "grad_norm": 0.007720346562564373, "learning_rate": 2.746161299164294e-06, "loss": 0.0055, "step": 162000 }, { "epoch": 1.3680099639019654, "grad_norm": 0.6224786043167114, "learning_rate": 2.7455035524792262e-06, "loss": 0.0125, "step": 162010 }, { "epoch": 1.3680944037491292, "grad_norm": 0.25630050897598267, "learning_rate": 2.74484585476136e-06, "loss": 0.0062, "step": 162020 }, { "epoch": 1.3681788435962932, "grad_norm": 0.2722441852092743, "learning_rate": 2.744188206024978e-06, "loss": 0.0073, "step": 162030 }, { "epoch": 1.368263283443457, "grad_norm": 0.4147678017616272, "learning_rate": 2.743530606284367e-06, "loss": 0.006, "step": 162040 }, { "epoch": 1.3683477232906207, "grad_norm": 0.3300594091415405, "learning_rate": 2.7428730555538073e-06, "loss": 0.0098, "step": 162050 }, { "epoch": 1.3684321631377847, "grad_norm": 0.1605423539876938, "learning_rate": 2.742215553847585e-06, "loss": 0.0046, "step": 162060 }, { "epoch": 1.3685166029849487, "grad_norm": 0.1388309746980667, "learning_rate": 2.741558101179975e-06, "loss": 0.0088, "step": 162070 }, { "epoch": 1.3686010428321125, "grad_norm": 0.11023687571287155, "learning_rate": 2.740900697565263e-06, "loss": 0.011, "step": 162080 }, { "epoch": 1.3686854826792763, "grad_norm": 0.4717414379119873, "learning_rate": 2.7402433430177244e-06, "loss": 0.0076, "step": 162090 }, { "epoch": 1.3687699225264403, "grad_norm": 0.010156117379665375, "learning_rate": 2.739586037551638e-06, "loss": 0.0114, "step": 162100 }, { "epoch": 1.368854362373604, "grad_norm": 0.08433159440755844, "learning_rate": 2.7389287811812793e-06, "loss": 0.0052, "step": 162110 }, { "epoch": 1.368938802220768, "grad_norm": 0.16740639507770538, "learning_rate": 2.738271573920922e-06, "loss": 0.0084, "step": 162120 }, { "epoch": 1.3690232420679318, "grad_norm": 0.2434961199760437, "learning_rate": 2.7376144157848448e-06, "loss": 0.008, "step": 162130 }, { "epoch": 1.3691076819150958, "grad_norm": 0.14178074896335602, "learning_rate": 2.736957306787317e-06, "loss": 0.0044, "step": 162140 }, { "epoch": 1.3691921217622596, "grad_norm": 0.30358946323394775, "learning_rate": 2.736300246942615e-06, "loss": 0.0047, "step": 162150 }, { "epoch": 1.3692765616094236, "grad_norm": 0.4314557611942291, "learning_rate": 2.7356432362650055e-06, "loss": 0.0087, "step": 162160 }, { "epoch": 1.3693610014565873, "grad_norm": 0.5615284442901611, "learning_rate": 2.734986274768762e-06, "loss": 0.0092, "step": 162170 }, { "epoch": 1.3694454413037511, "grad_norm": 0.31883788108825684, "learning_rate": 2.7343293624681532e-06, "loss": 0.0056, "step": 162180 }, { "epoch": 1.3695298811509151, "grad_norm": 0.20678867399692535, "learning_rate": 2.7336724993774464e-06, "loss": 0.0071, "step": 162190 }, { "epoch": 1.3696143209980791, "grad_norm": 0.36729225516319275, "learning_rate": 2.733015685510909e-06, "loss": 0.0059, "step": 162200 }, { "epoch": 1.369698760845243, "grad_norm": 0.011799391359090805, "learning_rate": 2.732358920882804e-06, "loss": 0.0063, "step": 162210 }, { "epoch": 1.3697832006924067, "grad_norm": 0.5268738269805908, "learning_rate": 2.7317022055074e-06, "loss": 0.0073, "step": 162220 }, { "epoch": 1.3698676405395707, "grad_norm": 0.3098435699939728, "learning_rate": 2.7310455393989575e-06, "loss": 0.0042, "step": 162230 }, { "epoch": 1.3699520803867344, "grad_norm": 0.22966253757476807, "learning_rate": 2.7303889225717434e-06, "loss": 0.0068, "step": 162240 }, { "epoch": 1.3700365202338984, "grad_norm": 0.1688358187675476, "learning_rate": 2.729732355040017e-06, "loss": 0.0124, "step": 162250 }, { "epoch": 1.3701209600810622, "grad_norm": 0.6151463389396667, "learning_rate": 2.729075836818038e-06, "loss": 0.0062, "step": 162260 }, { "epoch": 1.3702053999282262, "grad_norm": 0.20342299342155457, "learning_rate": 2.728419367920064e-06, "loss": 0.0056, "step": 162270 }, { "epoch": 1.37028983977539, "grad_norm": 0.1696028858423233, "learning_rate": 2.7277629483603585e-06, "loss": 0.0049, "step": 162280 }, { "epoch": 1.370374279622554, "grad_norm": 0.05498555675148964, "learning_rate": 2.7271065781531745e-06, "loss": 0.0044, "step": 162290 }, { "epoch": 1.3704587194697178, "grad_norm": 1.2895435094833374, "learning_rate": 2.7264502573127715e-06, "loss": 0.0102, "step": 162300 }, { "epoch": 1.3705431593168815, "grad_norm": 0.29736122488975525, "learning_rate": 2.7257939858534034e-06, "loss": 0.0061, "step": 162310 }, { "epoch": 1.3706275991640455, "grad_norm": 0.2738165855407715, "learning_rate": 2.725137763789321e-06, "loss": 0.0068, "step": 162320 }, { "epoch": 1.3707120390112095, "grad_norm": 0.2741064131259918, "learning_rate": 2.7244815911347826e-06, "loss": 0.0069, "step": 162330 }, { "epoch": 1.3707964788583733, "grad_norm": 0.036264460533857346, "learning_rate": 2.7238254679040377e-06, "loss": 0.0046, "step": 162340 }, { "epoch": 1.370880918705537, "grad_norm": 0.174299955368042, "learning_rate": 2.723169394111338e-06, "loss": 0.0068, "step": 162350 }, { "epoch": 1.370965358552701, "grad_norm": 0.29158467054367065, "learning_rate": 2.7225133697709295e-06, "loss": 0.0061, "step": 162360 }, { "epoch": 1.3710497983998648, "grad_norm": 0.13629867136478424, "learning_rate": 2.7218573948970663e-06, "loss": 0.0104, "step": 162370 }, { "epoch": 1.3711342382470288, "grad_norm": 0.10991751402616501, "learning_rate": 2.7212014695039914e-06, "loss": 0.014, "step": 162380 }, { "epoch": 1.3712186780941926, "grad_norm": 0.1155862957239151, "learning_rate": 2.7205455936059557e-06, "loss": 0.0095, "step": 162390 }, { "epoch": 1.3713031179413564, "grad_norm": 0.006946216337382793, "learning_rate": 2.7198897672172007e-06, "loss": 0.0069, "step": 162400 }, { "epoch": 1.3713875577885204, "grad_norm": 0.15331768989562988, "learning_rate": 2.719233990351975e-06, "loss": 0.0098, "step": 162410 }, { "epoch": 1.3714719976356844, "grad_norm": 2.079463005065918, "learning_rate": 2.7185782630245196e-06, "loss": 0.0113, "step": 162420 }, { "epoch": 1.3715564374828482, "grad_norm": 0.11697333306074142, "learning_rate": 2.7179225852490763e-06, "loss": 0.0095, "step": 162430 }, { "epoch": 1.371640877330012, "grad_norm": 0.4058952033519745, "learning_rate": 2.717266957039887e-06, "loss": 0.0121, "step": 162440 }, { "epoch": 1.371725317177176, "grad_norm": 0.9171364903450012, "learning_rate": 2.7166113784111895e-06, "loss": 0.0114, "step": 162450 }, { "epoch": 1.3718097570243397, "grad_norm": 0.0039442447014153, "learning_rate": 2.7159558493772276e-06, "loss": 0.0046, "step": 162460 }, { "epoch": 1.3718941968715037, "grad_norm": 0.3996754586696625, "learning_rate": 2.715300369952234e-06, "loss": 0.0026, "step": 162470 }, { "epoch": 1.3719786367186675, "grad_norm": 0.2727445662021637, "learning_rate": 2.7146449401504506e-06, "loss": 0.0043, "step": 162480 }, { "epoch": 1.3720630765658315, "grad_norm": 0.05709250643849373, "learning_rate": 2.7139895599861082e-06, "loss": 0.005, "step": 162490 }, { "epoch": 1.3721475164129953, "grad_norm": 0.10296947509050369, "learning_rate": 2.7133342294734464e-06, "loss": 0.0053, "step": 162500 }, { "epoch": 1.3722319562601593, "grad_norm": 0.19209817051887512, "learning_rate": 2.712678948626697e-06, "loss": 0.0041, "step": 162510 }, { "epoch": 1.372316396107323, "grad_norm": 0.46157798171043396, "learning_rate": 2.7120237174600918e-06, "loss": 0.0076, "step": 162520 }, { "epoch": 1.3724008359544868, "grad_norm": 0.2847382724285126, "learning_rate": 2.7113685359878605e-06, "loss": 0.0056, "step": 162530 }, { "epoch": 1.3724852758016508, "grad_norm": 0.3816031217575073, "learning_rate": 2.7107134042242376e-06, "loss": 0.008, "step": 162540 }, { "epoch": 1.3725697156488148, "grad_norm": 0.12871627509593964, "learning_rate": 2.7100583221834503e-06, "loss": 0.0043, "step": 162550 }, { "epoch": 1.3726541554959786, "grad_norm": 0.4159092903137207, "learning_rate": 2.7094032898797253e-06, "loss": 0.0087, "step": 162560 }, { "epoch": 1.3727385953431424, "grad_norm": 0.05820814520120621, "learning_rate": 2.7087483073272935e-06, "loss": 0.0055, "step": 162570 }, { "epoch": 1.3728230351903064, "grad_norm": 0.23310935497283936, "learning_rate": 2.708093374540378e-06, "loss": 0.0078, "step": 162580 }, { "epoch": 1.3729074750374701, "grad_norm": 0.41269001364707947, "learning_rate": 2.7074384915332065e-06, "loss": 0.0091, "step": 162590 }, { "epoch": 1.3729919148846341, "grad_norm": 0.3053272068500519, "learning_rate": 2.706783658319998e-06, "loss": 0.0119, "step": 162600 }, { "epoch": 1.373076354731798, "grad_norm": 0.2927733063697815, "learning_rate": 2.7061288749149804e-06, "loss": 0.0092, "step": 162610 }, { "epoch": 1.3731607945789617, "grad_norm": 0.20403191447257996, "learning_rate": 2.7054741413323722e-06, "loss": 0.0045, "step": 162620 }, { "epoch": 1.3732452344261257, "grad_norm": 0.5406567454338074, "learning_rate": 2.704819457586397e-06, "loss": 0.0077, "step": 162630 }, { "epoch": 1.3733296742732897, "grad_norm": 0.5560593008995056, "learning_rate": 2.704164823691271e-06, "loss": 0.0056, "step": 162640 }, { "epoch": 1.3734141141204534, "grad_norm": 0.610832691192627, "learning_rate": 2.703510239661217e-06, "loss": 0.0077, "step": 162650 }, { "epoch": 1.3734985539676172, "grad_norm": 0.41161787509918213, "learning_rate": 2.702855705510449e-06, "loss": 0.0091, "step": 162660 }, { "epoch": 1.3735829938147812, "grad_norm": 0.31358233094215393, "learning_rate": 2.702201221253185e-06, "loss": 0.0033, "step": 162670 }, { "epoch": 1.373667433661945, "grad_norm": 0.11716683208942413, "learning_rate": 2.7015467869036393e-06, "loss": 0.0037, "step": 162680 }, { "epoch": 1.373751873509109, "grad_norm": 0.03226996213197708, "learning_rate": 2.7008924024760253e-06, "loss": 0.003, "step": 162690 }, { "epoch": 1.3738363133562728, "grad_norm": 0.1030496209859848, "learning_rate": 2.700238067984558e-06, "loss": 0.007, "step": 162700 }, { "epoch": 1.3739207532034368, "grad_norm": 0.23798157274723053, "learning_rate": 2.699583783443447e-06, "loss": 0.0085, "step": 162710 }, { "epoch": 1.3740051930506005, "grad_norm": 0.0611281655728817, "learning_rate": 2.698929548866907e-06, "loss": 0.0062, "step": 162720 }, { "epoch": 1.3740896328977645, "grad_norm": 0.3310256898403168, "learning_rate": 2.698275364269144e-06, "loss": 0.0067, "step": 162730 }, { "epoch": 1.3741740727449283, "grad_norm": 0.574626088142395, "learning_rate": 2.69762122966437e-06, "loss": 0.0131, "step": 162740 }, { "epoch": 1.374258512592092, "grad_norm": 0.0965060219168663, "learning_rate": 2.696967145066791e-06, "loss": 0.0054, "step": 162750 }, { "epoch": 1.374342952439256, "grad_norm": 0.447361558675766, "learning_rate": 2.6963131104906136e-06, "loss": 0.0042, "step": 162760 }, { "epoch": 1.37442739228642, "grad_norm": 0.5500718951225281, "learning_rate": 2.6956591259500416e-06, "loss": 0.0148, "step": 162770 }, { "epoch": 1.3745118321335839, "grad_norm": 0.14963138103485107, "learning_rate": 2.6950051914592836e-06, "loss": 0.0095, "step": 162780 }, { "epoch": 1.3745962719807476, "grad_norm": 0.3082229495048523, "learning_rate": 2.6943513070325393e-06, "loss": 0.0068, "step": 162790 }, { "epoch": 1.3746807118279116, "grad_norm": 0.36656296253204346, "learning_rate": 2.6936974726840107e-06, "loss": 0.0055, "step": 162800 }, { "epoch": 1.3747651516750754, "grad_norm": 0.24608077108860016, "learning_rate": 2.6930436884279032e-06, "loss": 0.0069, "step": 162810 }, { "epoch": 1.3748495915222394, "grad_norm": 0.18172630667686462, "learning_rate": 2.692389954278411e-06, "loss": 0.005, "step": 162820 }, { "epoch": 1.3749340313694032, "grad_norm": 0.275993674993515, "learning_rate": 2.6917362702497405e-06, "loss": 0.0058, "step": 162830 }, { "epoch": 1.3750184712165672, "grad_norm": 0.33229050040245056, "learning_rate": 2.6910826363560817e-06, "loss": 0.0081, "step": 162840 }, { "epoch": 1.375102911063731, "grad_norm": 0.2931469976902008, "learning_rate": 2.690429052611636e-06, "loss": 0.0082, "step": 162850 }, { "epoch": 1.375187350910895, "grad_norm": 0.2200646698474884, "learning_rate": 2.6897755190305964e-06, "loss": 0.0126, "step": 162860 }, { "epoch": 1.3752717907580587, "grad_norm": 0.5230339169502258, "learning_rate": 2.6891220356271608e-06, "loss": 0.0084, "step": 162870 }, { "epoch": 1.3753562306052225, "grad_norm": 1.5718433856964111, "learning_rate": 2.68846860241552e-06, "loss": 0.0094, "step": 162880 }, { "epoch": 1.3754406704523865, "grad_norm": 0.3714430630207062, "learning_rate": 2.6878152194098684e-06, "loss": 0.0063, "step": 162890 }, { "epoch": 1.3755251102995505, "grad_norm": 0.16915470361709595, "learning_rate": 2.687161886624397e-06, "loss": 0.0102, "step": 162900 }, { "epoch": 1.3756095501467143, "grad_norm": 0.42801737785339355, "learning_rate": 2.6865086040732958e-06, "loss": 0.0099, "step": 162910 }, { "epoch": 1.375693989993878, "grad_norm": 0.19260697066783905, "learning_rate": 2.6858553717707525e-06, "loss": 0.0077, "step": 162920 }, { "epoch": 1.375778429841042, "grad_norm": 0.5467736721038818, "learning_rate": 2.6852021897309555e-06, "loss": 0.0049, "step": 162930 }, { "epoch": 1.3758628696882058, "grad_norm": 0.37664341926574707, "learning_rate": 2.6845490579680937e-06, "loss": 0.0096, "step": 162940 }, { "epoch": 1.3759473095353698, "grad_norm": 0.040062107145786285, "learning_rate": 2.68389597649635e-06, "loss": 0.0072, "step": 162950 }, { "epoch": 1.3760317493825336, "grad_norm": 0.12069713324308395, "learning_rate": 2.6832429453299136e-06, "loss": 0.007, "step": 162960 }, { "epoch": 1.3761161892296974, "grad_norm": 0.398917019367218, "learning_rate": 2.6825899644829633e-06, "loss": 0.006, "step": 162970 }, { "epoch": 1.3762006290768614, "grad_norm": 0.06148117408156395, "learning_rate": 2.6819370339696872e-06, "loss": 0.0051, "step": 162980 }, { "epoch": 1.3762850689240254, "grad_norm": 0.7936785817146301, "learning_rate": 2.681284153804262e-06, "loss": 0.0084, "step": 162990 }, { "epoch": 1.3763695087711891, "grad_norm": 0.2535645663738251, "learning_rate": 2.6806313240008707e-06, "loss": 0.0049, "step": 163000 }, { "epoch": 1.376453948618353, "grad_norm": 0.07019966095685959, "learning_rate": 2.6799785445736898e-06, "loss": 0.0045, "step": 163010 }, { "epoch": 1.376538388465517, "grad_norm": 0.11500449478626251, "learning_rate": 2.6793258155369012e-06, "loss": 0.0067, "step": 163020 }, { "epoch": 1.3766228283126807, "grad_norm": 0.04964137449860573, "learning_rate": 2.67867313690468e-06, "loss": 0.0051, "step": 163030 }, { "epoch": 1.3767072681598447, "grad_norm": 0.2753947973251343, "learning_rate": 2.6780205086912016e-06, "loss": 0.0036, "step": 163040 }, { "epoch": 1.3767917080070085, "grad_norm": 0.4985266923904419, "learning_rate": 2.6773679309106433e-06, "loss": 0.0102, "step": 163050 }, { "epoch": 1.3768761478541724, "grad_norm": 0.22733505070209503, "learning_rate": 2.6767154035771757e-06, "loss": 0.0081, "step": 163060 }, { "epoch": 1.3769605877013362, "grad_norm": 0.3959577679634094, "learning_rate": 2.6760629267049754e-06, "loss": 0.0077, "step": 163070 }, { "epoch": 1.3770450275485002, "grad_norm": 0.07816459983587265, "learning_rate": 2.6754105003082122e-06, "loss": 0.0049, "step": 163080 }, { "epoch": 1.377129467395664, "grad_norm": 0.009645313955843449, "learning_rate": 2.674758124401056e-06, "loss": 0.0033, "step": 163090 }, { "epoch": 1.3772139072428278, "grad_norm": 0.4504864811897278, "learning_rate": 2.674105798997676e-06, "loss": 0.008, "step": 163100 }, { "epoch": 1.3772983470899918, "grad_norm": 0.28941142559051514, "learning_rate": 2.6734535241122423e-06, "loss": 0.0093, "step": 163110 }, { "epoch": 1.3773827869371558, "grad_norm": 0.3622138500213623, "learning_rate": 2.6728012997589197e-06, "loss": 0.0056, "step": 163120 }, { "epoch": 1.3774672267843195, "grad_norm": 0.24280968308448792, "learning_rate": 2.6721491259518783e-06, "loss": 0.0061, "step": 163130 }, { "epoch": 1.3775516666314833, "grad_norm": 0.37439998984336853, "learning_rate": 2.6714970027052804e-06, "loss": 0.0086, "step": 163140 }, { "epoch": 1.3776361064786473, "grad_norm": 0.21342802047729492, "learning_rate": 2.6708449300332906e-06, "loss": 0.0079, "step": 163150 }, { "epoch": 1.377720546325811, "grad_norm": 0.1121893897652626, "learning_rate": 2.670192907950072e-06, "loss": 0.0123, "step": 163160 }, { "epoch": 1.377804986172975, "grad_norm": 0.29349231719970703, "learning_rate": 2.6695409364697834e-06, "loss": 0.0051, "step": 163170 }, { "epoch": 1.3778894260201389, "grad_norm": 0.2564675211906433, "learning_rate": 2.6688890156065904e-06, "loss": 0.0102, "step": 163180 }, { "epoch": 1.3779738658673029, "grad_norm": 0.10116413980722427, "learning_rate": 2.6682371453746483e-06, "loss": 0.0043, "step": 163190 }, { "epoch": 1.3780583057144666, "grad_norm": 0.07007058709859848, "learning_rate": 2.6675853257881194e-06, "loss": 0.0056, "step": 163200 }, { "epoch": 1.3781427455616306, "grad_norm": 0.21924369037151337, "learning_rate": 2.6669335568611576e-06, "loss": 0.0089, "step": 163210 }, { "epoch": 1.3782271854087944, "grad_norm": 0.35551202297210693, "learning_rate": 2.6662818386079225e-06, "loss": 0.0077, "step": 163220 }, { "epoch": 1.3783116252559582, "grad_norm": 0.24387092888355255, "learning_rate": 2.665630171042568e-06, "loss": 0.0101, "step": 163230 }, { "epoch": 1.3783960651031222, "grad_norm": 0.10510855913162231, "learning_rate": 2.6649785541792477e-06, "loss": 0.007, "step": 163240 }, { "epoch": 1.3784805049502862, "grad_norm": 0.32564064860343933, "learning_rate": 2.6643269880321133e-06, "loss": 0.0118, "step": 163250 }, { "epoch": 1.37856494479745, "grad_norm": 0.2114291936159134, "learning_rate": 2.663675472615319e-06, "loss": 0.0059, "step": 163260 }, { "epoch": 1.3786493846446137, "grad_norm": 0.1357312649488449, "learning_rate": 2.6630240079430146e-06, "loss": 0.0035, "step": 163270 }, { "epoch": 1.3787338244917777, "grad_norm": 0.06271842867136002, "learning_rate": 2.662372594029349e-06, "loss": 0.0042, "step": 163280 }, { "epoch": 1.3788182643389415, "grad_norm": 0.2343159019947052, "learning_rate": 2.661721230888473e-06, "loss": 0.0062, "step": 163290 }, { "epoch": 1.3789027041861055, "grad_norm": 0.043620821088552475, "learning_rate": 2.6610699185345305e-06, "loss": 0.0095, "step": 163300 }, { "epoch": 1.3789871440332693, "grad_norm": 0.1935957670211792, "learning_rate": 2.6604186569816725e-06, "loss": 0.0085, "step": 163310 }, { "epoch": 1.379071583880433, "grad_norm": 0.13716745376586914, "learning_rate": 2.6597674462440414e-06, "loss": 0.0064, "step": 163320 }, { "epoch": 1.379156023727597, "grad_norm": 0.39313915371894836, "learning_rate": 2.659116286335782e-06, "loss": 0.0081, "step": 163330 }, { "epoch": 1.379240463574761, "grad_norm": 0.18760953843593597, "learning_rate": 2.6584651772710355e-06, "loss": 0.0048, "step": 163340 }, { "epoch": 1.3793249034219248, "grad_norm": 0.34048905968666077, "learning_rate": 2.6578141190639475e-06, "loss": 0.011, "step": 163350 }, { "epoch": 1.3794093432690886, "grad_norm": 0.23113463819026947, "learning_rate": 2.6571631117286544e-06, "loss": 0.011, "step": 163360 }, { "epoch": 1.3794937831162526, "grad_norm": 0.39955562353134155, "learning_rate": 2.6565121552793007e-06, "loss": 0.0072, "step": 163370 }, { "epoch": 1.3795782229634164, "grad_norm": 0.008539543487131596, "learning_rate": 2.6558612497300227e-06, "loss": 0.0033, "step": 163380 }, { "epoch": 1.3796626628105804, "grad_norm": 0.18396486341953278, "learning_rate": 2.655210395094956e-06, "loss": 0.0039, "step": 163390 }, { "epoch": 1.3797471026577441, "grad_norm": 0.24817436933517456, "learning_rate": 2.654559591388241e-06, "loss": 0.0039, "step": 163400 }, { "epoch": 1.3798315425049081, "grad_norm": 0.11704491078853607, "learning_rate": 2.6539088386240106e-06, "loss": 0.0043, "step": 163410 }, { "epoch": 1.379915982352072, "grad_norm": 0.48322901129722595, "learning_rate": 2.6532581368164007e-06, "loss": 0.0101, "step": 163420 }, { "epoch": 1.380000422199236, "grad_norm": 0.05626736581325531, "learning_rate": 2.6526074859795404e-06, "loss": 0.0114, "step": 163430 }, { "epoch": 1.3800848620463997, "grad_norm": 0.015108516439795494, "learning_rate": 2.6519568861275664e-06, "loss": 0.0015, "step": 163440 }, { "epoch": 1.3801693018935635, "grad_norm": 0.20972353219985962, "learning_rate": 2.6513063372746063e-06, "loss": 0.0091, "step": 163450 }, { "epoch": 1.3802537417407275, "grad_norm": 0.044383399188518524, "learning_rate": 2.6506558394347925e-06, "loss": 0.0104, "step": 163460 }, { "epoch": 1.3803381815878915, "grad_norm": 0.06360358744859695, "learning_rate": 2.6500053926222525e-06, "loss": 0.0082, "step": 163470 }, { "epoch": 1.3804226214350552, "grad_norm": 0.07695288956165314, "learning_rate": 2.649354996851114e-06, "loss": 0.0049, "step": 163480 }, { "epoch": 1.380507061282219, "grad_norm": 0.02020030841231346, "learning_rate": 2.648704652135503e-06, "loss": 0.0044, "step": 163490 }, { "epoch": 1.380591501129383, "grad_norm": 0.28410717844963074, "learning_rate": 2.6480543584895435e-06, "loss": 0.0069, "step": 163500 }, { "epoch": 1.3806759409765468, "grad_norm": 0.1512233465909958, "learning_rate": 2.6474041159273635e-06, "loss": 0.0076, "step": 163510 }, { "epoch": 1.3807603808237108, "grad_norm": 0.02525489032268524, "learning_rate": 2.6467539244630813e-06, "loss": 0.0084, "step": 163520 }, { "epoch": 1.3808448206708746, "grad_norm": 0.07873925566673279, "learning_rate": 2.6461037841108247e-06, "loss": 0.0034, "step": 163530 }, { "epoch": 1.3809292605180383, "grad_norm": 0.16888543963432312, "learning_rate": 2.645453694884709e-06, "loss": 0.0035, "step": 163540 }, { "epoch": 1.3810137003652023, "grad_norm": 0.1951065957546234, "learning_rate": 2.644803656798858e-06, "loss": 0.0109, "step": 163550 }, { "epoch": 1.3810981402123663, "grad_norm": 0.4094526171684265, "learning_rate": 2.6441536698673897e-06, "loss": 0.0058, "step": 163560 }, { "epoch": 1.38118258005953, "grad_norm": 0.0971614271402359, "learning_rate": 2.6435037341044205e-06, "loss": 0.0072, "step": 163570 }, { "epoch": 1.3812670199066939, "grad_norm": 0.24382375180721283, "learning_rate": 2.642853849524066e-06, "loss": 0.0053, "step": 163580 }, { "epoch": 1.3813514597538579, "grad_norm": 0.37256231904029846, "learning_rate": 2.642204016140444e-06, "loss": 0.0069, "step": 163590 }, { "epoch": 1.3814358996010216, "grad_norm": 0.14633703231811523, "learning_rate": 2.641554233967666e-06, "loss": 0.0052, "step": 163600 }, { "epoch": 1.3815203394481856, "grad_norm": 0.3416472375392914, "learning_rate": 2.640904503019849e-06, "loss": 0.0052, "step": 163610 }, { "epoch": 1.3816047792953494, "grad_norm": 0.46298056840896606, "learning_rate": 2.6402548233111026e-06, "loss": 0.0099, "step": 163620 }, { "epoch": 1.3816892191425134, "grad_norm": 0.09113439172506332, "learning_rate": 2.6396051948555356e-06, "loss": 0.0084, "step": 163630 }, { "epoch": 1.3817736589896772, "grad_norm": 0.10797202587127686, "learning_rate": 2.638955617667263e-06, "loss": 0.007, "step": 163640 }, { "epoch": 1.3818580988368412, "grad_norm": 0.17784370481967926, "learning_rate": 2.6383060917603886e-06, "loss": 0.0083, "step": 163650 }, { "epoch": 1.381942538684005, "grad_norm": 0.2111162692308426, "learning_rate": 2.637656617149024e-06, "loss": 0.0037, "step": 163660 }, { "epoch": 1.3820269785311687, "grad_norm": 0.8600934743881226, "learning_rate": 2.63700719384727e-06, "loss": 0.0107, "step": 163670 }, { "epoch": 1.3821114183783327, "grad_norm": 0.2907622456550598, "learning_rate": 2.636357821869239e-06, "loss": 0.0058, "step": 163680 }, { "epoch": 1.3821958582254967, "grad_norm": 0.09763314574956894, "learning_rate": 2.6357085012290282e-06, "loss": 0.0065, "step": 163690 }, { "epoch": 1.3822802980726605, "grad_norm": 0.21546417474746704, "learning_rate": 2.6350592319407474e-06, "loss": 0.0047, "step": 163700 }, { "epoch": 1.3823647379198243, "grad_norm": 0.07375623285770416, "learning_rate": 2.6344100140184935e-06, "loss": 0.0051, "step": 163710 }, { "epoch": 1.3824491777669883, "grad_norm": 0.17243774235248566, "learning_rate": 2.633760847476371e-06, "loss": 0.0057, "step": 163720 }, { "epoch": 1.382533617614152, "grad_norm": 0.4136520326137543, "learning_rate": 2.6331117323284794e-06, "loss": 0.0092, "step": 163730 }, { "epoch": 1.382618057461316, "grad_norm": 0.5227013826370239, "learning_rate": 2.6324626685889127e-06, "loss": 0.0102, "step": 163740 }, { "epoch": 1.3827024973084798, "grad_norm": 0.17738878726959229, "learning_rate": 2.6318136562717734e-06, "loss": 0.0102, "step": 163750 }, { "epoch": 1.3827869371556438, "grad_norm": 0.07455271482467651, "learning_rate": 2.6311646953911546e-06, "loss": 0.0119, "step": 163760 }, { "epoch": 1.3828713770028076, "grad_norm": 0.19054293632507324, "learning_rate": 2.630515785961155e-06, "loss": 0.0046, "step": 163770 }, { "epoch": 1.3829558168499716, "grad_norm": 0.3282414376735687, "learning_rate": 2.6298669279958643e-06, "loss": 0.0147, "step": 163780 }, { "epoch": 1.3830402566971354, "grad_norm": 0.08097127825021744, "learning_rate": 2.6292181215093816e-06, "loss": 0.0092, "step": 163790 }, { "epoch": 1.3831246965442991, "grad_norm": 0.5113437175750732, "learning_rate": 2.628569366515794e-06, "loss": 0.0069, "step": 163800 }, { "epoch": 1.3832091363914631, "grad_norm": 0.6609708070755005, "learning_rate": 2.6279206630291946e-06, "loss": 0.0052, "step": 163810 }, { "epoch": 1.3832935762386271, "grad_norm": 0.11036596447229385, "learning_rate": 2.6272720110636707e-06, "loss": 0.0103, "step": 163820 }, { "epoch": 1.383378016085791, "grad_norm": 0.23045992851257324, "learning_rate": 2.626623410633314e-06, "loss": 0.0075, "step": 163830 }, { "epoch": 1.3834624559329547, "grad_norm": 0.29207488894462585, "learning_rate": 2.6259748617522086e-06, "loss": 0.0042, "step": 163840 }, { "epoch": 1.3835468957801187, "grad_norm": 0.7962402105331421, "learning_rate": 2.625326364434444e-06, "loss": 0.0115, "step": 163850 }, { "epoch": 1.3836313356272825, "grad_norm": 0.867572546005249, "learning_rate": 2.624677918694105e-06, "loss": 0.0187, "step": 163860 }, { "epoch": 1.3837157754744465, "grad_norm": 0.5891233682632446, "learning_rate": 2.624029524545273e-06, "loss": 0.0087, "step": 163870 }, { "epoch": 1.3838002153216102, "grad_norm": 0.1771302968263626, "learning_rate": 2.623381182002034e-06, "loss": 0.0098, "step": 163880 }, { "epoch": 1.383884655168774, "grad_norm": 0.9036654829978943, "learning_rate": 2.6227328910784694e-06, "loss": 0.0068, "step": 163890 }, { "epoch": 1.383969095015938, "grad_norm": 0.18081805109977722, "learning_rate": 2.622084651788659e-06, "loss": 0.0044, "step": 163900 }, { "epoch": 1.384053534863102, "grad_norm": 0.2518167495727539, "learning_rate": 2.6214364641466804e-06, "loss": 0.0037, "step": 163910 }, { "epoch": 1.3841379747102658, "grad_norm": 0.46858522295951843, "learning_rate": 2.620788328166617e-06, "loss": 0.0159, "step": 163920 }, { "epoch": 1.3842224145574296, "grad_norm": 1.107074499130249, "learning_rate": 2.6201402438625414e-06, "loss": 0.0169, "step": 163930 }, { "epoch": 1.3843068544045936, "grad_norm": 0.08847638964653015, "learning_rate": 2.6194922112485337e-06, "loss": 0.0098, "step": 163940 }, { "epoch": 1.3843912942517573, "grad_norm": 0.35138797760009766, "learning_rate": 2.6188442303386653e-06, "loss": 0.0075, "step": 163950 }, { "epoch": 1.3844757340989213, "grad_norm": 0.49107828736305237, "learning_rate": 2.6181963011470146e-06, "loss": 0.0107, "step": 163960 }, { "epoch": 1.384560173946085, "grad_norm": 0.18427713215351105, "learning_rate": 2.6175484236876526e-06, "loss": 0.0085, "step": 163970 }, { "epoch": 1.384644613793249, "grad_norm": 0.36224839091300964, "learning_rate": 2.6169005979746497e-06, "loss": 0.0072, "step": 163980 }, { "epoch": 1.3847290536404129, "grad_norm": 0.0982387438416481, "learning_rate": 2.616252824022078e-06, "loss": 0.0039, "step": 163990 }, { "epoch": 1.3848134934875769, "grad_norm": 0.08374051749706268, "learning_rate": 2.615605101844004e-06, "loss": 0.0048, "step": 164000 }, { "epoch": 1.3848979333347406, "grad_norm": 0.022195445373654366, "learning_rate": 2.614957431454501e-06, "loss": 0.0085, "step": 164010 }, { "epoch": 1.3849823731819044, "grad_norm": 0.8553779721260071, "learning_rate": 2.614309812867632e-06, "loss": 0.012, "step": 164020 }, { "epoch": 1.3850668130290684, "grad_norm": 0.294222354888916, "learning_rate": 2.613662246097467e-06, "loss": 0.0047, "step": 164030 }, { "epoch": 1.3851512528762324, "grad_norm": 0.018471471965312958, "learning_rate": 2.6130147311580665e-06, "loss": 0.0049, "step": 164040 }, { "epoch": 1.3852356927233962, "grad_norm": 0.3529491126537323, "learning_rate": 2.6123672680635017e-06, "loss": 0.0037, "step": 164050 }, { "epoch": 1.38532013257056, "grad_norm": 0.25120270252227783, "learning_rate": 2.6117198568278256e-06, "loss": 0.0098, "step": 164060 }, { "epoch": 1.385404572417724, "grad_norm": 0.2940578758716583, "learning_rate": 2.611072497465107e-06, "loss": 0.0057, "step": 164070 }, { "epoch": 1.3854890122648877, "grad_norm": 0.2217564880847931, "learning_rate": 2.610425189989405e-06, "loss": 0.0088, "step": 164080 }, { "epoch": 1.3855734521120517, "grad_norm": 0.003877601819112897, "learning_rate": 2.6097779344147757e-06, "loss": 0.0068, "step": 164090 }, { "epoch": 1.3856578919592155, "grad_norm": 0.09938447177410126, "learning_rate": 2.6091307307552815e-06, "loss": 0.0109, "step": 164100 }, { "epoch": 1.3857423318063795, "grad_norm": 0.26870599389076233, "learning_rate": 2.6084835790249764e-06, "loss": 0.0163, "step": 164110 }, { "epoch": 1.3858267716535433, "grad_norm": 0.13827970623970032, "learning_rate": 2.6078364792379194e-06, "loss": 0.0112, "step": 164120 }, { "epoch": 1.3859112115007073, "grad_norm": 0.20688706636428833, "learning_rate": 2.6071894314081645e-06, "loss": 0.0056, "step": 164130 }, { "epoch": 1.385995651347871, "grad_norm": 0.09753160923719406, "learning_rate": 2.6065424355497644e-06, "loss": 0.007, "step": 164140 }, { "epoch": 1.3860800911950348, "grad_norm": 0.17929482460021973, "learning_rate": 2.60589549167677e-06, "loss": 0.005, "step": 164150 }, { "epoch": 1.3861645310421988, "grad_norm": 0.6615140438079834, "learning_rate": 2.6052485998032374e-06, "loss": 0.012, "step": 164160 }, { "epoch": 1.3862489708893626, "grad_norm": 0.1144716888666153, "learning_rate": 2.604601759943212e-06, "loss": 0.0057, "step": 164170 }, { "epoch": 1.3863334107365266, "grad_norm": 0.30280518531799316, "learning_rate": 2.6039549721107484e-06, "loss": 0.006, "step": 164180 }, { "epoch": 1.3864178505836904, "grad_norm": 0.1489802747964859, "learning_rate": 2.603308236319889e-06, "loss": 0.0044, "step": 164190 }, { "epoch": 1.3865022904308544, "grad_norm": 0.23142138123512268, "learning_rate": 2.6026615525846856e-06, "loss": 0.0057, "step": 164200 }, { "epoch": 1.3865867302780182, "grad_norm": 0.21305403113365173, "learning_rate": 2.6020149209191825e-06, "loss": 0.005, "step": 164210 }, { "epoch": 1.3866711701251822, "grad_norm": 0.13922281563282013, "learning_rate": 2.6013683413374236e-06, "loss": 0.0084, "step": 164220 }, { "epoch": 1.386755609972346, "grad_norm": 0.21663154661655426, "learning_rate": 2.6007218138534524e-06, "loss": 0.0059, "step": 164230 }, { "epoch": 1.3868400498195097, "grad_norm": 0.22180813550949097, "learning_rate": 2.6000753384813104e-06, "loss": 0.0074, "step": 164240 }, { "epoch": 1.3869244896666737, "grad_norm": 0.2994491457939148, "learning_rate": 2.5994289152350415e-06, "loss": 0.0064, "step": 164250 }, { "epoch": 1.3870089295138377, "grad_norm": 0.18380138278007507, "learning_rate": 2.5987825441286836e-06, "loss": 0.0038, "step": 164260 }, { "epoch": 1.3870933693610015, "grad_norm": 0.23530733585357666, "learning_rate": 2.5981362251762783e-06, "loss": 0.0047, "step": 164270 }, { "epoch": 1.3871778092081652, "grad_norm": 0.3705921471118927, "learning_rate": 2.5974899583918602e-06, "loss": 0.0066, "step": 164280 }, { "epoch": 1.3872622490553292, "grad_norm": 0.3699856400489807, "learning_rate": 2.596843743789469e-06, "loss": 0.007, "step": 164290 }, { "epoch": 1.387346688902493, "grad_norm": 0.30974680185317993, "learning_rate": 2.5961975813831404e-06, "loss": 0.0063, "step": 164300 }, { "epoch": 1.387431128749657, "grad_norm": 0.25866758823394775, "learning_rate": 2.5955514711869067e-06, "loss": 0.0074, "step": 164310 }, { "epoch": 1.3875155685968208, "grad_norm": 0.10771140456199646, "learning_rate": 2.5949054132148033e-06, "loss": 0.0074, "step": 164320 }, { "epoch": 1.3876000084439848, "grad_norm": 0.37374603748321533, "learning_rate": 2.594259407480859e-06, "loss": 0.0067, "step": 164330 }, { "epoch": 1.3876844482911486, "grad_norm": 0.1762237548828125, "learning_rate": 2.5936134539991097e-06, "loss": 0.0128, "step": 164340 }, { "epoch": 1.3877688881383126, "grad_norm": 0.1984872668981552, "learning_rate": 2.592967552783581e-06, "loss": 0.0067, "step": 164350 }, { "epoch": 1.3878533279854763, "grad_norm": 0.1651822328567505, "learning_rate": 2.592321703848305e-06, "loss": 0.007, "step": 164360 }, { "epoch": 1.3879377678326401, "grad_norm": 0.16113120317459106, "learning_rate": 2.5916759072073093e-06, "loss": 0.0048, "step": 164370 }, { "epoch": 1.388022207679804, "grad_norm": 0.3474756181240082, "learning_rate": 2.591030162874619e-06, "loss": 0.0047, "step": 164380 }, { "epoch": 1.388106647526968, "grad_norm": 0.39250415563583374, "learning_rate": 2.5903844708642578e-06, "loss": 0.0044, "step": 164390 }, { "epoch": 1.3881910873741319, "grad_norm": 0.1262068897485733, "learning_rate": 2.5897388311902537e-06, "loss": 0.0055, "step": 164400 }, { "epoch": 1.3882755272212957, "grad_norm": 0.16721215844154358, "learning_rate": 2.589093243866627e-06, "loss": 0.0036, "step": 164410 }, { "epoch": 1.3883599670684597, "grad_norm": 0.7760369777679443, "learning_rate": 2.588447708907402e-06, "loss": 0.0098, "step": 164420 }, { "epoch": 1.3884444069156234, "grad_norm": 0.39436185359954834, "learning_rate": 2.587802226326598e-06, "loss": 0.0077, "step": 164430 }, { "epoch": 1.3885288467627874, "grad_norm": 0.13868756592273712, "learning_rate": 2.5871567961382366e-06, "loss": 0.0034, "step": 164440 }, { "epoch": 1.3886132866099512, "grad_norm": 0.32081910967826843, "learning_rate": 2.5865114183563356e-06, "loss": 0.0042, "step": 164450 }, { "epoch": 1.388697726457115, "grad_norm": 0.100736103951931, "learning_rate": 2.5858660929949113e-06, "loss": 0.0058, "step": 164460 }, { "epoch": 1.388782166304279, "grad_norm": 0.16551126539707184, "learning_rate": 2.5852208200679807e-06, "loss": 0.0063, "step": 164470 }, { "epoch": 1.388866606151443, "grad_norm": 0.6445859670639038, "learning_rate": 2.5845755995895577e-06, "loss": 0.0076, "step": 164480 }, { "epoch": 1.3889510459986067, "grad_norm": 0.41366252303123474, "learning_rate": 2.5839304315736597e-06, "loss": 0.0099, "step": 164490 }, { "epoch": 1.3890354858457705, "grad_norm": 0.07853332161903381, "learning_rate": 2.583285316034295e-06, "loss": 0.0059, "step": 164500 }, { "epoch": 1.3891199256929345, "grad_norm": 0.07395701855421066, "learning_rate": 2.582640252985481e-06, "loss": 0.0061, "step": 164510 }, { "epoch": 1.3892043655400983, "grad_norm": 0.22086875140666962, "learning_rate": 2.5819952424412228e-06, "loss": 0.006, "step": 164520 }, { "epoch": 1.3892888053872623, "grad_norm": 0.1833059936761856, "learning_rate": 2.5813502844155337e-06, "loss": 0.0042, "step": 164530 }, { "epoch": 1.389373245234426, "grad_norm": 0.2569900453090668, "learning_rate": 2.5807053789224218e-06, "loss": 0.0063, "step": 164540 }, { "epoch": 1.38945768508159, "grad_norm": 0.022789523005485535, "learning_rate": 2.5800605259758924e-06, "loss": 0.0086, "step": 164550 }, { "epoch": 1.3895421249287538, "grad_norm": 0.14310821890830994, "learning_rate": 2.5794157255899534e-06, "loss": 0.005, "step": 164560 }, { "epoch": 1.3896265647759178, "grad_norm": 0.3652229607105255, "learning_rate": 2.5787709777786065e-06, "loss": 0.0085, "step": 164570 }, { "epoch": 1.3897110046230816, "grad_norm": 0.25909188389778137, "learning_rate": 2.5781262825558596e-06, "loss": 0.005, "step": 164580 }, { "epoch": 1.3897954444702454, "grad_norm": 0.17614802718162537, "learning_rate": 2.5774816399357117e-06, "loss": 0.0035, "step": 164590 }, { "epoch": 1.3898798843174094, "grad_norm": 0.006318573374301195, "learning_rate": 2.5768370499321683e-06, "loss": 0.0063, "step": 164600 }, { "epoch": 1.3899643241645734, "grad_norm": 0.19048282504081726, "learning_rate": 2.576192512559226e-06, "loss": 0.0065, "step": 164610 }, { "epoch": 1.3900487640117372, "grad_norm": 0.292968213558197, "learning_rate": 2.575548027830887e-06, "loss": 0.0058, "step": 164620 }, { "epoch": 1.390133203858901, "grad_norm": 0.3059043884277344, "learning_rate": 2.5749035957611477e-06, "loss": 0.0068, "step": 164630 }, { "epoch": 1.390217643706065, "grad_norm": 0.093951016664505, "learning_rate": 2.5742592163640064e-06, "loss": 0.0042, "step": 164640 }, { "epoch": 1.3903020835532287, "grad_norm": 0.10030341148376465, "learning_rate": 2.5736148896534553e-06, "loss": 0.0049, "step": 164650 }, { "epoch": 1.3903865234003927, "grad_norm": 0.23519667983055115, "learning_rate": 2.572970615643493e-06, "loss": 0.0045, "step": 164660 }, { "epoch": 1.3904709632475565, "grad_norm": 0.15411663055419922, "learning_rate": 2.5723263943481104e-06, "loss": 0.005, "step": 164670 }, { "epoch": 1.3905554030947205, "grad_norm": 0.5206251740455627, "learning_rate": 2.5716822257813024e-06, "loss": 0.0117, "step": 164680 }, { "epoch": 1.3906398429418843, "grad_norm": 0.061912406235933304, "learning_rate": 2.5710381099570592e-06, "loss": 0.0134, "step": 164690 }, { "epoch": 1.3907242827890482, "grad_norm": 0.17614270746707916, "learning_rate": 2.570394046889371e-06, "loss": 0.0034, "step": 164700 }, { "epoch": 1.390808722636212, "grad_norm": 0.279541552066803, "learning_rate": 2.5697500365922255e-06, "loss": 0.0054, "step": 164710 }, { "epoch": 1.3908931624833758, "grad_norm": 0.19497057795524597, "learning_rate": 2.5691060790796095e-06, "loss": 0.0085, "step": 164720 }, { "epoch": 1.3909776023305398, "grad_norm": 0.4933903217315674, "learning_rate": 2.5684621743655136e-06, "loss": 0.007, "step": 164730 }, { "epoch": 1.3910620421777038, "grad_norm": 0.26885831356048584, "learning_rate": 2.5678183224639186e-06, "loss": 0.0025, "step": 164740 }, { "epoch": 1.3911464820248676, "grad_norm": 0.21213464438915253, "learning_rate": 2.5671745233888133e-06, "loss": 0.0149, "step": 164750 }, { "epoch": 1.3912309218720313, "grad_norm": 0.5890640616416931, "learning_rate": 2.566530777154177e-06, "loss": 0.0084, "step": 164760 }, { "epoch": 1.3913153617191953, "grad_norm": 0.23617519438266754, "learning_rate": 2.5658870837739947e-06, "loss": 0.0079, "step": 164770 }, { "epoch": 1.3913998015663591, "grad_norm": 0.3949145972728729, "learning_rate": 2.565243443262247e-06, "loss": 0.0058, "step": 164780 }, { "epoch": 1.3914842414135231, "grad_norm": 0.24621309340000153, "learning_rate": 2.5645998556329127e-06, "loss": 0.0063, "step": 164790 }, { "epoch": 1.391568681260687, "grad_norm": 0.14098471403121948, "learning_rate": 2.5639563208999707e-06, "loss": 0.0043, "step": 164800 }, { "epoch": 1.3916531211078507, "grad_norm": 0.2018718272447586, "learning_rate": 2.5633128390773953e-06, "loss": 0.0056, "step": 164810 }, { "epoch": 1.3917375609550147, "grad_norm": 0.44728660583496094, "learning_rate": 2.562669410179168e-06, "loss": 0.0082, "step": 164820 }, { "epoch": 1.3918220008021787, "grad_norm": 0.3156659007072449, "learning_rate": 2.5620260342192604e-06, "loss": 0.0106, "step": 164830 }, { "epoch": 1.3919064406493424, "grad_norm": 0.2622590661048889, "learning_rate": 2.5613827112116496e-06, "loss": 0.0031, "step": 164840 }, { "epoch": 1.3919908804965062, "grad_norm": 0.6755923628807068, "learning_rate": 2.5607394411703046e-06, "loss": 0.01, "step": 164850 }, { "epoch": 1.3920753203436702, "grad_norm": 0.14746910333633423, "learning_rate": 2.5600962241092005e-06, "loss": 0.0049, "step": 164860 }, { "epoch": 1.392159760190834, "grad_norm": 0.44916337728500366, "learning_rate": 2.559453060042306e-06, "loss": 0.0053, "step": 164870 }, { "epoch": 1.392244200037998, "grad_norm": 0.5161041617393494, "learning_rate": 2.558809948983592e-06, "loss": 0.0084, "step": 164880 }, { "epoch": 1.3923286398851618, "grad_norm": 0.3993936777114868, "learning_rate": 2.558166890947024e-06, "loss": 0.011, "step": 164890 }, { "epoch": 1.3924130797323258, "grad_norm": 0.8593581318855286, "learning_rate": 2.5575238859465714e-06, "loss": 0.0076, "step": 164900 }, { "epoch": 1.3924975195794895, "grad_norm": 0.21841219067573547, "learning_rate": 2.5568809339962003e-06, "loss": 0.0094, "step": 164910 }, { "epoch": 1.3925819594266535, "grad_norm": 0.2859419286251068, "learning_rate": 2.5562380351098727e-06, "loss": 0.005, "step": 164920 }, { "epoch": 1.3926663992738173, "grad_norm": 0.31519371271133423, "learning_rate": 2.5555951893015555e-06, "loss": 0.0069, "step": 164930 }, { "epoch": 1.392750839120981, "grad_norm": 0.18781477212905884, "learning_rate": 2.5549523965852106e-06, "loss": 0.0071, "step": 164940 }, { "epoch": 1.392835278968145, "grad_norm": 0.010378972627222538, "learning_rate": 2.5543096569747983e-06, "loss": 0.0047, "step": 164950 }, { "epoch": 1.392919718815309, "grad_norm": 0.08593814074993134, "learning_rate": 2.5536669704842776e-06, "loss": 0.0056, "step": 164960 }, { "epoch": 1.3930041586624728, "grad_norm": 0.2957225441932678, "learning_rate": 2.55302433712761e-06, "loss": 0.0075, "step": 164970 }, { "epoch": 1.3930885985096366, "grad_norm": 0.3776872754096985, "learning_rate": 2.5523817569187514e-06, "loss": 0.0081, "step": 164980 }, { "epoch": 1.3931730383568006, "grad_norm": 0.02821510098874569, "learning_rate": 2.551739229871661e-06, "loss": 0.007, "step": 164990 }, { "epoch": 1.3932574782039644, "grad_norm": 0.6707074046134949, "learning_rate": 2.5510967560002906e-06, "loss": 0.0112, "step": 165000 }, { "epoch": 1.3933419180511284, "grad_norm": 0.22381561994552612, "learning_rate": 2.5504543353185996e-06, "loss": 0.006, "step": 165010 }, { "epoch": 1.3934263578982922, "grad_norm": 0.2779165506362915, "learning_rate": 2.5498119678405375e-06, "loss": 0.0061, "step": 165020 }, { "epoch": 1.393510797745456, "grad_norm": 0.13209542632102966, "learning_rate": 2.549169653580058e-06, "loss": 0.0058, "step": 165030 }, { "epoch": 1.39359523759262, "grad_norm": 0.23642361164093018, "learning_rate": 2.548527392551111e-06, "loss": 0.0057, "step": 165040 }, { "epoch": 1.393679677439784, "grad_norm": 0.13585412502288818, "learning_rate": 2.547885184767645e-06, "loss": 0.0062, "step": 165050 }, { "epoch": 1.3937641172869477, "grad_norm": 0.1621512621641159, "learning_rate": 2.5472430302436126e-06, "loss": 0.0057, "step": 165060 }, { "epoch": 1.3938485571341115, "grad_norm": 0.14842809736728668, "learning_rate": 2.5466009289929563e-06, "loss": 0.0054, "step": 165070 }, { "epoch": 1.3939329969812755, "grad_norm": 0.08669279515743256, "learning_rate": 2.5459588810296276e-06, "loss": 0.0137, "step": 165080 }, { "epoch": 1.3940174368284393, "grad_norm": 0.3477996289730072, "learning_rate": 2.545316886367567e-06, "loss": 0.0078, "step": 165090 }, { "epoch": 1.3941018766756033, "grad_norm": 0.2119264155626297, "learning_rate": 2.544674945020722e-06, "loss": 0.0045, "step": 165100 }, { "epoch": 1.394186316522767, "grad_norm": 0.2458236962556839, "learning_rate": 2.5440330570030346e-06, "loss": 0.0053, "step": 165110 }, { "epoch": 1.394270756369931, "grad_norm": 0.1928224116563797, "learning_rate": 2.5433912223284457e-06, "loss": 0.0073, "step": 165120 }, { "epoch": 1.3943551962170948, "grad_norm": 0.27045488357543945, "learning_rate": 2.5427494410108944e-06, "loss": 0.0177, "step": 165130 }, { "epoch": 1.3944396360642588, "grad_norm": 0.26669618487358093, "learning_rate": 2.5421077130643234e-06, "loss": 0.0102, "step": 165140 }, { "epoch": 1.3945240759114226, "grad_norm": 0.0589941069483757, "learning_rate": 2.541466038502669e-06, "loss": 0.0046, "step": 165150 }, { "epoch": 1.3946085157585864, "grad_norm": 0.027885956689715385, "learning_rate": 2.540824417339867e-06, "loss": 0.0077, "step": 165160 }, { "epoch": 1.3946929556057504, "grad_norm": 0.997347354888916, "learning_rate": 2.5401828495898566e-06, "loss": 0.01, "step": 165170 }, { "epoch": 1.3947773954529143, "grad_norm": 0.1609015315771103, "learning_rate": 2.5395413352665686e-06, "loss": 0.0111, "step": 165180 }, { "epoch": 1.3948618353000781, "grad_norm": 0.24943120777606964, "learning_rate": 2.5388998743839407e-06, "loss": 0.0038, "step": 165190 }, { "epoch": 1.394946275147242, "grad_norm": 0.26726141571998596, "learning_rate": 2.5382584669559027e-06, "loss": 0.0101, "step": 165200 }, { "epoch": 1.395030714994406, "grad_norm": 0.15435226261615753, "learning_rate": 2.5376171129963868e-06, "loss": 0.0058, "step": 165210 }, { "epoch": 1.3951151548415697, "grad_norm": 0.27542778849601746, "learning_rate": 2.5369758125193205e-06, "loss": 0.0047, "step": 165220 }, { "epoch": 1.3951995946887337, "grad_norm": 0.35701897740364075, "learning_rate": 2.536334565538637e-06, "loss": 0.0106, "step": 165230 }, { "epoch": 1.3952840345358974, "grad_norm": 0.16075529158115387, "learning_rate": 2.5356933720682597e-06, "loss": 0.0015, "step": 165240 }, { "epoch": 1.3953684743830614, "grad_norm": 0.4917873442173004, "learning_rate": 2.5350522321221193e-06, "loss": 0.0085, "step": 165250 }, { "epoch": 1.3954529142302252, "grad_norm": 0.17452839016914368, "learning_rate": 2.534411145714139e-06, "loss": 0.0081, "step": 165260 }, { "epoch": 1.3955373540773892, "grad_norm": 0.1610478013753891, "learning_rate": 2.5337701128582436e-06, "loss": 0.0083, "step": 165270 }, { "epoch": 1.395621793924553, "grad_norm": 0.29303938150405884, "learning_rate": 2.533129133568356e-06, "loss": 0.0063, "step": 165280 }, { "epoch": 1.3957062337717168, "grad_norm": 0.37006404995918274, "learning_rate": 2.5324882078583967e-06, "loss": 0.0052, "step": 165290 }, { "epoch": 1.3957906736188808, "grad_norm": 0.18237197399139404, "learning_rate": 2.531847335742289e-06, "loss": 0.0031, "step": 165300 }, { "epoch": 1.3958751134660448, "grad_norm": 0.3621833026409149, "learning_rate": 2.53120651723395e-06, "loss": 0.0076, "step": 165310 }, { "epoch": 1.3959595533132085, "grad_norm": 0.15193313360214233, "learning_rate": 2.5305657523473015e-06, "loss": 0.0078, "step": 165320 }, { "epoch": 1.3960439931603723, "grad_norm": 0.3195211887359619, "learning_rate": 2.5299250410962566e-06, "loss": 0.0078, "step": 165330 }, { "epoch": 1.3961284330075363, "grad_norm": 0.2812671959400177, "learning_rate": 2.5292843834947357e-06, "loss": 0.0125, "step": 165340 }, { "epoch": 1.3962128728547, "grad_norm": 0.07228879630565643, "learning_rate": 2.528643779556652e-06, "loss": 0.0043, "step": 165350 }, { "epoch": 1.396297312701864, "grad_norm": 0.3127902150154114, "learning_rate": 2.5280032292959185e-06, "loss": 0.0113, "step": 165360 }, { "epoch": 1.3963817525490279, "grad_norm": 0.12781043350696564, "learning_rate": 2.527362732726446e-06, "loss": 0.0085, "step": 165370 }, { "epoch": 1.3964661923961916, "grad_norm": 0.17360413074493408, "learning_rate": 2.5267222898621514e-06, "loss": 0.0059, "step": 165380 }, { "epoch": 1.3965506322433556, "grad_norm": 0.6404470801353455, "learning_rate": 2.526081900716941e-06, "loss": 0.006, "step": 165390 }, { "epoch": 1.3966350720905196, "grad_norm": 0.5110870003700256, "learning_rate": 2.5254415653047225e-06, "loss": 0.0076, "step": 165400 }, { "epoch": 1.3967195119376834, "grad_norm": 0.09483002126216888, "learning_rate": 2.5248012836394087e-06, "loss": 0.003, "step": 165410 }, { "epoch": 1.3968039517848472, "grad_norm": 0.4294879734516144, "learning_rate": 2.5241610557349016e-06, "loss": 0.007, "step": 165420 }, { "epoch": 1.3968883916320112, "grad_norm": 0.018304962664842606, "learning_rate": 2.5235208816051113e-06, "loss": 0.0044, "step": 165430 }, { "epoch": 1.396972831479175, "grad_norm": 0.0771411582827568, "learning_rate": 2.5228807612639393e-06, "loss": 0.005, "step": 165440 }, { "epoch": 1.397057271326339, "grad_norm": 0.16981154680252075, "learning_rate": 2.5222406947252904e-06, "loss": 0.0063, "step": 165450 }, { "epoch": 1.3971417111735027, "grad_norm": 0.18295030295848846, "learning_rate": 2.5216006820030636e-06, "loss": 0.0104, "step": 165460 }, { "epoch": 1.3972261510206667, "grad_norm": 0.10535571724176407, "learning_rate": 2.5209607231111638e-06, "loss": 0.0047, "step": 165470 }, { "epoch": 1.3973105908678305, "grad_norm": 0.27820977568626404, "learning_rate": 2.5203208180634874e-06, "loss": 0.0073, "step": 165480 }, { "epoch": 1.3973950307149945, "grad_norm": 0.3396266996860504, "learning_rate": 2.5196809668739364e-06, "loss": 0.006, "step": 165490 }, { "epoch": 1.3974794705621583, "grad_norm": 0.5179864764213562, "learning_rate": 2.5190411695564067e-06, "loss": 0.0098, "step": 165500 }, { "epoch": 1.397563910409322, "grad_norm": 0.24073582887649536, "learning_rate": 2.5184014261247923e-06, "loss": 0.0043, "step": 165510 }, { "epoch": 1.397648350256486, "grad_norm": 0.24665187299251556, "learning_rate": 2.5177617365929946e-06, "loss": 0.0051, "step": 165520 }, { "epoch": 1.39773279010365, "grad_norm": 0.4571979343891144, "learning_rate": 2.517122100974899e-06, "loss": 0.0092, "step": 165530 }, { "epoch": 1.3978172299508138, "grad_norm": 0.15952490270137787, "learning_rate": 2.516482519284405e-06, "loss": 0.0092, "step": 165540 }, { "epoch": 1.3979016697979776, "grad_norm": 0.3002283573150635, "learning_rate": 2.5158429915354e-06, "loss": 0.0061, "step": 165550 }, { "epoch": 1.3979861096451416, "grad_norm": 0.24102768301963806, "learning_rate": 2.5152035177417777e-06, "loss": 0.0062, "step": 165560 }, { "epoch": 1.3980705494923054, "grad_norm": 0.4045323133468628, "learning_rate": 2.5145640979174236e-06, "loss": 0.0067, "step": 165570 }, { "epoch": 1.3981549893394694, "grad_norm": 0.10008561611175537, "learning_rate": 2.5139247320762305e-06, "loss": 0.0067, "step": 165580 }, { "epoch": 1.3982394291866331, "grad_norm": 0.11878806352615356, "learning_rate": 2.513285420232082e-06, "loss": 0.0055, "step": 165590 }, { "epoch": 1.3983238690337971, "grad_norm": 0.14579060673713684, "learning_rate": 2.512646162398865e-06, "loss": 0.0085, "step": 165600 }, { "epoch": 1.398408308880961, "grad_norm": 0.10693977028131485, "learning_rate": 2.5120069585904615e-06, "loss": 0.007, "step": 165610 }, { "epoch": 1.398492748728125, "grad_norm": 0.07112899422645569, "learning_rate": 2.511367808820759e-06, "loss": 0.0079, "step": 165620 }, { "epoch": 1.3985771885752887, "grad_norm": 0.22707326710224152, "learning_rate": 2.5107287131036385e-06, "loss": 0.0061, "step": 165630 }, { "epoch": 1.3986616284224525, "grad_norm": 0.35309356451034546, "learning_rate": 2.5100896714529778e-06, "loss": 0.0076, "step": 165640 }, { "epoch": 1.3987460682696164, "grad_norm": 0.23319509625434875, "learning_rate": 2.5094506838826615e-06, "loss": 0.0061, "step": 165650 }, { "epoch": 1.3988305081167804, "grad_norm": 0.26232123374938965, "learning_rate": 2.5088117504065636e-06, "loss": 0.0115, "step": 165660 }, { "epoch": 1.3989149479639442, "grad_norm": 0.18514059484004974, "learning_rate": 2.5081728710385658e-06, "loss": 0.0066, "step": 165670 }, { "epoch": 1.398999387811108, "grad_norm": 0.07934039831161499, "learning_rate": 2.5075340457925428e-06, "loss": 0.0032, "step": 165680 }, { "epoch": 1.399083827658272, "grad_norm": 0.20423875749111176, "learning_rate": 2.5068952746823694e-06, "loss": 0.0079, "step": 165690 }, { "epoch": 1.3991682675054358, "grad_norm": 0.2807864546775818, "learning_rate": 2.5062565577219185e-06, "loss": 0.0053, "step": 165700 }, { "epoch": 1.3992527073525998, "grad_norm": 0.27842792868614197, "learning_rate": 2.5056178949250655e-06, "loss": 0.0057, "step": 165710 }, { "epoch": 1.3993371471997635, "grad_norm": 0.0031481387559324503, "learning_rate": 2.5049792863056787e-06, "loss": 0.0059, "step": 165720 }, { "epoch": 1.3994215870469273, "grad_norm": 0.414605051279068, "learning_rate": 2.5043407318776324e-06, "loss": 0.0128, "step": 165730 }, { "epoch": 1.3995060268940913, "grad_norm": 0.12825345993041992, "learning_rate": 2.503702231654794e-06, "loss": 0.0093, "step": 165740 }, { "epoch": 1.3995904667412553, "grad_norm": 0.196262389421463, "learning_rate": 2.50306378565103e-06, "loss": 0.0041, "step": 165750 }, { "epoch": 1.399674906588419, "grad_norm": 0.03141217678785324, "learning_rate": 2.502425393880211e-06, "loss": 0.0065, "step": 165760 }, { "epoch": 1.3997593464355829, "grad_norm": 0.2464059442281723, "learning_rate": 2.5017870563562e-06, "loss": 0.0041, "step": 165770 }, { "epoch": 1.3998437862827469, "grad_norm": 0.3271022140979767, "learning_rate": 2.501148773092863e-06, "loss": 0.0119, "step": 165780 }, { "epoch": 1.3999282261299106, "grad_norm": 0.04034792259335518, "learning_rate": 2.50051054410406e-06, "loss": 0.0058, "step": 165790 }, { "epoch": 1.4000126659770746, "grad_norm": 0.0997065007686615, "learning_rate": 2.4998723694036574e-06, "loss": 0.0066, "step": 165800 }, { "epoch": 1.4000971058242384, "grad_norm": 0.3844453990459442, "learning_rate": 2.499234249005513e-06, "loss": 0.0045, "step": 165810 }, { "epoch": 1.4001815456714024, "grad_norm": 0.27379000186920166, "learning_rate": 2.4985961829234904e-06, "loss": 0.0061, "step": 165820 }, { "epoch": 1.4002659855185662, "grad_norm": 0.33747774362564087, "learning_rate": 2.4979581711714446e-06, "loss": 0.0062, "step": 165830 }, { "epoch": 1.4003504253657302, "grad_norm": 0.2575758993625641, "learning_rate": 2.497320213763238e-06, "loss": 0.0085, "step": 165840 }, { "epoch": 1.400434865212894, "grad_norm": 0.04759834334254265, "learning_rate": 2.4966823107127195e-06, "loss": 0.0113, "step": 165850 }, { "epoch": 1.4005193050600577, "grad_norm": 0.20660413801670074, "learning_rate": 2.4960444620337505e-06, "loss": 0.0054, "step": 165860 }, { "epoch": 1.4006037449072217, "grad_norm": 0.1167060136795044, "learning_rate": 2.4954066677401823e-06, "loss": 0.0075, "step": 165870 }, { "epoch": 1.4006881847543857, "grad_norm": 0.5896837115287781, "learning_rate": 2.4947689278458664e-06, "loss": 0.0061, "step": 165880 }, { "epoch": 1.4007726246015495, "grad_norm": 0.10780124366283417, "learning_rate": 2.494131242364658e-06, "loss": 0.0053, "step": 165890 }, { "epoch": 1.4008570644487133, "grad_norm": 0.13363778591156006, "learning_rate": 2.493493611310403e-06, "loss": 0.0221, "step": 165900 }, { "epoch": 1.4009415042958773, "grad_norm": 0.10349719971418381, "learning_rate": 2.4928560346969555e-06, "loss": 0.0067, "step": 165910 }, { "epoch": 1.401025944143041, "grad_norm": 0.2561958432197571, "learning_rate": 2.4922185125381614e-06, "loss": 0.0092, "step": 165920 }, { "epoch": 1.401110383990205, "grad_norm": 0.001107059302739799, "learning_rate": 2.491581044847867e-06, "loss": 0.0049, "step": 165930 }, { "epoch": 1.4011948238373688, "grad_norm": 0.2252565622329712, "learning_rate": 2.4909436316399166e-06, "loss": 0.0034, "step": 165940 }, { "epoch": 1.4012792636845326, "grad_norm": 0.2124433070421219, "learning_rate": 2.490306272928158e-06, "loss": 0.009, "step": 165950 }, { "epoch": 1.4013637035316966, "grad_norm": 0.44999799132347107, "learning_rate": 2.489668968726431e-06, "loss": 0.0132, "step": 165960 }, { "epoch": 1.4014481433788606, "grad_norm": 1.6796725988388062, "learning_rate": 2.4890317190485813e-06, "loss": 0.0069, "step": 165970 }, { "epoch": 1.4015325832260244, "grad_norm": 0.1577790230512619, "learning_rate": 2.488394523908449e-06, "loss": 0.0068, "step": 165980 }, { "epoch": 1.4016170230731881, "grad_norm": 0.4011788070201874, "learning_rate": 2.48775738331987e-06, "loss": 0.0056, "step": 165990 }, { "epoch": 1.4017014629203521, "grad_norm": 0.3810969293117523, "learning_rate": 2.4871202972966887e-06, "loss": 0.0106, "step": 166000 }, { "epoch": 1.401785902767516, "grad_norm": 0.04335613548755646, "learning_rate": 2.486483265852739e-06, "loss": 0.0072, "step": 166010 }, { "epoch": 1.40187034261468, "grad_norm": 0.09904097765684128, "learning_rate": 2.4858462890018576e-06, "loss": 0.0116, "step": 166020 }, { "epoch": 1.4019547824618437, "grad_norm": 0.09055778384208679, "learning_rate": 2.4852093667578775e-06, "loss": 0.0086, "step": 166030 }, { "epoch": 1.4020392223090077, "grad_norm": 0.0849636122584343, "learning_rate": 2.484572499134637e-06, "loss": 0.0059, "step": 166040 }, { "epoch": 1.4021236621561715, "grad_norm": 0.3749137222766876, "learning_rate": 2.4839356861459646e-06, "loss": 0.0066, "step": 166050 }, { "epoch": 1.4022081020033355, "grad_norm": 0.029756102710962296, "learning_rate": 2.4832989278056952e-06, "loss": 0.005, "step": 166060 }, { "epoch": 1.4022925418504992, "grad_norm": 0.16033412516117096, "learning_rate": 2.4826622241276556e-06, "loss": 0.0057, "step": 166070 }, { "epoch": 1.402376981697663, "grad_norm": 0.009889145381748676, "learning_rate": 2.482025575125678e-06, "loss": 0.0052, "step": 166080 }, { "epoch": 1.402461421544827, "grad_norm": 0.015294672921299934, "learning_rate": 2.4813889808135895e-06, "loss": 0.0035, "step": 166090 }, { "epoch": 1.402545861391991, "grad_norm": 0.7926065325737, "learning_rate": 2.480752441205216e-06, "loss": 0.011, "step": 166100 }, { "epoch": 1.4026303012391548, "grad_norm": 0.16157488524913788, "learning_rate": 2.4801159563143827e-06, "loss": 0.0083, "step": 166110 }, { "epoch": 1.4027147410863186, "grad_norm": 0.3599126636981964, "learning_rate": 2.4794795261549132e-06, "loss": 0.0083, "step": 166120 }, { "epoch": 1.4027991809334825, "grad_norm": 0.2870270609855652, "learning_rate": 2.478843150740634e-06, "loss": 0.0071, "step": 166130 }, { "epoch": 1.4028836207806463, "grad_norm": 0.2954919934272766, "learning_rate": 2.478206830085363e-06, "loss": 0.0054, "step": 166140 }, { "epoch": 1.4029680606278103, "grad_norm": 0.09059486538171768, "learning_rate": 2.4775705642029243e-06, "loss": 0.0079, "step": 166150 }, { "epoch": 1.403052500474974, "grad_norm": 0.2760851979255676, "learning_rate": 2.476934353107136e-06, "loss": 0.0094, "step": 166160 }, { "epoch": 1.403136940322138, "grad_norm": 0.8115183115005493, "learning_rate": 2.4762981968118173e-06, "loss": 0.0108, "step": 166170 }, { "epoch": 1.4032213801693019, "grad_norm": 0.1138012707233429, "learning_rate": 2.475662095330782e-06, "loss": 0.008, "step": 166180 }, { "epoch": 1.4033058200164659, "grad_norm": 0.02733587846159935, "learning_rate": 2.4750260486778505e-06, "loss": 0.005, "step": 166190 }, { "epoch": 1.4033902598636296, "grad_norm": 0.1578947901725769, "learning_rate": 2.4743900568668344e-06, "loss": 0.0094, "step": 166200 }, { "epoch": 1.4034746997107934, "grad_norm": 0.2698337137699127, "learning_rate": 2.4737541199115506e-06, "loss": 0.0075, "step": 166210 }, { "epoch": 1.4035591395579574, "grad_norm": 0.025542160496115685, "learning_rate": 2.473118237825809e-06, "loss": 0.0069, "step": 166220 }, { "epoch": 1.4036435794051214, "grad_norm": 0.12472471594810486, "learning_rate": 2.47248241062342e-06, "loss": 0.0095, "step": 166230 }, { "epoch": 1.4037280192522852, "grad_norm": 0.4626404643058777, "learning_rate": 2.471846638318197e-06, "loss": 0.0068, "step": 166240 }, { "epoch": 1.403812459099449, "grad_norm": 0.16938596963882446, "learning_rate": 2.471210920923947e-06, "loss": 0.0045, "step": 166250 }, { "epoch": 1.403896898946613, "grad_norm": 0.19478380680084229, "learning_rate": 2.470575258454478e-06, "loss": 0.0081, "step": 166260 }, { "epoch": 1.4039813387937767, "grad_norm": 0.17824748158454895, "learning_rate": 2.4699396509235936e-06, "loss": 0.0055, "step": 166270 }, { "epoch": 1.4040657786409407, "grad_norm": 0.4787488281726837, "learning_rate": 2.4693040983451033e-06, "loss": 0.0072, "step": 166280 }, { "epoch": 1.4041502184881045, "grad_norm": 0.1361071765422821, "learning_rate": 2.4686686007328082e-06, "loss": 0.0041, "step": 166290 }, { "epoch": 1.4042346583352683, "grad_norm": 0.4947792589664459, "learning_rate": 2.4680331581005133e-06, "loss": 0.0042, "step": 166300 }, { "epoch": 1.4043190981824323, "grad_norm": 0.4293108284473419, "learning_rate": 2.4673977704620184e-06, "loss": 0.0067, "step": 166310 }, { "epoch": 1.4044035380295963, "grad_norm": 0.36128664016723633, "learning_rate": 2.466762437831126e-06, "loss": 0.0034, "step": 166320 }, { "epoch": 1.40448797787676, "grad_norm": 0.3088203966617584, "learning_rate": 2.4661271602216353e-06, "loss": 0.0037, "step": 166330 }, { "epoch": 1.4045724177239238, "grad_norm": 0.13274267315864563, "learning_rate": 2.465491937647343e-06, "loss": 0.0043, "step": 166340 }, { "epoch": 1.4046568575710878, "grad_norm": 0.44459596276283264, "learning_rate": 2.464856770122046e-06, "loss": 0.0061, "step": 166350 }, { "epoch": 1.4047412974182516, "grad_norm": 0.27053025364875793, "learning_rate": 2.464221657659539e-06, "loss": 0.0058, "step": 166360 }, { "epoch": 1.4048257372654156, "grad_norm": 0.12692563235759735, "learning_rate": 2.4635866002736197e-06, "loss": 0.0065, "step": 166370 }, { "epoch": 1.4049101771125794, "grad_norm": 0.09542378783226013, "learning_rate": 2.462951597978078e-06, "loss": 0.0031, "step": 166380 }, { "epoch": 1.4049946169597434, "grad_norm": 0.12088783085346222, "learning_rate": 2.4623166507867096e-06, "loss": 0.0057, "step": 166390 }, { "epoch": 1.4050790568069071, "grad_norm": 0.29471954703330994, "learning_rate": 2.4616817587133015e-06, "loss": 0.0058, "step": 166400 }, { "epoch": 1.4051634966540711, "grad_norm": 0.44557076692581177, "learning_rate": 2.461046921771647e-06, "loss": 0.0055, "step": 166410 }, { "epoch": 1.405247936501235, "grad_norm": 0.23549491167068481, "learning_rate": 2.4604121399755336e-06, "loss": 0.0051, "step": 166420 }, { "epoch": 1.4053323763483987, "grad_norm": 0.26940974593162537, "learning_rate": 2.4597774133387482e-06, "loss": 0.0044, "step": 166430 }, { "epoch": 1.4054168161955627, "grad_norm": 0.20728762447834015, "learning_rate": 2.459142741875074e-06, "loss": 0.0043, "step": 166440 }, { "epoch": 1.4055012560427267, "grad_norm": 0.259297639131546, "learning_rate": 2.4585081255983017e-06, "loss": 0.0046, "step": 166450 }, { "epoch": 1.4055856958898905, "grad_norm": 0.4239519238471985, "learning_rate": 2.457873564522212e-06, "loss": 0.0075, "step": 166460 }, { "epoch": 1.4056701357370542, "grad_norm": 0.1653977632522583, "learning_rate": 2.457239058660585e-06, "loss": 0.0052, "step": 166470 }, { "epoch": 1.4057545755842182, "grad_norm": 0.21890988945960999, "learning_rate": 2.4566046080272072e-06, "loss": 0.0095, "step": 166480 }, { "epoch": 1.405839015431382, "grad_norm": 0.3051466643810272, "learning_rate": 2.455970212635855e-06, "loss": 0.0052, "step": 166490 }, { "epoch": 1.405923455278546, "grad_norm": 0.1394248753786087, "learning_rate": 2.4553358725003103e-06, "loss": 0.0043, "step": 166500 }, { "epoch": 1.4060078951257098, "grad_norm": 0.2878968417644501, "learning_rate": 2.4547015876343454e-06, "loss": 0.0078, "step": 166510 }, { "epoch": 1.4060923349728738, "grad_norm": 0.09948549419641495, "learning_rate": 2.454067358051743e-06, "loss": 0.0107, "step": 166520 }, { "epoch": 1.4061767748200376, "grad_norm": 0.2215363085269928, "learning_rate": 2.4534331837662737e-06, "loss": 0.0086, "step": 166530 }, { "epoch": 1.4062612146672016, "grad_norm": 0.10931272804737091, "learning_rate": 2.4527990647917167e-06, "loss": 0.0043, "step": 166540 }, { "epoch": 1.4063456545143653, "grad_norm": 0.2888430655002594, "learning_rate": 2.452165001141839e-06, "loss": 0.0037, "step": 166550 }, { "epoch": 1.406430094361529, "grad_norm": 0.2705272436141968, "learning_rate": 2.4515309928304175e-06, "loss": 0.0068, "step": 166560 }, { "epoch": 1.406514534208693, "grad_norm": 0.34179428219795227, "learning_rate": 2.4508970398712213e-06, "loss": 0.0051, "step": 166570 }, { "epoch": 1.4065989740558569, "grad_norm": 0.1104542687535286, "learning_rate": 2.4502631422780182e-06, "loss": 0.0068, "step": 166580 }, { "epoch": 1.4066834139030209, "grad_norm": 0.21903976798057556, "learning_rate": 2.4496293000645776e-06, "loss": 0.0049, "step": 166590 }, { "epoch": 1.4067678537501846, "grad_norm": 0.4456213414669037, "learning_rate": 2.448995513244664e-06, "loss": 0.0074, "step": 166600 }, { "epoch": 1.4068522935973486, "grad_norm": 0.04129182547330856, "learning_rate": 2.448361781832047e-06, "loss": 0.0078, "step": 166610 }, { "epoch": 1.4069367334445124, "grad_norm": 0.5556634068489075, "learning_rate": 2.447728105840487e-06, "loss": 0.0083, "step": 166620 }, { "epoch": 1.4070211732916764, "grad_norm": 0.06952359527349472, "learning_rate": 2.447094485283752e-06, "loss": 0.0067, "step": 166630 }, { "epoch": 1.4071056131388402, "grad_norm": 0.2981231212615967, "learning_rate": 2.4464609201755996e-06, "loss": 0.0074, "step": 166640 }, { "epoch": 1.407190052986004, "grad_norm": 0.3309464752674103, "learning_rate": 2.4458274105297946e-06, "loss": 0.0069, "step": 166650 }, { "epoch": 1.407274492833168, "grad_norm": 0.16166889667510986, "learning_rate": 2.4451939563600945e-06, "loss": 0.0052, "step": 166660 }, { "epoch": 1.407358932680332, "grad_norm": 0.017448225989937782, "learning_rate": 2.4445605576802583e-06, "loss": 0.0066, "step": 166670 }, { "epoch": 1.4074433725274957, "grad_norm": 0.436191588640213, "learning_rate": 2.4439272145040416e-06, "loss": 0.0075, "step": 166680 }, { "epoch": 1.4075278123746595, "grad_norm": 0.283149778842926, "learning_rate": 2.443293926845203e-06, "loss": 0.0109, "step": 166690 }, { "epoch": 1.4076122522218235, "grad_norm": 0.43194612860679626, "learning_rate": 2.4426606947174973e-06, "loss": 0.0088, "step": 166700 }, { "epoch": 1.4076966920689873, "grad_norm": 0.03680284321308136, "learning_rate": 2.4420275181346754e-06, "loss": 0.0025, "step": 166710 }, { "epoch": 1.4077811319161513, "grad_norm": 0.2283632606267929, "learning_rate": 2.4413943971104933e-06, "loss": 0.0073, "step": 166720 }, { "epoch": 1.407865571763315, "grad_norm": 0.3659746050834656, "learning_rate": 2.4407613316586986e-06, "loss": 0.0038, "step": 166730 }, { "epoch": 1.407950011610479, "grad_norm": 0.1513250172138214, "learning_rate": 2.440128321793047e-06, "loss": 0.0092, "step": 166740 }, { "epoch": 1.4080344514576428, "grad_norm": 0.051650531589984894, "learning_rate": 2.4394953675272796e-06, "loss": 0.0051, "step": 166750 }, { "epoch": 1.4081188913048068, "grad_norm": 0.24552612006664276, "learning_rate": 2.4388624688751505e-06, "loss": 0.0045, "step": 166760 }, { "epoch": 1.4082033311519706, "grad_norm": 0.2793222665786743, "learning_rate": 2.4382296258504024e-06, "loss": 0.0051, "step": 166770 }, { "epoch": 1.4082877709991344, "grad_norm": 0.19743043184280396, "learning_rate": 2.4375968384667833e-06, "loss": 0.0067, "step": 166780 }, { "epoch": 1.4083722108462984, "grad_norm": 0.16538935899734497, "learning_rate": 2.4369641067380335e-06, "loss": 0.0087, "step": 166790 }, { "epoch": 1.4084566506934624, "grad_norm": 0.1934712529182434, "learning_rate": 2.4363314306779e-06, "loss": 0.0098, "step": 166800 }, { "epoch": 1.4085410905406262, "grad_norm": 0.1908448189496994, "learning_rate": 2.4356988103001228e-06, "loss": 0.0052, "step": 166810 }, { "epoch": 1.40862553038779, "grad_norm": 0.08737313002347946, "learning_rate": 2.4350662456184417e-06, "loss": 0.0063, "step": 166820 }, { "epoch": 1.408709970234954, "grad_norm": 0.20176362991333008, "learning_rate": 2.4344337366465965e-06, "loss": 0.0025, "step": 166830 }, { "epoch": 1.4087944100821177, "grad_norm": 0.2855963706970215, "learning_rate": 2.4338012833983227e-06, "loss": 0.0057, "step": 166840 }, { "epoch": 1.4088788499292817, "grad_norm": 0.022472692653536797, "learning_rate": 2.433168885887361e-06, "loss": 0.0105, "step": 166850 }, { "epoch": 1.4089632897764455, "grad_norm": 0.29691192507743835, "learning_rate": 2.4325365441274433e-06, "loss": 0.0105, "step": 166860 }, { "epoch": 1.4090477296236092, "grad_norm": 0.05153842642903328, "learning_rate": 2.4319042581323078e-06, "loss": 0.0035, "step": 166870 }, { "epoch": 1.4091321694707732, "grad_norm": 0.10638204216957092, "learning_rate": 2.431272027915683e-06, "loss": 0.0059, "step": 166880 }, { "epoch": 1.4092166093179372, "grad_norm": 0.07148566842079163, "learning_rate": 2.4306398534913063e-06, "loss": 0.0059, "step": 166890 }, { "epoch": 1.409301049165101, "grad_norm": 0.09910137206315994, "learning_rate": 2.4300077348729046e-06, "loss": 0.01, "step": 166900 }, { "epoch": 1.4093854890122648, "grad_norm": 0.1643800437450409, "learning_rate": 2.4293756720742084e-06, "loss": 0.0067, "step": 166910 }, { "epoch": 1.4094699288594288, "grad_norm": 0.08660462498664856, "learning_rate": 2.4287436651089463e-06, "loss": 0.0064, "step": 166920 }, { "epoch": 1.4095543687065926, "grad_norm": 0.4232557415962219, "learning_rate": 2.4281117139908428e-06, "loss": 0.0082, "step": 166930 }, { "epoch": 1.4096388085537566, "grad_norm": 0.2932111620903015, "learning_rate": 2.4274798187336274e-06, "loss": 0.0074, "step": 166940 }, { "epoch": 1.4097232484009203, "grad_norm": 0.22408555448055267, "learning_rate": 2.4268479793510213e-06, "loss": 0.0112, "step": 166950 }, { "epoch": 1.4098076882480843, "grad_norm": 0.5053976774215698, "learning_rate": 2.426216195856752e-06, "loss": 0.0044, "step": 166960 }, { "epoch": 1.409892128095248, "grad_norm": 0.2930911183357239, "learning_rate": 2.425584468264538e-06, "loss": 0.0088, "step": 166970 }, { "epoch": 1.409976567942412, "grad_norm": 0.3324248492717743, "learning_rate": 2.424952796588103e-06, "loss": 0.0057, "step": 166980 }, { "epoch": 1.4100610077895759, "grad_norm": 0.29243355989456177, "learning_rate": 2.424321180841166e-06, "loss": 0.0086, "step": 166990 }, { "epoch": 1.4101454476367397, "grad_norm": 0.3205641508102417, "learning_rate": 2.423689621037445e-06, "loss": 0.0152, "step": 167000 }, { "epoch": 1.4102298874839037, "grad_norm": 0.08140672743320465, "learning_rate": 2.4230581171906554e-06, "loss": 0.004, "step": 167010 }, { "epoch": 1.4103143273310677, "grad_norm": 0.3060511648654938, "learning_rate": 2.4224266693145173e-06, "loss": 0.0069, "step": 167020 }, { "epoch": 1.4103987671782314, "grad_norm": 0.07545671612024307, "learning_rate": 2.4217952774227422e-06, "loss": 0.0054, "step": 167030 }, { "epoch": 1.4104832070253952, "grad_norm": 0.27691739797592163, "learning_rate": 2.421163941529047e-06, "loss": 0.0077, "step": 167040 }, { "epoch": 1.4105676468725592, "grad_norm": 0.23826070129871368, "learning_rate": 2.4205326616471426e-06, "loss": 0.0042, "step": 167050 }, { "epoch": 1.410652086719723, "grad_norm": 0.1646145135164261, "learning_rate": 2.4199014377907403e-06, "loss": 0.0048, "step": 167060 }, { "epoch": 1.410736526566887, "grad_norm": 0.2191794067621231, "learning_rate": 2.41927026997355e-06, "loss": 0.0054, "step": 167070 }, { "epoch": 1.4108209664140507, "grad_norm": 0.3517739176750183, "learning_rate": 2.418639158209278e-06, "loss": 0.0057, "step": 167080 }, { "epoch": 1.4109054062612147, "grad_norm": 0.42334553599357605, "learning_rate": 2.4180081025116375e-06, "loss": 0.0097, "step": 167090 }, { "epoch": 1.4109898461083785, "grad_norm": 0.6190963387489319, "learning_rate": 2.41737710289433e-06, "loss": 0.0083, "step": 167100 }, { "epoch": 1.4110742859555425, "grad_norm": 0.7079238295555115, "learning_rate": 2.416746159371064e-06, "loss": 0.01, "step": 167110 }, { "epoch": 1.4111587258027063, "grad_norm": 0.10219312459230423, "learning_rate": 2.4161152719555405e-06, "loss": 0.0053, "step": 167120 }, { "epoch": 1.41124316564987, "grad_norm": 0.14343224465847015, "learning_rate": 2.4154844406614653e-06, "loss": 0.0076, "step": 167130 }, { "epoch": 1.411327605497034, "grad_norm": 0.26009401679039, "learning_rate": 2.4148536655025386e-06, "loss": 0.0064, "step": 167140 }, { "epoch": 1.411412045344198, "grad_norm": 0.1266721785068512, "learning_rate": 2.4142229464924604e-06, "loss": 0.0043, "step": 167150 }, { "epoch": 1.4114964851913618, "grad_norm": 0.43070656061172485, "learning_rate": 2.4135922836449297e-06, "loss": 0.0105, "step": 167160 }, { "epoch": 1.4115809250385256, "grad_norm": 0.3891140818595886, "learning_rate": 2.412961676973643e-06, "loss": 0.009, "step": 167170 }, { "epoch": 1.4116653648856896, "grad_norm": 0.14307788014411926, "learning_rate": 2.4123311264923004e-06, "loss": 0.0098, "step": 167180 }, { "epoch": 1.4117498047328534, "grad_norm": 0.0915478840470314, "learning_rate": 2.411700632214594e-06, "loss": 0.0055, "step": 167190 }, { "epoch": 1.4118342445800174, "grad_norm": 0.15959948301315308, "learning_rate": 2.4110701941542207e-06, "loss": 0.0057, "step": 167200 }, { "epoch": 1.4119186844271812, "grad_norm": 0.3610459268093109, "learning_rate": 2.4104398123248713e-06, "loss": 0.0056, "step": 167210 }, { "epoch": 1.412003124274345, "grad_norm": 0.12735392153263092, "learning_rate": 2.4098094867402396e-06, "loss": 0.0058, "step": 167220 }, { "epoch": 1.412087564121509, "grad_norm": 0.08430697023868561, "learning_rate": 2.4091792174140154e-06, "loss": 0.0045, "step": 167230 }, { "epoch": 1.412172003968673, "grad_norm": 0.738973081111908, "learning_rate": 2.4085490043598876e-06, "loss": 0.0085, "step": 167240 }, { "epoch": 1.4122564438158367, "grad_norm": 0.3389345407485962, "learning_rate": 2.407918847591542e-06, "loss": 0.0058, "step": 167250 }, { "epoch": 1.4123408836630005, "grad_norm": 0.16864275932312012, "learning_rate": 2.4072887471226704e-06, "loss": 0.0071, "step": 167260 }, { "epoch": 1.4124253235101645, "grad_norm": 0.17970235645771027, "learning_rate": 2.4066587029669543e-06, "loss": 0.0122, "step": 167270 }, { "epoch": 1.4125097633573283, "grad_norm": 0.31612488627433777, "learning_rate": 2.4060287151380808e-06, "loss": 0.0085, "step": 167280 }, { "epoch": 1.4125942032044922, "grad_norm": 0.2459896206855774, "learning_rate": 2.405398783649732e-06, "loss": 0.004, "step": 167290 }, { "epoch": 1.412678643051656, "grad_norm": 0.43220433592796326, "learning_rate": 2.404768908515589e-06, "loss": 0.0113, "step": 167300 }, { "epoch": 1.41276308289882, "grad_norm": 0.5846384763717651, "learning_rate": 2.404139089749335e-06, "loss": 0.0057, "step": 167310 }, { "epoch": 1.4128475227459838, "grad_norm": 0.18417537212371826, "learning_rate": 2.4035093273646477e-06, "loss": 0.0046, "step": 167320 }, { "epoch": 1.4129319625931478, "grad_norm": 0.301951140165329, "learning_rate": 2.402879621375206e-06, "loss": 0.005, "step": 167330 }, { "epoch": 1.4130164024403116, "grad_norm": 0.044964101165533066, "learning_rate": 2.4022499717946844e-06, "loss": 0.0057, "step": 167340 }, { "epoch": 1.4131008422874753, "grad_norm": 0.17536497116088867, "learning_rate": 2.401620378636764e-06, "loss": 0.0078, "step": 167350 }, { "epoch": 1.4131852821346393, "grad_norm": 0.12941785156726837, "learning_rate": 2.4009908419151133e-06, "loss": 0.0043, "step": 167360 }, { "epoch": 1.4132697219818033, "grad_norm": 0.25288939476013184, "learning_rate": 2.400361361643412e-06, "loss": 0.0047, "step": 167370 }, { "epoch": 1.4133541618289671, "grad_norm": 0.37035924196243286, "learning_rate": 2.3997319378353284e-06, "loss": 0.0098, "step": 167380 }, { "epoch": 1.413438601676131, "grad_norm": 0.5872565507888794, "learning_rate": 2.3991025705045346e-06, "loss": 0.0075, "step": 167390 }, { "epoch": 1.4135230415232949, "grad_norm": 0.41012445092201233, "learning_rate": 2.3984732596647e-06, "loss": 0.0051, "step": 167400 }, { "epoch": 1.4136074813704587, "grad_norm": 0.3458746671676636, "learning_rate": 2.397844005329491e-06, "loss": 0.0121, "step": 167410 }, { "epoch": 1.4136919212176227, "grad_norm": 0.010364408604800701, "learning_rate": 2.397214807512579e-06, "loss": 0.0034, "step": 167420 }, { "epoch": 1.4137763610647864, "grad_norm": 0.41699957847595215, "learning_rate": 2.3965856662276265e-06, "loss": 0.0056, "step": 167430 }, { "epoch": 1.4138608009119502, "grad_norm": 0.8701325058937073, "learning_rate": 2.3959565814883013e-06, "loss": 0.0104, "step": 167440 }, { "epoch": 1.4139452407591142, "grad_norm": 0.35436615347862244, "learning_rate": 2.3953275533082637e-06, "loss": 0.0045, "step": 167450 }, { "epoch": 1.4140296806062782, "grad_norm": 0.1869092583656311, "learning_rate": 2.3946985817011798e-06, "loss": 0.008, "step": 167460 }, { "epoch": 1.414114120453442, "grad_norm": 0.15382005274295807, "learning_rate": 2.394069666680709e-06, "loss": 0.0045, "step": 167470 }, { "epoch": 1.4141985603006058, "grad_norm": 0.5289243459701538, "learning_rate": 2.3934408082605105e-06, "loss": 0.0099, "step": 167480 }, { "epoch": 1.4142830001477698, "grad_norm": 0.17528752982616425, "learning_rate": 2.392812006454242e-06, "loss": 0.0067, "step": 167490 }, { "epoch": 1.4143674399949335, "grad_norm": 0.35636910796165466, "learning_rate": 2.3921832612755643e-06, "loss": 0.007, "step": 167500 }, { "epoch": 1.4144518798420975, "grad_norm": 0.31708061695098877, "learning_rate": 2.3915545727381317e-06, "loss": 0.0059, "step": 167510 }, { "epoch": 1.4145363196892613, "grad_norm": 0.1657673716545105, "learning_rate": 2.3909259408555972e-06, "loss": 0.0072, "step": 167520 }, { "epoch": 1.4146207595364253, "grad_norm": 0.37841707468032837, "learning_rate": 2.390297365641619e-06, "loss": 0.0043, "step": 167530 }, { "epoch": 1.414705199383589, "grad_norm": 0.3483579456806183, "learning_rate": 2.3896688471098457e-06, "loss": 0.0054, "step": 167540 }, { "epoch": 1.414789639230753, "grad_norm": 0.46194443106651306, "learning_rate": 2.3890403852739315e-06, "loss": 0.0082, "step": 167550 }, { "epoch": 1.4148740790779168, "grad_norm": 0.07417551428079605, "learning_rate": 2.388411980147525e-06, "loss": 0.0108, "step": 167560 }, { "epoch": 1.4149585189250806, "grad_norm": 0.5368977785110474, "learning_rate": 2.387783631744275e-06, "loss": 0.0108, "step": 167570 }, { "epoch": 1.4150429587722446, "grad_norm": 0.0988895520567894, "learning_rate": 2.3871553400778275e-06, "loss": 0.0077, "step": 167580 }, { "epoch": 1.4151273986194086, "grad_norm": 0.03100990317761898, "learning_rate": 2.3865271051618323e-06, "loss": 0.0027, "step": 167590 }, { "epoch": 1.4152118384665724, "grad_norm": 0.33355072140693665, "learning_rate": 2.385898927009931e-06, "loss": 0.0042, "step": 167600 }, { "epoch": 1.4152962783137362, "grad_norm": 1.0906962156295776, "learning_rate": 2.3852708056357716e-06, "loss": 0.0155, "step": 167610 }, { "epoch": 1.4153807181609002, "grad_norm": 0.48277997970581055, "learning_rate": 2.384642741052992e-06, "loss": 0.0083, "step": 167620 }, { "epoch": 1.415465158008064, "grad_norm": 0.5567965507507324, "learning_rate": 2.384014733275238e-06, "loss": 0.0078, "step": 167630 }, { "epoch": 1.415549597855228, "grad_norm": 0.16989681124687195, "learning_rate": 2.3833867823161504e-06, "loss": 0.0049, "step": 167640 }, { "epoch": 1.4156340377023917, "grad_norm": 0.14910919964313507, "learning_rate": 2.382758888189362e-06, "loss": 0.0083, "step": 167650 }, { "epoch": 1.4157184775495557, "grad_norm": 0.0811070129275322, "learning_rate": 2.382131050908516e-06, "loss": 0.0063, "step": 167660 }, { "epoch": 1.4158029173967195, "grad_norm": 0.3922193944454193, "learning_rate": 2.381503270487245e-06, "loss": 0.0039, "step": 167670 }, { "epoch": 1.4158873572438835, "grad_norm": 0.1118040680885315, "learning_rate": 2.380875546939188e-06, "loss": 0.0067, "step": 167680 }, { "epoch": 1.4159717970910473, "grad_norm": 0.21304717659950256, "learning_rate": 2.380247880277976e-06, "loss": 0.0054, "step": 167690 }, { "epoch": 1.416056236938211, "grad_norm": 0.16056089103221893, "learning_rate": 2.379620270517245e-06, "loss": 0.0079, "step": 167700 }, { "epoch": 1.416140676785375, "grad_norm": 0.20085124671459198, "learning_rate": 2.3789927176706245e-06, "loss": 0.0059, "step": 167710 }, { "epoch": 1.416225116632539, "grad_norm": 0.22750023007392883, "learning_rate": 2.378365221751745e-06, "loss": 0.0085, "step": 167720 }, { "epoch": 1.4163095564797028, "grad_norm": 0.30321791768074036, "learning_rate": 2.3777377827742344e-06, "loss": 0.0062, "step": 167730 }, { "epoch": 1.4163939963268666, "grad_norm": 0.09086134284734726, "learning_rate": 2.3771104007517232e-06, "loss": 0.0065, "step": 167740 }, { "epoch": 1.4164784361740306, "grad_norm": 0.26896172761917114, "learning_rate": 2.3764830756978367e-06, "loss": 0.0069, "step": 167750 }, { "epoch": 1.4165628760211944, "grad_norm": 0.2963431179523468, "learning_rate": 2.375855807626198e-06, "loss": 0.0047, "step": 167760 }, { "epoch": 1.4166473158683583, "grad_norm": 0.2626575231552124, "learning_rate": 2.3752285965504364e-06, "loss": 0.004, "step": 167770 }, { "epoch": 1.4167317557155221, "grad_norm": 0.12211031466722488, "learning_rate": 2.374601442484169e-06, "loss": 0.0067, "step": 167780 }, { "epoch": 1.416816195562686, "grad_norm": 0.577060878276825, "learning_rate": 2.3739743454410217e-06, "loss": 0.008, "step": 167790 }, { "epoch": 1.41690063540985, "grad_norm": 0.3685930073261261, "learning_rate": 2.3733473054346145e-06, "loss": 0.0057, "step": 167800 }, { "epoch": 1.416985075257014, "grad_norm": 0.28261271119117737, "learning_rate": 2.372720322478565e-06, "loss": 0.0113, "step": 167810 }, { "epoch": 1.4170695151041777, "grad_norm": 0.43005236983299255, "learning_rate": 2.37209339658649e-06, "loss": 0.0088, "step": 167820 }, { "epoch": 1.4171539549513414, "grad_norm": 0.08820633590221405, "learning_rate": 2.37146652777201e-06, "loss": 0.0059, "step": 167830 }, { "epoch": 1.4172383947985054, "grad_norm": 0.0031879127491265535, "learning_rate": 2.370839716048736e-06, "loss": 0.004, "step": 167840 }, { "epoch": 1.4173228346456692, "grad_norm": 0.4437481164932251, "learning_rate": 2.3702129614302873e-06, "loss": 0.0103, "step": 167850 }, { "epoch": 1.4174072744928332, "grad_norm": 0.2786496579647064, "learning_rate": 2.369586263930272e-06, "loss": 0.0072, "step": 167860 }, { "epoch": 1.417491714339997, "grad_norm": 0.524082601070404, "learning_rate": 2.3689596235623052e-06, "loss": 0.0055, "step": 167870 }, { "epoch": 1.417576154187161, "grad_norm": 0.5721184015274048, "learning_rate": 2.3683330403399967e-06, "loss": 0.0058, "step": 167880 }, { "epoch": 1.4176605940343248, "grad_norm": 0.1255222111940384, "learning_rate": 2.3677065142769556e-06, "loss": 0.0045, "step": 167890 }, { "epoch": 1.4177450338814888, "grad_norm": 0.29551753401756287, "learning_rate": 2.3670800453867892e-06, "loss": 0.0033, "step": 167900 }, { "epoch": 1.4178294737286525, "grad_norm": 0.4735589623451233, "learning_rate": 2.366453633683103e-06, "loss": 0.0062, "step": 167910 }, { "epoch": 1.4179139135758163, "grad_norm": 0.42082542181015015, "learning_rate": 2.365827279179506e-06, "loss": 0.0079, "step": 167920 }, { "epoch": 1.4179983534229803, "grad_norm": 0.06791109591722488, "learning_rate": 2.3652009818895988e-06, "loss": 0.0073, "step": 167930 }, { "epoch": 1.4180827932701443, "grad_norm": 0.3510567247867584, "learning_rate": 2.3645747418269883e-06, "loss": 0.0058, "step": 167940 }, { "epoch": 1.418167233117308, "grad_norm": 0.11931554973125458, "learning_rate": 2.3639485590052724e-06, "loss": 0.0049, "step": 167950 }, { "epoch": 1.4182516729644719, "grad_norm": 0.2469492405653, "learning_rate": 2.3633224334380573e-06, "loss": 0.0043, "step": 167960 }, { "epoch": 1.4183361128116359, "grad_norm": 0.03585309907793999, "learning_rate": 2.362696365138935e-06, "loss": 0.0072, "step": 167970 }, { "epoch": 1.4184205526587996, "grad_norm": 0.1244814321398735, "learning_rate": 2.362070354121509e-06, "loss": 0.0053, "step": 167980 }, { "epoch": 1.4185049925059636, "grad_norm": 0.7402659058570862, "learning_rate": 2.361444400399375e-06, "loss": 0.0126, "step": 167990 }, { "epoch": 1.4185894323531274, "grad_norm": 0.7449191212654114, "learning_rate": 2.3608185039861253e-06, "loss": 0.0053, "step": 168000 }, { "epoch": 1.4186738722002914, "grad_norm": 0.007203711662441492, "learning_rate": 2.3601926648953587e-06, "loss": 0.0029, "step": 168010 }, { "epoch": 1.4187583120474552, "grad_norm": 0.1800566166639328, "learning_rate": 2.3595668831406647e-06, "loss": 0.0048, "step": 168020 }, { "epoch": 1.4188427518946192, "grad_norm": 0.12684020400047302, "learning_rate": 2.358941158735639e-06, "loss": 0.0039, "step": 168030 }, { "epoch": 1.418927191741783, "grad_norm": 0.8836225271224976, "learning_rate": 2.35831549169387e-06, "loss": 0.0049, "step": 168040 }, { "epoch": 1.4190116315889467, "grad_norm": 0.006876429542899132, "learning_rate": 2.357689882028947e-06, "loss": 0.0025, "step": 168050 }, { "epoch": 1.4190960714361107, "grad_norm": 0.0009878217242658138, "learning_rate": 2.3570643297544573e-06, "loss": 0.0101, "step": 168060 }, { "epoch": 1.4191805112832747, "grad_norm": 0.23707003891468048, "learning_rate": 2.35643883488399e-06, "loss": 0.0054, "step": 168070 }, { "epoch": 1.4192649511304385, "grad_norm": 1.281550645828247, "learning_rate": 2.3558133974311277e-06, "loss": 0.0111, "step": 168080 }, { "epoch": 1.4193493909776023, "grad_norm": 0.21122924983501434, "learning_rate": 2.3551880174094582e-06, "loss": 0.0086, "step": 168090 }, { "epoch": 1.4194338308247663, "grad_norm": 0.1720757931470871, "learning_rate": 2.3545626948325634e-06, "loss": 0.0094, "step": 168100 }, { "epoch": 1.41951827067193, "grad_norm": 0.05893007293343544, "learning_rate": 2.3539374297140223e-06, "loss": 0.0054, "step": 168110 }, { "epoch": 1.419602710519094, "grad_norm": 0.09081390500068665, "learning_rate": 2.3533122220674205e-06, "loss": 0.0054, "step": 168120 }, { "epoch": 1.4196871503662578, "grad_norm": 0.19479626417160034, "learning_rate": 2.3526870719063343e-06, "loss": 0.0082, "step": 168130 }, { "epoch": 1.4197715902134216, "grad_norm": 0.41054803133010864, "learning_rate": 2.352061979244343e-06, "loss": 0.0121, "step": 168140 }, { "epoch": 1.4198560300605856, "grad_norm": 0.08525343984365463, "learning_rate": 2.351436944095021e-06, "loss": 0.005, "step": 168150 }, { "epoch": 1.4199404699077496, "grad_norm": 0.2681870460510254, "learning_rate": 2.350811966471947e-06, "loss": 0.0066, "step": 168160 }, { "epoch": 1.4200249097549134, "grad_norm": 0.3848058581352234, "learning_rate": 2.3501870463886926e-06, "loss": 0.0047, "step": 168170 }, { "epoch": 1.4201093496020771, "grad_norm": 0.1287468522787094, "learning_rate": 2.349562183858835e-06, "loss": 0.0055, "step": 168180 }, { "epoch": 1.4201937894492411, "grad_norm": 0.5123632550239563, "learning_rate": 2.3489373788959414e-06, "loss": 0.0065, "step": 168190 }, { "epoch": 1.420278229296405, "grad_norm": 0.15277844667434692, "learning_rate": 2.3483126315135864e-06, "loss": 0.0024, "step": 168200 }, { "epoch": 1.420362669143569, "grad_norm": 1.0170907974243164, "learning_rate": 2.3476879417253378e-06, "loss": 0.0063, "step": 168210 }, { "epoch": 1.4204471089907327, "grad_norm": 0.15296083688735962, "learning_rate": 2.347063309544764e-06, "loss": 0.0059, "step": 168220 }, { "epoch": 1.4205315488378967, "grad_norm": 0.017677538096904755, "learning_rate": 2.3464387349854307e-06, "loss": 0.0041, "step": 168230 }, { "epoch": 1.4206159886850604, "grad_norm": 0.44101834297180176, "learning_rate": 2.3458142180609027e-06, "loss": 0.0068, "step": 168240 }, { "epoch": 1.4207004285322244, "grad_norm": 0.7065367698669434, "learning_rate": 2.3451897587847484e-06, "loss": 0.0045, "step": 168250 }, { "epoch": 1.4207848683793882, "grad_norm": 0.45713329315185547, "learning_rate": 2.3445653571705255e-06, "loss": 0.0087, "step": 168260 }, { "epoch": 1.420869308226552, "grad_norm": 0.24687205255031586, "learning_rate": 2.3439410132318017e-06, "loss": 0.0068, "step": 168270 }, { "epoch": 1.420953748073716, "grad_norm": 0.1858794242143631, "learning_rate": 2.343316726982135e-06, "loss": 0.0053, "step": 168280 }, { "epoch": 1.42103818792088, "grad_norm": 0.4462479054927826, "learning_rate": 2.342692498435084e-06, "loss": 0.009, "step": 168290 }, { "epoch": 1.4211226277680438, "grad_norm": 0.11201511323451996, "learning_rate": 2.3420683276042055e-06, "loss": 0.0119, "step": 168300 }, { "epoch": 1.4212070676152075, "grad_norm": 0.18562965095043182, "learning_rate": 2.3414442145030603e-06, "loss": 0.0099, "step": 168310 }, { "epoch": 1.4212915074623715, "grad_norm": 0.15481409430503845, "learning_rate": 2.3408201591452e-06, "loss": 0.0066, "step": 168320 }, { "epoch": 1.4213759473095353, "grad_norm": 0.12064211815595627, "learning_rate": 2.340196161544183e-06, "loss": 0.0085, "step": 168330 }, { "epoch": 1.4214603871566993, "grad_norm": 0.12883128225803375, "learning_rate": 2.3395722217135604e-06, "loss": 0.0082, "step": 168340 }, { "epoch": 1.421544827003863, "grad_norm": 0.18240928649902344, "learning_rate": 2.338948339666882e-06, "loss": 0.0053, "step": 168350 }, { "epoch": 1.4216292668510269, "grad_norm": 0.2713015079498291, "learning_rate": 2.338324515417702e-06, "loss": 0.0055, "step": 168360 }, { "epoch": 1.4217137066981909, "grad_norm": 0.31035134196281433, "learning_rate": 2.3377007489795684e-06, "loss": 0.0107, "step": 168370 }, { "epoch": 1.4217981465453549, "grad_norm": 0.616145133972168, "learning_rate": 2.3370770403660294e-06, "loss": 0.0054, "step": 168380 }, { "epoch": 1.4218825863925186, "grad_norm": 0.22151827812194824, "learning_rate": 2.3364533895906287e-06, "loss": 0.0028, "step": 168390 }, { "epoch": 1.4219670262396824, "grad_norm": 0.15859059989452362, "learning_rate": 2.3358297966669163e-06, "loss": 0.0066, "step": 168400 }, { "epoch": 1.4220514660868464, "grad_norm": 0.057559654116630554, "learning_rate": 2.3352062616084338e-06, "loss": 0.0054, "step": 168410 }, { "epoch": 1.4221359059340102, "grad_norm": 0.23066498339176178, "learning_rate": 2.334582784428727e-06, "loss": 0.0077, "step": 168420 }, { "epoch": 1.4222203457811742, "grad_norm": 0.3275831937789917, "learning_rate": 2.3339593651413333e-06, "loss": 0.0051, "step": 168430 }, { "epoch": 1.422304785628338, "grad_norm": 0.32152116298675537, "learning_rate": 2.333336003759798e-06, "loss": 0.0088, "step": 168440 }, { "epoch": 1.422389225475502, "grad_norm": 0.5086144208908081, "learning_rate": 2.3327127002976584e-06, "loss": 0.0101, "step": 168450 }, { "epoch": 1.4224736653226657, "grad_norm": 0.18675926327705383, "learning_rate": 2.3320894547684526e-06, "loss": 0.0105, "step": 168460 }, { "epoch": 1.4225581051698297, "grad_norm": 0.10627919435501099, "learning_rate": 2.3314662671857164e-06, "loss": 0.0069, "step": 168470 }, { "epoch": 1.4226425450169935, "grad_norm": 0.2582598328590393, "learning_rate": 2.3308431375629846e-06, "loss": 0.0087, "step": 168480 }, { "epoch": 1.4227269848641573, "grad_norm": 0.15868698060512543, "learning_rate": 2.3302200659137952e-06, "loss": 0.0055, "step": 168490 }, { "epoch": 1.4228114247113213, "grad_norm": 0.02565046027302742, "learning_rate": 2.3295970522516764e-06, "loss": 0.006, "step": 168500 }, { "epoch": 1.4228958645584853, "grad_norm": 0.3157142996788025, "learning_rate": 2.328974096590165e-06, "loss": 0.0075, "step": 168510 }, { "epoch": 1.422980304405649, "grad_norm": 0.6348860263824463, "learning_rate": 2.328351198942786e-06, "loss": 0.0045, "step": 168520 }, { "epoch": 1.4230647442528128, "grad_norm": 0.12833137810230255, "learning_rate": 2.327728359323074e-06, "loss": 0.0156, "step": 168530 }, { "epoch": 1.4231491840999768, "grad_norm": 0.7150945663452148, "learning_rate": 2.3271055777445537e-06, "loss": 0.007, "step": 168540 }, { "epoch": 1.4232336239471406, "grad_norm": 0.20006419718265533, "learning_rate": 2.3264828542207537e-06, "loss": 0.009, "step": 168550 }, { "epoch": 1.4233180637943046, "grad_norm": 0.20445962250232697, "learning_rate": 2.3258601887651956e-06, "loss": 0.0099, "step": 168560 }, { "epoch": 1.4234025036414684, "grad_norm": 0.017274849116802216, "learning_rate": 2.3252375813914085e-06, "loss": 0.0037, "step": 168570 }, { "epoch": 1.4234869434886324, "grad_norm": 0.23077070713043213, "learning_rate": 2.324615032112913e-06, "loss": 0.0071, "step": 168580 }, { "epoch": 1.4235713833357961, "grad_norm": 0.2447512447834015, "learning_rate": 2.3239925409432285e-06, "loss": 0.007, "step": 168590 }, { "epoch": 1.4236558231829601, "grad_norm": 0.10010417550802231, "learning_rate": 2.32337010789588e-06, "loss": 0.0064, "step": 168600 }, { "epoch": 1.423740263030124, "grad_norm": 0.4150175452232361, "learning_rate": 2.3227477329843833e-06, "loss": 0.0042, "step": 168610 }, { "epoch": 1.4238247028772877, "grad_norm": 0.3234054148197174, "learning_rate": 2.3221254162222585e-06, "loss": 0.0049, "step": 168620 }, { "epoch": 1.4239091427244517, "grad_norm": 0.47148939967155457, "learning_rate": 2.3215031576230178e-06, "loss": 0.005, "step": 168630 }, { "epoch": 1.4239935825716157, "grad_norm": 0.20662137866020203, "learning_rate": 2.320880957200182e-06, "loss": 0.0043, "step": 168640 }, { "epoch": 1.4240780224187795, "grad_norm": 0.2639155089855194, "learning_rate": 2.3202588149672616e-06, "loss": 0.006, "step": 168650 }, { "epoch": 1.4241624622659432, "grad_norm": 0.40358880162239075, "learning_rate": 2.319636730937772e-06, "loss": 0.0074, "step": 168660 }, { "epoch": 1.4242469021131072, "grad_norm": 0.3007908761501312, "learning_rate": 2.3190147051252216e-06, "loss": 0.0062, "step": 168670 }, { "epoch": 1.424331341960271, "grad_norm": 0.14118961989879608, "learning_rate": 2.318392737543124e-06, "loss": 0.0069, "step": 168680 }, { "epoch": 1.424415781807435, "grad_norm": 0.38681766390800476, "learning_rate": 2.3177708282049868e-06, "loss": 0.0105, "step": 168690 }, { "epoch": 1.4245002216545988, "grad_norm": 0.05241138115525246, "learning_rate": 2.3171489771243172e-06, "loss": 0.0081, "step": 168700 }, { "epoch": 1.4245846615017626, "grad_norm": 0.30755236744880676, "learning_rate": 2.3165271843146225e-06, "loss": 0.0045, "step": 168710 }, { "epoch": 1.4246691013489265, "grad_norm": 0.24303899705410004, "learning_rate": 2.315905449789406e-06, "loss": 0.0034, "step": 168720 }, { "epoch": 1.4247535411960905, "grad_norm": 0.22813037037849426, "learning_rate": 2.315283773562174e-06, "loss": 0.0045, "step": 168730 }, { "epoch": 1.4248379810432543, "grad_norm": 0.01945977471768856, "learning_rate": 2.3146621556464264e-06, "loss": 0.0082, "step": 168740 }, { "epoch": 1.424922420890418, "grad_norm": 0.017219798639416695, "learning_rate": 2.3140405960556696e-06, "loss": 0.0064, "step": 168750 }, { "epoch": 1.425006860737582, "grad_norm": 0.6056690812110901, "learning_rate": 2.3134190948033974e-06, "loss": 0.0093, "step": 168760 }, { "epoch": 1.4250913005847459, "grad_norm": 0.49053794145584106, "learning_rate": 2.3127976519031147e-06, "loss": 0.0078, "step": 168770 }, { "epoch": 1.4251757404319099, "grad_norm": 0.15495406091213226, "learning_rate": 2.3121762673683162e-06, "loss": 0.008, "step": 168780 }, { "epoch": 1.4252601802790736, "grad_norm": 0.2730267643928528, "learning_rate": 2.311554941212498e-06, "loss": 0.0106, "step": 168790 }, { "epoch": 1.4253446201262376, "grad_norm": 0.39155274629592896, "learning_rate": 2.310933673449154e-06, "loss": 0.007, "step": 168800 }, { "epoch": 1.4254290599734014, "grad_norm": 0.04276270419359207, "learning_rate": 2.310312464091781e-06, "loss": 0.0136, "step": 168810 }, { "epoch": 1.4255134998205654, "grad_norm": 0.10225912928581238, "learning_rate": 2.3096913131538706e-06, "loss": 0.0063, "step": 168820 }, { "epoch": 1.4255979396677292, "grad_norm": 0.029933741316199303, "learning_rate": 2.309070220648912e-06, "loss": 0.0141, "step": 168830 }, { "epoch": 1.425682379514893, "grad_norm": 0.20418886840343475, "learning_rate": 2.3084491865903978e-06, "loss": 0.0069, "step": 168840 }, { "epoch": 1.425766819362057, "grad_norm": 0.0032842860091477633, "learning_rate": 2.3078282109918144e-06, "loss": 0.0047, "step": 168850 }, { "epoch": 1.425851259209221, "grad_norm": 0.0899333730340004, "learning_rate": 2.3072072938666547e-06, "loss": 0.0045, "step": 168860 }, { "epoch": 1.4259356990563847, "grad_norm": 0.1346403956413269, "learning_rate": 2.306586435228396e-06, "loss": 0.0086, "step": 168870 }, { "epoch": 1.4260201389035485, "grad_norm": 0.0056692794896662235, "learning_rate": 2.305965635090531e-06, "loss": 0.0075, "step": 168880 }, { "epoch": 1.4261045787507125, "grad_norm": 0.1661568582057953, "learning_rate": 2.305344893466537e-06, "loss": 0.0076, "step": 168890 }, { "epoch": 1.4261890185978763, "grad_norm": 0.4983552098274231, "learning_rate": 2.304724210369903e-06, "loss": 0.004, "step": 168900 }, { "epoch": 1.4262734584450403, "grad_norm": 0.062281686812639236, "learning_rate": 2.3041035858141044e-06, "loss": 0.006, "step": 168910 }, { "epoch": 1.426357898292204, "grad_norm": 0.20798943936824799, "learning_rate": 2.303483019812625e-06, "loss": 0.0058, "step": 168920 }, { "epoch": 1.426442338139368, "grad_norm": 0.3958841860294342, "learning_rate": 2.3028625123789426e-06, "loss": 0.0078, "step": 168930 }, { "epoch": 1.4265267779865318, "grad_norm": 0.7570561170578003, "learning_rate": 2.302242063526533e-06, "loss": 0.0079, "step": 168940 }, { "epoch": 1.4266112178336958, "grad_norm": 0.12112286686897278, "learning_rate": 2.3016216732688735e-06, "loss": 0.0057, "step": 168950 }, { "epoch": 1.4266956576808596, "grad_norm": 0.23399175703525543, "learning_rate": 2.3010013416194366e-06, "loss": 0.0119, "step": 168960 }, { "epoch": 1.4267800975280234, "grad_norm": 0.020945053547620773, "learning_rate": 2.3003810685916986e-06, "loss": 0.0037, "step": 168970 }, { "epoch": 1.4268645373751874, "grad_norm": 0.25013697147369385, "learning_rate": 2.2997608541991295e-06, "loss": 0.0051, "step": 168980 }, { "epoch": 1.4269489772223511, "grad_norm": 0.024206670001149178, "learning_rate": 2.299140698455203e-06, "loss": 0.0059, "step": 168990 }, { "epoch": 1.4270334170695151, "grad_norm": 0.3126084804534912, "learning_rate": 2.2985206013733846e-06, "loss": 0.0063, "step": 169000 }, { "epoch": 1.427117856916679, "grad_norm": 0.0019607970025390387, "learning_rate": 2.2979005629671475e-06, "loss": 0.0063, "step": 169010 }, { "epoch": 1.427202296763843, "grad_norm": 0.36732998490333557, "learning_rate": 2.297280583249956e-06, "loss": 0.0065, "step": 169020 }, { "epoch": 1.4272867366110067, "grad_norm": 0.40088534355163574, "learning_rate": 2.296660662235277e-06, "loss": 0.0058, "step": 169030 }, { "epoch": 1.4273711764581707, "grad_norm": 0.2893493175506592, "learning_rate": 2.2960407999365724e-06, "loss": 0.0065, "step": 169040 }, { "epoch": 1.4274556163053345, "grad_norm": 0.2968350052833557, "learning_rate": 2.2954209963673095e-06, "loss": 0.0085, "step": 169050 }, { "epoch": 1.4275400561524982, "grad_norm": 0.12424321472644806, "learning_rate": 2.294801251540948e-06, "loss": 0.0071, "step": 169060 }, { "epoch": 1.4276244959996622, "grad_norm": 0.22736170887947083, "learning_rate": 2.2941815654709477e-06, "loss": 0.0051, "step": 169070 }, { "epoch": 1.4277089358468262, "grad_norm": 0.28342100977897644, "learning_rate": 2.29356193817077e-06, "loss": 0.01, "step": 169080 }, { "epoch": 1.42779337569399, "grad_norm": 0.1574784219264984, "learning_rate": 2.2929423696538717e-06, "loss": 0.0057, "step": 169090 }, { "epoch": 1.4278778155411538, "grad_norm": 0.1268671602010727, "learning_rate": 2.292322859933712e-06, "loss": 0.0076, "step": 169100 }, { "epoch": 1.4279622553883178, "grad_norm": 0.03119690716266632, "learning_rate": 2.291703409023745e-06, "loss": 0.0124, "step": 169110 }, { "epoch": 1.4280466952354816, "grad_norm": 0.05472123250365257, "learning_rate": 2.291084016937425e-06, "loss": 0.0099, "step": 169120 }, { "epoch": 1.4281311350826456, "grad_norm": 0.28261953592300415, "learning_rate": 2.2904646836882027e-06, "loss": 0.0054, "step": 169130 }, { "epoch": 1.4282155749298093, "grad_norm": 0.09262333810329437, "learning_rate": 2.2898454092895344e-06, "loss": 0.0088, "step": 169140 }, { "epoch": 1.4283000147769733, "grad_norm": 0.09137316048145294, "learning_rate": 2.2892261937548665e-06, "loss": 0.0075, "step": 169150 }, { "epoch": 1.428384454624137, "grad_norm": 0.10027941316366196, "learning_rate": 2.288607037097652e-06, "loss": 0.003, "step": 169160 }, { "epoch": 1.428468894471301, "grad_norm": 0.12532101571559906, "learning_rate": 2.287987939331337e-06, "loss": 0.0064, "step": 169170 }, { "epoch": 1.4285533343184649, "grad_norm": 0.0915297344326973, "learning_rate": 2.2873689004693682e-06, "loss": 0.0075, "step": 169180 }, { "epoch": 1.4286377741656286, "grad_norm": 0.47677910327911377, "learning_rate": 2.2867499205251912e-06, "loss": 0.0058, "step": 169190 }, { "epoch": 1.4287222140127926, "grad_norm": 0.03209599852561951, "learning_rate": 2.286130999512248e-06, "loss": 0.005, "step": 169200 }, { "epoch": 1.4288066538599566, "grad_norm": 0.547635018825531, "learning_rate": 2.2855121374439853e-06, "loss": 0.0081, "step": 169210 }, { "epoch": 1.4288910937071204, "grad_norm": 0.2097756266593933, "learning_rate": 2.2848933343338408e-06, "loss": 0.0063, "step": 169220 }, { "epoch": 1.4289755335542842, "grad_norm": 0.07546092569828033, "learning_rate": 2.284274590195259e-06, "loss": 0.0046, "step": 169230 }, { "epoch": 1.4290599734014482, "grad_norm": 0.1858384758234024, "learning_rate": 2.283655905041674e-06, "loss": 0.0098, "step": 169240 }, { "epoch": 1.429144413248612, "grad_norm": 0.3962939977645874, "learning_rate": 2.2830372788865283e-06, "loss": 0.0128, "step": 169250 }, { "epoch": 1.429228853095776, "grad_norm": 0.18519903719425201, "learning_rate": 2.2824187117432563e-06, "loss": 0.004, "step": 169260 }, { "epoch": 1.4293132929429397, "grad_norm": 0.3402844965457916, "learning_rate": 2.2818002036252928e-06, "loss": 0.0047, "step": 169270 }, { "epoch": 1.4293977327901035, "grad_norm": 0.6804598569869995, "learning_rate": 2.2811817545460702e-06, "loss": 0.0077, "step": 169280 }, { "epoch": 1.4294821726372675, "grad_norm": 0.3678823411464691, "learning_rate": 2.2805633645190244e-06, "loss": 0.0126, "step": 169290 }, { "epoch": 1.4295666124844315, "grad_norm": 0.08722590655088425, "learning_rate": 2.279945033557585e-06, "loss": 0.0078, "step": 169300 }, { "epoch": 1.4296510523315953, "grad_norm": 0.6533942818641663, "learning_rate": 2.27932676167518e-06, "loss": 0.006, "step": 169310 }, { "epoch": 1.429735492178759, "grad_norm": 0.16732215881347656, "learning_rate": 2.278708548885243e-06, "loss": 0.0055, "step": 169320 }, { "epoch": 1.429819932025923, "grad_norm": 0.37066003680229187, "learning_rate": 2.2780903952011956e-06, "loss": 0.0082, "step": 169330 }, { "epoch": 1.4299043718730868, "grad_norm": 0.08287538588047028, "learning_rate": 2.2774723006364696e-06, "loss": 0.0033, "step": 169340 }, { "epoch": 1.4299888117202508, "grad_norm": 0.13231608271598816, "learning_rate": 2.276854265204487e-06, "loss": 0.0061, "step": 169350 }, { "epoch": 1.4300732515674146, "grad_norm": 0.2521893084049225, "learning_rate": 2.2762362889186723e-06, "loss": 0.0066, "step": 169360 }, { "epoch": 1.4301576914145786, "grad_norm": 0.1945202350616455, "learning_rate": 2.275618371792445e-06, "loss": 0.0067, "step": 169370 }, { "epoch": 1.4302421312617424, "grad_norm": 0.3466421365737915, "learning_rate": 2.275000513839231e-06, "loss": 0.0084, "step": 169380 }, { "epoch": 1.4303265711089064, "grad_norm": 0.3183562755584717, "learning_rate": 2.2743827150724445e-06, "loss": 0.0103, "step": 169390 }, { "epoch": 1.4304110109560702, "grad_norm": 0.2945971190929413, "learning_rate": 2.273764975505509e-06, "loss": 0.0046, "step": 169400 }, { "epoch": 1.430495450803234, "grad_norm": 0.30858752131462097, "learning_rate": 2.2731472951518396e-06, "loss": 0.0076, "step": 169410 }, { "epoch": 1.430579890650398, "grad_norm": 0.051612019538879395, "learning_rate": 2.2725296740248504e-06, "loss": 0.0073, "step": 169420 }, { "epoch": 1.430664330497562, "grad_norm": 0.19684559106826782, "learning_rate": 2.27191211213796e-06, "loss": 0.0066, "step": 169430 }, { "epoch": 1.4307487703447257, "grad_norm": 0.4613589346408844, "learning_rate": 2.2712946095045792e-06, "loss": 0.006, "step": 169440 }, { "epoch": 1.4308332101918895, "grad_norm": 0.4175136089324951, "learning_rate": 2.27067716613812e-06, "loss": 0.0094, "step": 169450 }, { "epoch": 1.4309176500390535, "grad_norm": 0.5367829203605652, "learning_rate": 2.2700597820519922e-06, "loss": 0.0098, "step": 169460 }, { "epoch": 1.4310020898862172, "grad_norm": 0.28941696882247925, "learning_rate": 2.2694424572596075e-06, "loss": 0.0097, "step": 169470 }, { "epoch": 1.4310865297333812, "grad_norm": 0.19652506709098816, "learning_rate": 2.268825191774372e-06, "loss": 0.0083, "step": 169480 }, { "epoch": 1.431170969580545, "grad_norm": 0.22060920298099518, "learning_rate": 2.268207985609695e-06, "loss": 0.0075, "step": 169490 }, { "epoch": 1.431255409427709, "grad_norm": 0.2502214312553406, "learning_rate": 2.267590838778981e-06, "loss": 0.0036, "step": 169500 }, { "epoch": 1.4313398492748728, "grad_norm": 0.38250303268432617, "learning_rate": 2.2669737512956342e-06, "loss": 0.0059, "step": 169510 }, { "epoch": 1.4314242891220368, "grad_norm": 0.3637206256389618, "learning_rate": 2.2663567231730576e-06, "loss": 0.0071, "step": 169520 }, { "epoch": 1.4315087289692006, "grad_norm": 0.5366841554641724, "learning_rate": 2.265739754424651e-06, "loss": 0.007, "step": 169530 }, { "epoch": 1.4315931688163643, "grad_norm": 0.17570582032203674, "learning_rate": 2.265122845063818e-06, "loss": 0.0098, "step": 169540 }, { "epoch": 1.4316776086635283, "grad_norm": 0.24726428091526031, "learning_rate": 2.2645059951039546e-06, "loss": 0.0035, "step": 169550 }, { "epoch": 1.4317620485106923, "grad_norm": 0.013095454312860966, "learning_rate": 2.2638892045584622e-06, "loss": 0.0069, "step": 169560 }, { "epoch": 1.431846488357856, "grad_norm": 0.7363985776901245, "learning_rate": 2.263272473440734e-06, "loss": 0.0126, "step": 169570 }, { "epoch": 1.4319309282050199, "grad_norm": 0.010015706531703472, "learning_rate": 2.2626558017641686e-06, "loss": 0.0085, "step": 169580 }, { "epoch": 1.4320153680521839, "grad_norm": 0.16199380159378052, "learning_rate": 2.2620391895421585e-06, "loss": 0.01, "step": 169590 }, { "epoch": 1.4320998078993477, "grad_norm": 0.47142553329467773, "learning_rate": 2.261422636788096e-06, "loss": 0.0059, "step": 169600 }, { "epoch": 1.4321842477465117, "grad_norm": 0.08423786610364914, "learning_rate": 2.2608061435153704e-06, "loss": 0.0071, "step": 169610 }, { "epoch": 1.4322686875936754, "grad_norm": 0.16624906659126282, "learning_rate": 2.2601897097373763e-06, "loss": 0.0061, "step": 169620 }, { "epoch": 1.4323531274408392, "grad_norm": 0.6420237421989441, "learning_rate": 2.259573335467498e-06, "loss": 0.0081, "step": 169630 }, { "epoch": 1.4324375672880032, "grad_norm": 0.1415368914604187, "learning_rate": 2.258957020719128e-06, "loss": 0.0086, "step": 169640 }, { "epoch": 1.4325220071351672, "grad_norm": 0.07870378345251083, "learning_rate": 2.2583407655056495e-06, "loss": 0.0106, "step": 169650 }, { "epoch": 1.432606446982331, "grad_norm": 0.011945703998208046, "learning_rate": 2.2577245698404452e-06, "loss": 0.0033, "step": 169660 }, { "epoch": 1.4326908868294947, "grad_norm": 0.1957257241010666, "learning_rate": 2.2571084337369038e-06, "loss": 0.0108, "step": 169670 }, { "epoch": 1.4327753266766587, "grad_norm": 0.13139303028583527, "learning_rate": 2.2564923572084053e-06, "loss": 0.0058, "step": 169680 }, { "epoch": 1.4328597665238225, "grad_norm": 0.3306069076061249, "learning_rate": 2.25587634026833e-06, "loss": 0.0061, "step": 169690 }, { "epoch": 1.4329442063709865, "grad_norm": 0.04509067162871361, "learning_rate": 2.255260382930057e-06, "loss": 0.0052, "step": 169700 }, { "epoch": 1.4330286462181503, "grad_norm": 0.18504942953586578, "learning_rate": 2.2546444852069673e-06, "loss": 0.0046, "step": 169710 }, { "epoch": 1.4331130860653143, "grad_norm": 0.3432008922100067, "learning_rate": 2.254028647112435e-06, "loss": 0.0047, "step": 169720 }, { "epoch": 1.433197525912478, "grad_norm": 0.65554279088974, "learning_rate": 2.25341286865984e-06, "loss": 0.0129, "step": 169730 }, { "epoch": 1.433281965759642, "grad_norm": 0.6908416748046875, "learning_rate": 2.2527971498625527e-06, "loss": 0.0104, "step": 169740 }, { "epoch": 1.4333664056068058, "grad_norm": 0.1587575525045395, "learning_rate": 2.2521814907339504e-06, "loss": 0.0059, "step": 169750 }, { "epoch": 1.4334508454539696, "grad_norm": 0.010872024111449718, "learning_rate": 2.2515658912874044e-06, "loss": 0.0048, "step": 169760 }, { "epoch": 1.4335352853011336, "grad_norm": 0.6671677231788635, "learning_rate": 2.250950351536281e-06, "loss": 0.0147, "step": 169770 }, { "epoch": 1.4336197251482976, "grad_norm": 0.2591036558151245, "learning_rate": 2.250334871493954e-06, "loss": 0.0052, "step": 169780 }, { "epoch": 1.4337041649954614, "grad_norm": 0.28105223178863525, "learning_rate": 2.2497194511737885e-06, "loss": 0.004, "step": 169790 }, { "epoch": 1.4337886048426252, "grad_norm": 0.5550975799560547, "learning_rate": 2.2491040905891546e-06, "loss": 0.0096, "step": 169800 }, { "epoch": 1.4338730446897892, "grad_norm": 0.1761217713356018, "learning_rate": 2.2484887897534142e-06, "loss": 0.0035, "step": 169810 }, { "epoch": 1.433957484536953, "grad_norm": 0.10466950386762619, "learning_rate": 2.2478735486799357e-06, "loss": 0.0061, "step": 169820 }, { "epoch": 1.434041924384117, "grad_norm": 0.32649821043014526, "learning_rate": 2.2472583673820795e-06, "loss": 0.0051, "step": 169830 }, { "epoch": 1.4341263642312807, "grad_norm": 0.21078859269618988, "learning_rate": 2.2466432458732074e-06, "loss": 0.0057, "step": 169840 }, { "epoch": 1.4342108040784447, "grad_norm": 0.13281138241291046, "learning_rate": 2.2460281841666782e-06, "loss": 0.0058, "step": 169850 }, { "epoch": 1.4342952439256085, "grad_norm": 0.2480136603116989, "learning_rate": 2.245413182275854e-06, "loss": 0.004, "step": 169860 }, { "epoch": 1.4343796837727725, "grad_norm": 0.40111151337623596, "learning_rate": 2.2447982402140893e-06, "loss": 0.0062, "step": 169870 }, { "epoch": 1.4344641236199362, "grad_norm": 0.04386008530855179, "learning_rate": 2.244183357994744e-06, "loss": 0.0053, "step": 169880 }, { "epoch": 1.4345485634671, "grad_norm": 0.25325125455856323, "learning_rate": 2.2435685356311716e-06, "loss": 0.0066, "step": 169890 }, { "epoch": 1.434633003314264, "grad_norm": 0.0448877289891243, "learning_rate": 2.242953773136724e-06, "loss": 0.0145, "step": 169900 }, { "epoch": 1.4347174431614278, "grad_norm": 0.21406374871730804, "learning_rate": 2.242339070524757e-06, "loss": 0.0078, "step": 169910 }, { "epoch": 1.4348018830085918, "grad_norm": 0.10028113424777985, "learning_rate": 2.2417244278086207e-06, "loss": 0.0105, "step": 169920 }, { "epoch": 1.4348863228557556, "grad_norm": 0.27704086899757385, "learning_rate": 2.2411098450016645e-06, "loss": 0.0078, "step": 169930 }, { "epoch": 1.4349707627029196, "grad_norm": 0.13100524246692657, "learning_rate": 2.240495322117235e-06, "loss": 0.0082, "step": 169940 }, { "epoch": 1.4350552025500833, "grad_norm": 0.35450848937034607, "learning_rate": 2.2398808591686837e-06, "loss": 0.0072, "step": 169950 }, { "epoch": 1.4351396423972473, "grad_norm": 0.22166405618190765, "learning_rate": 2.239266456169352e-06, "loss": 0.0052, "step": 169960 }, { "epoch": 1.4352240822444111, "grad_norm": 0.27276307344436646, "learning_rate": 2.23865211313259e-06, "loss": 0.0055, "step": 169970 }, { "epoch": 1.435308522091575, "grad_norm": 0.9054333567619324, "learning_rate": 2.238037830071736e-06, "loss": 0.0078, "step": 169980 }, { "epoch": 1.4353929619387389, "grad_norm": 0.22957922518253326, "learning_rate": 2.2374236070001365e-06, "loss": 0.0069, "step": 169990 }, { "epoch": 1.4354774017859029, "grad_norm": 0.2714475691318512, "learning_rate": 2.2368094439311293e-06, "loss": 0.003, "step": 170000 }, { "epoch": 1.4355618416330667, "grad_norm": 0.37742552161216736, "learning_rate": 2.236195340878056e-06, "loss": 0.0076, "step": 170010 }, { "epoch": 1.4356462814802304, "grad_norm": 1.0359432697296143, "learning_rate": 2.2355812978542523e-06, "loss": 0.0046, "step": 170020 }, { "epoch": 1.4357307213273944, "grad_norm": 0.17300593852996826, "learning_rate": 2.234967314873055e-06, "loss": 0.0065, "step": 170030 }, { "epoch": 1.4358151611745582, "grad_norm": 0.25415199995040894, "learning_rate": 2.2343533919478034e-06, "loss": 0.006, "step": 170040 }, { "epoch": 1.4358996010217222, "grad_norm": 0.3265589475631714, "learning_rate": 2.233739529091827e-06, "loss": 0.0054, "step": 170050 }, { "epoch": 1.435984040868886, "grad_norm": 0.23435699939727783, "learning_rate": 2.233125726318463e-06, "loss": 0.0084, "step": 170060 }, { "epoch": 1.43606848071605, "grad_norm": 0.5916582942008972, "learning_rate": 2.2325119836410394e-06, "loss": 0.0048, "step": 170070 }, { "epoch": 1.4361529205632138, "grad_norm": 0.14918731153011322, "learning_rate": 2.2318983010728917e-06, "loss": 0.0061, "step": 170080 }, { "epoch": 1.4362373604103778, "grad_norm": 0.19499938189983368, "learning_rate": 2.231284678627343e-06, "loss": 0.0072, "step": 170090 }, { "epoch": 1.4363218002575415, "grad_norm": 0.4484120309352875, "learning_rate": 2.2306711163177245e-06, "loss": 0.0108, "step": 170100 }, { "epoch": 1.4364062401047053, "grad_norm": 0.28211483359336853, "learning_rate": 2.230057614157363e-06, "loss": 0.0073, "step": 170110 }, { "epoch": 1.4364906799518693, "grad_norm": 0.359273761510849, "learning_rate": 2.2294441721595792e-06, "loss": 0.0072, "step": 170120 }, { "epoch": 1.4365751197990333, "grad_norm": 0.15558522939682007, "learning_rate": 2.2288307903377034e-06, "loss": 0.0055, "step": 170130 }, { "epoch": 1.436659559646197, "grad_norm": 0.17141252756118774, "learning_rate": 2.2282174687050524e-06, "loss": 0.0067, "step": 170140 }, { "epoch": 1.4367439994933608, "grad_norm": 0.18926486372947693, "learning_rate": 2.227604207274952e-06, "loss": 0.0036, "step": 170150 }, { "epoch": 1.4368284393405248, "grad_norm": 0.17166303098201752, "learning_rate": 2.22699100606072e-06, "loss": 0.0115, "step": 170160 }, { "epoch": 1.4369128791876886, "grad_norm": 0.00215636333450675, "learning_rate": 2.2263778650756752e-06, "loss": 0.0073, "step": 170170 }, { "epoch": 1.4369973190348526, "grad_norm": 0.149192214012146, "learning_rate": 2.225764784333133e-06, "loss": 0.0087, "step": 170180 }, { "epoch": 1.4370817588820164, "grad_norm": 0.1867339164018631, "learning_rate": 2.2251517638464133e-06, "loss": 0.0055, "step": 170190 }, { "epoch": 1.4371661987291802, "grad_norm": 0.27617648243904114, "learning_rate": 2.224538803628826e-06, "loss": 0.0041, "step": 170200 }, { "epoch": 1.4372506385763442, "grad_norm": 0.36731499433517456, "learning_rate": 2.22392590369369e-06, "loss": 0.0039, "step": 170210 }, { "epoch": 1.4373350784235082, "grad_norm": 0.1624947488307953, "learning_rate": 2.223313064054312e-06, "loss": 0.0042, "step": 170220 }, { "epoch": 1.437419518270672, "grad_norm": 0.1300150454044342, "learning_rate": 2.222700284724007e-06, "loss": 0.0134, "step": 170230 }, { "epoch": 1.4375039581178357, "grad_norm": 0.4495781660079956, "learning_rate": 2.2220875657160833e-06, "loss": 0.0061, "step": 170240 }, { "epoch": 1.4375883979649997, "grad_norm": 0.034683555364608765, "learning_rate": 2.221474907043848e-06, "loss": 0.0038, "step": 170250 }, { "epoch": 1.4376728378121635, "grad_norm": 0.09370185434818268, "learning_rate": 2.2208623087206085e-06, "loss": 0.0048, "step": 170260 }, { "epoch": 1.4377572776593275, "grad_norm": 0.2361922711133957, "learning_rate": 2.220249770759668e-06, "loss": 0.0064, "step": 170270 }, { "epoch": 1.4378417175064913, "grad_norm": 0.14775177836418152, "learning_rate": 2.2196372931743354e-06, "loss": 0.0062, "step": 170280 }, { "epoch": 1.4379261573536553, "grad_norm": 0.376676082611084, "learning_rate": 2.219024875977909e-06, "loss": 0.0092, "step": 170290 }, { "epoch": 1.438010597200819, "grad_norm": 0.1691407412290573, "learning_rate": 2.2184125191836935e-06, "loss": 0.0128, "step": 170300 }, { "epoch": 1.438095037047983, "grad_norm": 0.17563971877098083, "learning_rate": 2.2178002228049866e-06, "loss": 0.0094, "step": 170310 }, { "epoch": 1.4381794768951468, "grad_norm": 0.2634684443473816, "learning_rate": 2.217187986855091e-06, "loss": 0.0057, "step": 170320 }, { "epoch": 1.4382639167423106, "grad_norm": 0.18289436399936676, "learning_rate": 2.216575811347302e-06, "loss": 0.0066, "step": 170330 }, { "epoch": 1.4383483565894746, "grad_norm": 0.3060328960418701, "learning_rate": 2.2159636962949153e-06, "loss": 0.0071, "step": 170340 }, { "epoch": 1.4384327964366386, "grad_norm": 0.3800762891769409, "learning_rate": 2.2153516417112275e-06, "loss": 0.0105, "step": 170350 }, { "epoch": 1.4385172362838023, "grad_norm": 0.3376327455043793, "learning_rate": 2.214739647609529e-06, "loss": 0.0081, "step": 170360 }, { "epoch": 1.4386016761309661, "grad_norm": 0.07161017507314682, "learning_rate": 2.2141277140031163e-06, "loss": 0.012, "step": 170370 }, { "epoch": 1.4386861159781301, "grad_norm": 0.04764146730303764, "learning_rate": 2.2135158409052774e-06, "loss": 0.0027, "step": 170380 }, { "epoch": 1.438770555825294, "grad_norm": 0.42683926224708557, "learning_rate": 2.212904028329305e-06, "loss": 0.0129, "step": 170390 }, { "epoch": 1.438854995672458, "grad_norm": 0.24874572455883026, "learning_rate": 2.212292276288486e-06, "loss": 0.0075, "step": 170400 }, { "epoch": 1.4389394355196217, "grad_norm": 0.08069974184036255, "learning_rate": 2.2116805847961083e-06, "loss": 0.0058, "step": 170410 }, { "epoch": 1.4390238753667857, "grad_norm": 0.1755245178937912, "learning_rate": 2.211068953865454e-06, "loss": 0.0058, "step": 170420 }, { "epoch": 1.4391083152139494, "grad_norm": 0.5734670758247375, "learning_rate": 2.210457383509813e-06, "loss": 0.007, "step": 170430 }, { "epoch": 1.4391927550611134, "grad_norm": 0.08578776568174362, "learning_rate": 2.209845873742464e-06, "loss": 0.0079, "step": 170440 }, { "epoch": 1.4392771949082772, "grad_norm": 0.38924136757850647, "learning_rate": 2.209234424576692e-06, "loss": 0.0093, "step": 170450 }, { "epoch": 1.439361634755441, "grad_norm": 0.4485059380531311, "learning_rate": 2.2086230360257753e-06, "loss": 0.0061, "step": 170460 }, { "epoch": 1.439446074602605, "grad_norm": 0.4820554852485657, "learning_rate": 2.2080117081029962e-06, "loss": 0.0092, "step": 170470 }, { "epoch": 1.439530514449769, "grad_norm": 0.18371836841106415, "learning_rate": 2.2074004408216306e-06, "loss": 0.0067, "step": 170480 }, { "epoch": 1.4396149542969328, "grad_norm": 0.17124496400356293, "learning_rate": 2.2067892341949547e-06, "loss": 0.0074, "step": 170490 }, { "epoch": 1.4396993941440965, "grad_norm": 0.28507915139198303, "learning_rate": 2.206178088236245e-06, "loss": 0.0089, "step": 170500 }, { "epoch": 1.4397838339912605, "grad_norm": 0.19294601678848267, "learning_rate": 2.2055670029587723e-06, "loss": 0.0072, "step": 170510 }, { "epoch": 1.4398682738384243, "grad_norm": 0.11900864541530609, "learning_rate": 2.2049559783758135e-06, "loss": 0.0068, "step": 170520 }, { "epoch": 1.4399527136855883, "grad_norm": 0.2939011752605438, "learning_rate": 2.204345014500636e-06, "loss": 0.0046, "step": 170530 }, { "epoch": 1.440037153532752, "grad_norm": 0.056707218289375305, "learning_rate": 2.2037341113465143e-06, "loss": 0.0057, "step": 170540 }, { "epoch": 1.4401215933799159, "grad_norm": 0.33025386929512024, "learning_rate": 2.2031232689267127e-06, "loss": 0.0097, "step": 170550 }, { "epoch": 1.4402060332270799, "grad_norm": 0.23211166262626648, "learning_rate": 2.2025124872545022e-06, "loss": 0.0059, "step": 170560 }, { "epoch": 1.4402904730742438, "grad_norm": 0.1526564657688141, "learning_rate": 2.2019017663431473e-06, "loss": 0.0051, "step": 170570 }, { "epoch": 1.4403749129214076, "grad_norm": 0.2396906465291977, "learning_rate": 2.2012911062059134e-06, "loss": 0.008, "step": 170580 }, { "epoch": 1.4404593527685714, "grad_norm": 0.19321592152118683, "learning_rate": 2.200680506856062e-06, "loss": 0.0075, "step": 170590 }, { "epoch": 1.4405437926157354, "grad_norm": 0.4765799939632416, "learning_rate": 2.2000699683068553e-06, "loss": 0.0054, "step": 170600 }, { "epoch": 1.4406282324628992, "grad_norm": 0.17074991762638092, "learning_rate": 2.1994594905715567e-06, "loss": 0.0072, "step": 170610 }, { "epoch": 1.4407126723100632, "grad_norm": 0.224339559674263, "learning_rate": 2.198849073663422e-06, "loss": 0.0042, "step": 170620 }, { "epoch": 1.440797112157227, "grad_norm": 0.13748006522655487, "learning_rate": 2.1982387175957137e-06, "loss": 0.0053, "step": 170630 }, { "epoch": 1.440881552004391, "grad_norm": 0.17607350647449493, "learning_rate": 2.197628422381684e-06, "loss": 0.0104, "step": 170640 }, { "epoch": 1.4409659918515547, "grad_norm": 0.49193891882896423, "learning_rate": 2.197018188034593e-06, "loss": 0.0082, "step": 170650 }, { "epoch": 1.4410504316987187, "grad_norm": 0.2978811264038086, "learning_rate": 2.1964080145676924e-06, "loss": 0.005, "step": 170660 }, { "epoch": 1.4411348715458825, "grad_norm": 0.4909258186817169, "learning_rate": 2.195797901994235e-06, "loss": 0.0053, "step": 170670 }, { "epoch": 1.4412193113930463, "grad_norm": 0.07855523377656937, "learning_rate": 2.195187850327471e-06, "loss": 0.0056, "step": 170680 }, { "epoch": 1.4413037512402103, "grad_norm": 0.2787822186946869, "learning_rate": 2.1945778595806538e-06, "loss": 0.0063, "step": 170690 }, { "epoch": 1.4413881910873743, "grad_norm": 0.12734197080135345, "learning_rate": 2.193967929767029e-06, "loss": 0.0068, "step": 170700 }, { "epoch": 1.441472630934538, "grad_norm": 0.04956725239753723, "learning_rate": 2.193358060899847e-06, "loss": 0.0041, "step": 170710 }, { "epoch": 1.4415570707817018, "grad_norm": 0.08348444849252701, "learning_rate": 2.192748252992353e-06, "loss": 0.0063, "step": 170720 }, { "epoch": 1.4416415106288658, "grad_norm": 0.007930951192975044, "learning_rate": 2.1921385060577925e-06, "loss": 0.0041, "step": 170730 }, { "epoch": 1.4417259504760296, "grad_norm": 0.0887991264462471, "learning_rate": 2.1915288201094075e-06, "loss": 0.004, "step": 170740 }, { "epoch": 1.4418103903231936, "grad_norm": 0.12627547979354858, "learning_rate": 2.190919195160439e-06, "loss": 0.0071, "step": 170750 }, { "epoch": 1.4418948301703574, "grad_norm": 0.2959650754928589, "learning_rate": 2.1903096312241325e-06, "loss": 0.0045, "step": 170760 }, { "epoch": 1.4419792700175211, "grad_norm": 0.45797544717788696, "learning_rate": 2.1897001283137224e-06, "loss": 0.004, "step": 170770 }, { "epoch": 1.4420637098646851, "grad_norm": 0.10110527276992798, "learning_rate": 2.189090686442452e-06, "loss": 0.0046, "step": 170780 }, { "epoch": 1.4421481497118491, "grad_norm": 0.2694006860256195, "learning_rate": 2.1884813056235545e-06, "loss": 0.0063, "step": 170790 }, { "epoch": 1.442232589559013, "grad_norm": 0.06929880380630493, "learning_rate": 2.187871985870268e-06, "loss": 0.0039, "step": 170800 }, { "epoch": 1.4423170294061767, "grad_norm": 0.29868701100349426, "learning_rate": 2.1872627271958258e-06, "loss": 0.003, "step": 170810 }, { "epoch": 1.4424014692533407, "grad_norm": 0.7212485671043396, "learning_rate": 2.1866535296134607e-06, "loss": 0.0094, "step": 170820 }, { "epoch": 1.4424859091005044, "grad_norm": 0.3365471065044403, "learning_rate": 2.1860443931364044e-06, "loss": 0.0035, "step": 170830 }, { "epoch": 1.4425703489476684, "grad_norm": 0.4327029287815094, "learning_rate": 2.185435317777886e-06, "loss": 0.0074, "step": 170840 }, { "epoch": 1.4426547887948322, "grad_norm": 0.23483309149742126, "learning_rate": 2.184826303551137e-06, "loss": 0.0046, "step": 170850 }, { "epoch": 1.4427392286419962, "grad_norm": 0.5574667453765869, "learning_rate": 2.184217350469382e-06, "loss": 0.0064, "step": 170860 }, { "epoch": 1.44282366848916, "grad_norm": 0.22881338000297546, "learning_rate": 2.183608458545851e-06, "loss": 0.0081, "step": 170870 }, { "epoch": 1.442908108336324, "grad_norm": 0.24176332354545593, "learning_rate": 2.182999627793766e-06, "loss": 0.0032, "step": 170880 }, { "epoch": 1.4429925481834878, "grad_norm": 0.1770932525396347, "learning_rate": 2.1823908582263528e-06, "loss": 0.0042, "step": 170890 }, { "epoch": 1.4430769880306515, "grad_norm": 0.2031095325946808, "learning_rate": 2.181782149856833e-06, "loss": 0.0071, "step": 170900 }, { "epoch": 1.4431614278778155, "grad_norm": 0.12240690737962723, "learning_rate": 2.1811735026984276e-06, "loss": 0.0044, "step": 170910 }, { "epoch": 1.4432458677249795, "grad_norm": 0.143039733171463, "learning_rate": 2.1805649167643538e-06, "loss": 0.0051, "step": 170920 }, { "epoch": 1.4433303075721433, "grad_norm": 0.27737101912498474, "learning_rate": 2.1799563920678347e-06, "loss": 0.0071, "step": 170930 }, { "epoch": 1.443414747419307, "grad_norm": 0.10960625857114792, "learning_rate": 2.179347928622084e-06, "loss": 0.0047, "step": 170940 }, { "epoch": 1.443499187266471, "grad_norm": 0.18729887902736664, "learning_rate": 2.178739526440318e-06, "loss": 0.0036, "step": 170950 }, { "epoch": 1.4435836271136349, "grad_norm": 0.20844605565071106, "learning_rate": 2.1781311855357516e-06, "loss": 0.0043, "step": 170960 }, { "epoch": 1.4436680669607989, "grad_norm": 0.32325059175491333, "learning_rate": 2.1775229059215984e-06, "loss": 0.0037, "step": 170970 }, { "epoch": 1.4437525068079626, "grad_norm": 0.4763135612010956, "learning_rate": 2.1769146876110694e-06, "loss": 0.0076, "step": 170980 }, { "epoch": 1.4438369466551266, "grad_norm": 0.29047486186027527, "learning_rate": 2.1763065306173727e-06, "loss": 0.0039, "step": 170990 }, { "epoch": 1.4439213865022904, "grad_norm": 0.07446349412202835, "learning_rate": 2.175698434953721e-06, "loss": 0.0118, "step": 171000 }, { "epoch": 1.4440058263494544, "grad_norm": 0.2700245678424835, "learning_rate": 2.175090400633319e-06, "loss": 0.0105, "step": 171010 }, { "epoch": 1.4440902661966182, "grad_norm": 0.6938925385475159, "learning_rate": 2.1744824276693767e-06, "loss": 0.0057, "step": 171020 }, { "epoch": 1.444174706043782, "grad_norm": 0.35554927587509155, "learning_rate": 2.1738745160750953e-06, "loss": 0.0073, "step": 171030 }, { "epoch": 1.444259145890946, "grad_norm": 0.028236279264092445, "learning_rate": 2.173266665863682e-06, "loss": 0.0087, "step": 171040 }, { "epoch": 1.44434358573811, "grad_norm": 0.2646685540676117, "learning_rate": 2.172658877048337e-06, "loss": 0.0094, "step": 171050 }, { "epoch": 1.4444280255852737, "grad_norm": 0.03964401036500931, "learning_rate": 2.1720511496422624e-06, "loss": 0.0056, "step": 171060 }, { "epoch": 1.4445124654324375, "grad_norm": 0.019210465252399445, "learning_rate": 2.1714434836586574e-06, "loss": 0.0041, "step": 171070 }, { "epoch": 1.4445969052796015, "grad_norm": 0.028708213940262794, "learning_rate": 2.170835879110718e-06, "loss": 0.0061, "step": 171080 }, { "epoch": 1.4446813451267653, "grad_norm": 0.3853462338447571, "learning_rate": 2.1702283360116453e-06, "loss": 0.0123, "step": 171090 }, { "epoch": 1.4447657849739293, "grad_norm": 0.052612606436014175, "learning_rate": 2.169620854374632e-06, "loss": 0.0035, "step": 171100 }, { "epoch": 1.444850224821093, "grad_norm": 0.4302632212638855, "learning_rate": 2.1690134342128756e-06, "loss": 0.0077, "step": 171110 }, { "epoch": 1.4449346646682568, "grad_norm": 0.2044571489095688, "learning_rate": 2.1684060755395646e-06, "loss": 0.0042, "step": 171120 }, { "epoch": 1.4450191045154208, "grad_norm": 0.37605807185173035, "learning_rate": 2.1677987783678963e-06, "loss": 0.0206, "step": 171130 }, { "epoch": 1.4451035443625848, "grad_norm": 1.3152673244476318, "learning_rate": 2.1671915427110573e-06, "loss": 0.0094, "step": 171140 }, { "epoch": 1.4451879842097486, "grad_norm": 0.10849104076623917, "learning_rate": 2.166584368582238e-06, "loss": 0.0036, "step": 171150 }, { "epoch": 1.4452724240569124, "grad_norm": 0.10598822683095932, "learning_rate": 2.1659772559946236e-06, "loss": 0.0058, "step": 171160 }, { "epoch": 1.4453568639040764, "grad_norm": 0.4964991807937622, "learning_rate": 2.1653702049614044e-06, "loss": 0.005, "step": 171170 }, { "epoch": 1.4454413037512401, "grad_norm": 0.13913406431674957, "learning_rate": 2.1647632154957634e-06, "loss": 0.0123, "step": 171180 }, { "epoch": 1.4455257435984041, "grad_norm": 0.04799291864037514, "learning_rate": 2.164156287610882e-06, "loss": 0.0037, "step": 171190 }, { "epoch": 1.445610183445568, "grad_norm": 0.1917276233434677, "learning_rate": 2.1635494213199478e-06, "loss": 0.0033, "step": 171200 }, { "epoch": 1.445694623292732, "grad_norm": 0.6465777158737183, "learning_rate": 2.1629426166361362e-06, "loss": 0.0137, "step": 171210 }, { "epoch": 1.4457790631398957, "grad_norm": 0.24297483265399933, "learning_rate": 2.162335873572632e-06, "loss": 0.0051, "step": 171220 }, { "epoch": 1.4458635029870597, "grad_norm": 0.15889817476272583, "learning_rate": 2.1617291921426107e-06, "loss": 0.0067, "step": 171230 }, { "epoch": 1.4459479428342235, "grad_norm": 0.24129168689250946, "learning_rate": 2.16112257235925e-06, "loss": 0.0049, "step": 171240 }, { "epoch": 1.4460323826813872, "grad_norm": 0.16831405460834503, "learning_rate": 2.1605160142357235e-06, "loss": 0.0075, "step": 171250 }, { "epoch": 1.4461168225285512, "grad_norm": 0.10116491466760635, "learning_rate": 2.159909517785209e-06, "loss": 0.0035, "step": 171260 }, { "epoch": 1.4462012623757152, "grad_norm": 0.5188562273979187, "learning_rate": 2.1593030830208756e-06, "loss": 0.0085, "step": 171270 }, { "epoch": 1.446285702222879, "grad_norm": 0.2819933593273163, "learning_rate": 2.1586967099558996e-06, "loss": 0.012, "step": 171280 }, { "epoch": 1.4463701420700428, "grad_norm": 0.14149995148181915, "learning_rate": 2.1580903986034485e-06, "loss": 0.0041, "step": 171290 }, { "epoch": 1.4464545819172068, "grad_norm": 0.10921365767717361, "learning_rate": 2.157484148976692e-06, "loss": 0.0091, "step": 171300 }, { "epoch": 1.4465390217643705, "grad_norm": 0.9165326952934265, "learning_rate": 2.156877961088796e-06, "loss": 0.0109, "step": 171310 }, { "epoch": 1.4466234616115345, "grad_norm": 0.43446627259254456, "learning_rate": 2.156271834952928e-06, "loss": 0.0068, "step": 171320 }, { "epoch": 1.4467079014586983, "grad_norm": 0.5772121548652649, "learning_rate": 2.1556657705822535e-06, "loss": 0.0092, "step": 171330 }, { "epoch": 1.4467923413058623, "grad_norm": 0.2158561646938324, "learning_rate": 2.1550597679899344e-06, "loss": 0.0035, "step": 171340 }, { "epoch": 1.446876781153026, "grad_norm": 1.1043874025344849, "learning_rate": 2.154453827189136e-06, "loss": 0.0072, "step": 171350 }, { "epoch": 1.44696122100019, "grad_norm": 0.3205132782459259, "learning_rate": 2.1538479481930154e-06, "loss": 0.0048, "step": 171360 }, { "epoch": 1.4470456608473539, "grad_norm": 0.6868729591369629, "learning_rate": 2.1532421310147363e-06, "loss": 0.005, "step": 171370 }, { "epoch": 1.4471301006945176, "grad_norm": 0.7751705646514893, "learning_rate": 2.1526363756674538e-06, "loss": 0.02, "step": 171380 }, { "epoch": 1.4472145405416816, "grad_norm": 1.0553644895553589, "learning_rate": 2.1520306821643265e-06, "loss": 0.0092, "step": 171390 }, { "epoch": 1.4472989803888454, "grad_norm": 0.4098161458969116, "learning_rate": 2.1514250505185073e-06, "loss": 0.0049, "step": 171400 }, { "epoch": 1.4473834202360094, "grad_norm": 0.5002874732017517, "learning_rate": 2.150819480743154e-06, "loss": 0.0108, "step": 171410 }, { "epoch": 1.4474678600831732, "grad_norm": 0.17238248884677887, "learning_rate": 2.1502139728514177e-06, "loss": 0.0049, "step": 171420 }, { "epoch": 1.4475522999303372, "grad_norm": 0.45852425694465637, "learning_rate": 2.149608526856448e-06, "loss": 0.0067, "step": 171430 }, { "epoch": 1.447636739777501, "grad_norm": 0.32716798782348633, "learning_rate": 2.149003142771399e-06, "loss": 0.0041, "step": 171440 }, { "epoch": 1.447721179624665, "grad_norm": 0.19292372465133667, "learning_rate": 2.148397820609416e-06, "loss": 0.0054, "step": 171450 }, { "epoch": 1.4478056194718287, "grad_norm": 0.20938433706760406, "learning_rate": 2.1477925603836493e-06, "loss": 0.006, "step": 171460 }, { "epoch": 1.4478900593189925, "grad_norm": 0.06514310091733932, "learning_rate": 2.1471873621072438e-06, "loss": 0.0099, "step": 171470 }, { "epoch": 1.4479744991661565, "grad_norm": 0.4681106209754944, "learning_rate": 2.1465822257933443e-06, "loss": 0.0076, "step": 171480 }, { "epoch": 1.4480589390133205, "grad_norm": 0.09201142191886902, "learning_rate": 2.1459771514550927e-06, "loss": 0.0048, "step": 171490 }, { "epoch": 1.4481433788604843, "grad_norm": 0.24706842005252838, "learning_rate": 2.1453721391056336e-06, "loss": 0.0068, "step": 171500 }, { "epoch": 1.448227818707648, "grad_norm": 0.395829439163208, "learning_rate": 2.1447671887581053e-06, "loss": 0.0083, "step": 171510 }, { "epoch": 1.448312258554812, "grad_norm": 0.2506791651248932, "learning_rate": 2.14416230042565e-06, "loss": 0.0052, "step": 171520 }, { "epoch": 1.4483966984019758, "grad_norm": 0.18471352756023407, "learning_rate": 2.143557474121405e-06, "loss": 0.0074, "step": 171530 }, { "epoch": 1.4484811382491398, "grad_norm": 0.1958678662776947, "learning_rate": 2.142952709858504e-06, "loss": 0.0077, "step": 171540 }, { "epoch": 1.4485655780963036, "grad_norm": 0.1867895871400833, "learning_rate": 2.142348007650088e-06, "loss": 0.0025, "step": 171550 }, { "epoch": 1.4486500179434676, "grad_norm": 0.08578799664974213, "learning_rate": 2.1417433675092845e-06, "loss": 0.003, "step": 171560 }, { "epoch": 1.4487344577906314, "grad_norm": 0.349261075258255, "learning_rate": 2.1411387894492313e-06, "loss": 0.0058, "step": 171570 }, { "epoch": 1.4488188976377954, "grad_norm": 0.21868880093097687, "learning_rate": 2.140534273483056e-06, "loss": 0.0066, "step": 171580 }, { "epoch": 1.4489033374849591, "grad_norm": 0.29854607582092285, "learning_rate": 2.1399298196238918e-06, "loss": 0.0057, "step": 171590 }, { "epoch": 1.448987777332123, "grad_norm": 0.004909161943942308, "learning_rate": 2.139325427884864e-06, "loss": 0.0072, "step": 171600 }, { "epoch": 1.449072217179287, "grad_norm": 0.2077229619026184, "learning_rate": 2.1387210982791033e-06, "loss": 0.0046, "step": 171610 }, { "epoch": 1.449156657026451, "grad_norm": 0.08460550010204315, "learning_rate": 2.138116830819735e-06, "loss": 0.0062, "step": 171620 }, { "epoch": 1.4492410968736147, "grad_norm": 0.24997597932815552, "learning_rate": 2.137512625519882e-06, "loss": 0.0064, "step": 171630 }, { "epoch": 1.4493255367207785, "grad_norm": 0.09836467355489731, "learning_rate": 2.136908482392666e-06, "loss": 0.0062, "step": 171640 }, { "epoch": 1.4494099765679425, "grad_norm": 0.042583730071783066, "learning_rate": 2.1363044014512136e-06, "loss": 0.0065, "step": 171650 }, { "epoch": 1.4494944164151062, "grad_norm": 0.7308726906776428, "learning_rate": 2.135700382708642e-06, "loss": 0.0071, "step": 171660 }, { "epoch": 1.4495788562622702, "grad_norm": 0.3412436544895172, "learning_rate": 2.13509642617807e-06, "loss": 0.0072, "step": 171670 }, { "epoch": 1.449663296109434, "grad_norm": 0.3951902985572815, "learning_rate": 2.1344925318726186e-06, "loss": 0.0092, "step": 171680 }, { "epoch": 1.4497477359565978, "grad_norm": 0.05839700996875763, "learning_rate": 2.1338886998053994e-06, "loss": 0.0033, "step": 171690 }, { "epoch": 1.4498321758037618, "grad_norm": 0.08382397145032883, "learning_rate": 2.1332849299895325e-06, "loss": 0.0036, "step": 171700 }, { "epoch": 1.4499166156509258, "grad_norm": 0.4883076250553131, "learning_rate": 2.13268122243813e-06, "loss": 0.0069, "step": 171710 }, { "epoch": 1.4500010554980896, "grad_norm": 0.7006595134735107, "learning_rate": 2.1320775771643034e-06, "loss": 0.0083, "step": 171720 }, { "epoch": 1.4500854953452533, "grad_norm": 0.1438993215560913, "learning_rate": 2.1314739941811623e-06, "loss": 0.0106, "step": 171730 }, { "epoch": 1.4501699351924173, "grad_norm": 0.1061142235994339, "learning_rate": 2.1308704735018197e-06, "loss": 0.013, "step": 171740 }, { "epoch": 1.450254375039581, "grad_norm": 0.0029756349977105856, "learning_rate": 2.130267015139381e-06, "loss": 0.0077, "step": 171750 }, { "epoch": 1.450338814886745, "grad_norm": 0.12311706691980362, "learning_rate": 2.129663619106957e-06, "loss": 0.0077, "step": 171760 }, { "epoch": 1.4504232547339089, "grad_norm": 0.7816860675811768, "learning_rate": 2.1290602854176507e-06, "loss": 0.0093, "step": 171770 }, { "epoch": 1.4505076945810729, "grad_norm": 0.11235383152961731, "learning_rate": 2.1284570140845646e-06, "loss": 0.0055, "step": 171780 }, { "epoch": 1.4505921344282366, "grad_norm": 0.40908050537109375, "learning_rate": 2.127853805120805e-06, "loss": 0.0073, "step": 171790 }, { "epoch": 1.4506765742754006, "grad_norm": 0.15615002810955048, "learning_rate": 2.127250658539473e-06, "loss": 0.0048, "step": 171800 }, { "epoch": 1.4507610141225644, "grad_norm": 0.6103613376617432, "learning_rate": 2.126647574353668e-06, "loss": 0.0117, "step": 171810 }, { "epoch": 1.4508454539697282, "grad_norm": 0.20264281332492828, "learning_rate": 2.126044552576487e-06, "loss": 0.0075, "step": 171820 }, { "epoch": 1.4509298938168922, "grad_norm": 0.27089816331863403, "learning_rate": 2.125441593221031e-06, "loss": 0.0078, "step": 171830 }, { "epoch": 1.4510143336640562, "grad_norm": 0.0030967441853135824, "learning_rate": 2.124838696300393e-06, "loss": 0.0047, "step": 171840 }, { "epoch": 1.45109877351122, "grad_norm": 0.08850914239883423, "learning_rate": 2.124235861827671e-06, "loss": 0.0062, "step": 171850 }, { "epoch": 1.4511832133583837, "grad_norm": 0.2504720091819763, "learning_rate": 2.1236330898159553e-06, "loss": 0.006, "step": 171860 }, { "epoch": 1.4512676532055477, "grad_norm": 0.17034295201301575, "learning_rate": 2.1230303802783426e-06, "loss": 0.004, "step": 171870 }, { "epoch": 1.4513520930527115, "grad_norm": 0.2726561427116394, "learning_rate": 2.122427733227917e-06, "loss": 0.0114, "step": 171880 }, { "epoch": 1.4514365328998755, "grad_norm": 0.29661035537719727, "learning_rate": 2.121825148677773e-06, "loss": 0.0154, "step": 171890 }, { "epoch": 1.4515209727470393, "grad_norm": 0.22345660626888275, "learning_rate": 2.1212226266409975e-06, "loss": 0.0052, "step": 171900 }, { "epoch": 1.4516054125942033, "grad_norm": 0.6180976033210754, "learning_rate": 2.1206201671306744e-06, "loss": 0.0096, "step": 171910 }, { "epoch": 1.451689852441367, "grad_norm": 0.3967593312263489, "learning_rate": 2.120017770159893e-06, "loss": 0.0086, "step": 171920 }, { "epoch": 1.451774292288531, "grad_norm": 0.30098429322242737, "learning_rate": 2.1194154357417334e-06, "loss": 0.0058, "step": 171930 }, { "epoch": 1.4518587321356948, "grad_norm": 0.1349337100982666, "learning_rate": 2.118813163889283e-06, "loss": 0.0065, "step": 171940 }, { "epoch": 1.4519431719828586, "grad_norm": 0.4306827783584595, "learning_rate": 2.1182109546156193e-06, "loss": 0.0081, "step": 171950 }, { "epoch": 1.4520276118300226, "grad_norm": 0.10698535293340683, "learning_rate": 2.1176088079338235e-06, "loss": 0.0078, "step": 171960 }, { "epoch": 1.4521120516771866, "grad_norm": 0.25319230556488037, "learning_rate": 2.1170067238569718e-06, "loss": 0.0105, "step": 171970 }, { "epoch": 1.4521964915243504, "grad_norm": 0.4444197416305542, "learning_rate": 2.1164047023981454e-06, "loss": 0.0079, "step": 171980 }, { "epoch": 1.4522809313715141, "grad_norm": 0.06271631270647049, "learning_rate": 2.1158027435704166e-06, "loss": 0.0077, "step": 171990 }, { "epoch": 1.4523653712186781, "grad_norm": 0.35348108410835266, "learning_rate": 2.115200847386862e-06, "loss": 0.0071, "step": 172000 }, { "epoch": 1.452449811065842, "grad_norm": 0.318965882062912, "learning_rate": 2.1145990138605543e-06, "loss": 0.0135, "step": 172010 }, { "epoch": 1.452534250913006, "grad_norm": 0.3391469717025757, "learning_rate": 2.113997243004563e-06, "loss": 0.0146, "step": 172020 }, { "epoch": 1.4526186907601697, "grad_norm": 0.2368112951517105, "learning_rate": 2.1133955348319618e-06, "loss": 0.0065, "step": 172030 }, { "epoch": 1.4527031306073335, "grad_norm": 0.04702889546751976, "learning_rate": 2.112793889355818e-06, "loss": 0.0028, "step": 172040 }, { "epoch": 1.4527875704544975, "grad_norm": 0.11091183871030807, "learning_rate": 2.1121923065891996e-06, "loss": 0.0085, "step": 172050 }, { "epoch": 1.4528720103016615, "grad_norm": 0.8070999979972839, "learning_rate": 2.111590786545171e-06, "loss": 0.0069, "step": 172060 }, { "epoch": 1.4529564501488252, "grad_norm": 0.06403181701898575, "learning_rate": 2.1109893292367994e-06, "loss": 0.0046, "step": 172070 }, { "epoch": 1.453040889995989, "grad_norm": 0.09019322693347931, "learning_rate": 2.110387934677146e-06, "loss": 0.0025, "step": 172080 }, { "epoch": 1.453125329843153, "grad_norm": 0.006285178940743208, "learning_rate": 2.109786602879277e-06, "loss": 0.0233, "step": 172090 }, { "epoch": 1.4532097696903168, "grad_norm": 0.6279982328414917, "learning_rate": 2.1091853338562484e-06, "loss": 0.0089, "step": 172100 }, { "epoch": 1.4532942095374808, "grad_norm": 0.022181136533617973, "learning_rate": 2.108584127621124e-06, "loss": 0.005, "step": 172110 }, { "epoch": 1.4533786493846446, "grad_norm": 0.12265709787607193, "learning_rate": 2.1079829841869597e-06, "loss": 0.0052, "step": 172120 }, { "epoch": 1.4534630892318086, "grad_norm": 0.29334551095962524, "learning_rate": 2.1073819035668125e-06, "loss": 0.0092, "step": 172130 }, { "epoch": 1.4535475290789723, "grad_norm": 0.42405885457992554, "learning_rate": 2.106780885773738e-06, "loss": 0.009, "step": 172140 }, { "epoch": 1.4536319689261363, "grad_norm": 0.5749354362487793, "learning_rate": 2.106179930820787e-06, "loss": 0.0074, "step": 172150 }, { "epoch": 1.4537164087733, "grad_norm": 0.10763535648584366, "learning_rate": 2.1055790387210175e-06, "loss": 0.0045, "step": 172160 }, { "epoch": 1.4538008486204639, "grad_norm": 0.760193943977356, "learning_rate": 2.104978209487476e-06, "loss": 0.0114, "step": 172170 }, { "epoch": 1.4538852884676279, "grad_norm": 0.06641912460327148, "learning_rate": 2.1043774431332164e-06, "loss": 0.0071, "step": 172180 }, { "epoch": 1.4539697283147919, "grad_norm": 0.004408693872392178, "learning_rate": 2.1037767396712856e-06, "loss": 0.0047, "step": 172190 }, { "epoch": 1.4540541681619557, "grad_norm": 0.29527032375335693, "learning_rate": 2.10317609911473e-06, "loss": 0.0084, "step": 172200 }, { "epoch": 1.4541386080091194, "grad_norm": 0.06320462375879288, "learning_rate": 2.102575521476594e-06, "loss": 0.0039, "step": 172210 }, { "epoch": 1.4542230478562834, "grad_norm": 0.19055838882923126, "learning_rate": 2.1019750067699264e-06, "loss": 0.0073, "step": 172220 }, { "epoch": 1.4543074877034472, "grad_norm": 0.6775668263435364, "learning_rate": 2.1013745550077656e-06, "loss": 0.0104, "step": 172230 }, { "epoch": 1.4543919275506112, "grad_norm": 0.38292786478996277, "learning_rate": 2.100774166203158e-06, "loss": 0.0073, "step": 172240 }, { "epoch": 1.454476367397775, "grad_norm": 0.21832706034183502, "learning_rate": 2.1001738403691406e-06, "loss": 0.0072, "step": 172250 }, { "epoch": 1.454560807244939, "grad_norm": 0.15141811966896057, "learning_rate": 2.0995735775187513e-06, "loss": 0.0038, "step": 172260 }, { "epoch": 1.4546452470921027, "grad_norm": 0.04852305352687836, "learning_rate": 2.098973377665032e-06, "loss": 0.0067, "step": 172270 }, { "epoch": 1.4547296869392667, "grad_norm": 0.23561182618141174, "learning_rate": 2.0983732408210155e-06, "loss": 0.0065, "step": 172280 }, { "epoch": 1.4548141267864305, "grad_norm": 0.25800302624702454, "learning_rate": 2.0977731669997385e-06, "loss": 0.0042, "step": 172290 }, { "epoch": 1.4548985666335943, "grad_norm": 0.3388892114162445, "learning_rate": 2.0971731562142316e-06, "loss": 0.0054, "step": 172300 }, { "epoch": 1.4549830064807583, "grad_norm": 0.26422882080078125, "learning_rate": 2.0965732084775303e-06, "loss": 0.0069, "step": 172310 }, { "epoch": 1.455067446327922, "grad_norm": 0.14860565960407257, "learning_rate": 2.0959733238026626e-06, "loss": 0.0081, "step": 172320 }, { "epoch": 1.455151886175086, "grad_norm": 0.3842429220676422, "learning_rate": 2.0953735022026604e-06, "loss": 0.0128, "step": 172330 }, { "epoch": 1.4552363260222498, "grad_norm": 0.054491836577653885, "learning_rate": 2.0947737436905495e-06, "loss": 0.0099, "step": 172340 }, { "epoch": 1.4553207658694138, "grad_norm": 0.3957909643650055, "learning_rate": 2.094174048279359e-06, "loss": 0.0073, "step": 172350 }, { "epoch": 1.4554052057165776, "grad_norm": 0.047723621129989624, "learning_rate": 2.093574415982113e-06, "loss": 0.0104, "step": 172360 }, { "epoch": 1.4554896455637416, "grad_norm": 0.5958817005157471, "learning_rate": 2.092974846811834e-06, "loss": 0.0055, "step": 172370 }, { "epoch": 1.4555740854109054, "grad_norm": 0.025031080469489098, "learning_rate": 2.092375340781547e-06, "loss": 0.0061, "step": 172380 }, { "epoch": 1.4556585252580692, "grad_norm": 0.0024568464141339064, "learning_rate": 2.0917758979042697e-06, "loss": 0.0041, "step": 172390 }, { "epoch": 1.4557429651052332, "grad_norm": 0.02207295037806034, "learning_rate": 2.0911765181930254e-06, "loss": 0.0073, "step": 172400 }, { "epoch": 1.4558274049523972, "grad_norm": 0.5053673982620239, "learning_rate": 2.090577201660829e-06, "loss": 0.005, "step": 172410 }, { "epoch": 1.455911844799561, "grad_norm": 0.04833013191819191, "learning_rate": 2.089977948320702e-06, "loss": 0.0055, "step": 172420 }, { "epoch": 1.4559962846467247, "grad_norm": 0.14771565794944763, "learning_rate": 2.0893787581856557e-06, "loss": 0.0063, "step": 172430 }, { "epoch": 1.4560807244938887, "grad_norm": 0.3311850428581238, "learning_rate": 2.0887796312687074e-06, "loss": 0.0082, "step": 172440 }, { "epoch": 1.4561651643410525, "grad_norm": 0.1870696246623993, "learning_rate": 2.0881805675828697e-06, "loss": 0.0046, "step": 172450 }, { "epoch": 1.4562496041882165, "grad_norm": 0.0965554490685463, "learning_rate": 2.0875815671411533e-06, "loss": 0.0078, "step": 172460 }, { "epoch": 1.4563340440353802, "grad_norm": 0.3616187572479248, "learning_rate": 2.0869826299565667e-06, "loss": 0.0065, "step": 172470 }, { "epoch": 1.4564184838825442, "grad_norm": 0.6065681576728821, "learning_rate": 2.0863837560421217e-06, "loss": 0.0109, "step": 172480 }, { "epoch": 1.456502923729708, "grad_norm": 0.01215402688831091, "learning_rate": 2.085784945410825e-06, "loss": 0.0089, "step": 172490 }, { "epoch": 1.456587363576872, "grad_norm": 0.3557158410549164, "learning_rate": 2.08518619807568e-06, "loss": 0.0064, "step": 172500 }, { "epoch": 1.4566718034240358, "grad_norm": 0.27613839507102966, "learning_rate": 2.0845875140496956e-06, "loss": 0.0092, "step": 172510 }, { "epoch": 1.4567562432711996, "grad_norm": 0.18962255120277405, "learning_rate": 2.083988893345873e-06, "loss": 0.0108, "step": 172520 }, { "epoch": 1.4568406831183636, "grad_norm": 0.19080166518688202, "learning_rate": 2.0833903359772135e-06, "loss": 0.0057, "step": 172530 }, { "epoch": 1.4569251229655276, "grad_norm": 0.11232315748929977, "learning_rate": 2.0827918419567167e-06, "loss": 0.01, "step": 172540 }, { "epoch": 1.4570095628126913, "grad_norm": 0.12690207362174988, "learning_rate": 2.0821934112973853e-06, "loss": 0.0106, "step": 172550 }, { "epoch": 1.4570940026598551, "grad_norm": 0.8129735589027405, "learning_rate": 2.0815950440122133e-06, "loss": 0.009, "step": 172560 }, { "epoch": 1.4571784425070191, "grad_norm": 0.13207069039344788, "learning_rate": 2.0809967401142005e-06, "loss": 0.0052, "step": 172570 }, { "epoch": 1.4572628823541829, "grad_norm": 0.12213319540023804, "learning_rate": 2.080398499616339e-06, "loss": 0.0044, "step": 172580 }, { "epoch": 1.4573473222013469, "grad_norm": 0.3084638714790344, "learning_rate": 2.079800322531625e-06, "loss": 0.0052, "step": 172590 }, { "epoch": 1.4574317620485107, "grad_norm": 0.2742963433265686, "learning_rate": 2.0792022088730503e-06, "loss": 0.0099, "step": 172600 }, { "epoch": 1.4575162018956744, "grad_norm": 0.25683724880218506, "learning_rate": 2.0786041586536044e-06, "loss": 0.0066, "step": 172610 }, { "epoch": 1.4576006417428384, "grad_norm": 0.35434019565582275, "learning_rate": 2.0780061718862776e-06, "loss": 0.006, "step": 172620 }, { "epoch": 1.4576850815900024, "grad_norm": 0.4770048260688782, "learning_rate": 2.0774082485840564e-06, "loss": 0.0151, "step": 172630 }, { "epoch": 1.4577695214371662, "grad_norm": 0.30909404158592224, "learning_rate": 2.0768103887599306e-06, "loss": 0.0067, "step": 172640 }, { "epoch": 1.45785396128433, "grad_norm": 0.45596253871917725, "learning_rate": 2.0762125924268826e-06, "loss": 0.0041, "step": 172650 }, { "epoch": 1.457938401131494, "grad_norm": 0.27951687574386597, "learning_rate": 2.0756148595978997e-06, "loss": 0.0051, "step": 172660 }, { "epoch": 1.4580228409786578, "grad_norm": 0.3988695740699768, "learning_rate": 2.0750171902859612e-06, "loss": 0.0088, "step": 172670 }, { "epoch": 1.4581072808258217, "grad_norm": 0.31452369689941406, "learning_rate": 2.0744195845040514e-06, "loss": 0.0053, "step": 172680 }, { "epoch": 1.4581917206729855, "grad_norm": 0.6534383296966553, "learning_rate": 2.0738220422651493e-06, "loss": 0.0062, "step": 172690 }, { "epoch": 1.4582761605201495, "grad_norm": 0.11709684878587723, "learning_rate": 2.0732245635822324e-06, "loss": 0.0052, "step": 172700 }, { "epoch": 1.4583606003673133, "grad_norm": 0.06498857587575912, "learning_rate": 2.072627148468278e-06, "loss": 0.0054, "step": 172710 }, { "epoch": 1.4584450402144773, "grad_norm": 0.06248990818858147, "learning_rate": 2.0720297969362606e-06, "loss": 0.0031, "step": 172720 }, { "epoch": 1.458529480061641, "grad_norm": 0.15514357388019562, "learning_rate": 2.071432508999158e-06, "loss": 0.0097, "step": 172730 }, { "epoch": 1.4586139199088048, "grad_norm": 0.3765728771686554, "learning_rate": 2.0708352846699393e-06, "loss": 0.0049, "step": 172740 }, { "epoch": 1.4586983597559688, "grad_norm": 0.9159442782402039, "learning_rate": 2.0702381239615793e-06, "loss": 0.012, "step": 172750 }, { "epoch": 1.4587827996031328, "grad_norm": 0.20299383997917175, "learning_rate": 2.0696410268870455e-06, "loss": 0.0089, "step": 172760 }, { "epoch": 1.4588672394502966, "grad_norm": 0.07864031940698624, "learning_rate": 2.0690439934593115e-06, "loss": 0.009, "step": 172770 }, { "epoch": 1.4589516792974604, "grad_norm": 0.06443092226982117, "learning_rate": 2.0684470236913377e-06, "loss": 0.0039, "step": 172780 }, { "epoch": 1.4590361191446244, "grad_norm": 0.5413389801979065, "learning_rate": 2.067850117596096e-06, "loss": 0.0124, "step": 172790 }, { "epoch": 1.4591205589917882, "grad_norm": 0.20861247181892395, "learning_rate": 2.0672532751865464e-06, "loss": 0.0075, "step": 172800 }, { "epoch": 1.4592049988389522, "grad_norm": 0.24261927604675293, "learning_rate": 2.0666564964756575e-06, "loss": 0.0083, "step": 172810 }, { "epoch": 1.459289438686116, "grad_norm": 0.18215598165988922, "learning_rate": 2.0660597814763854e-06, "loss": 0.006, "step": 172820 }, { "epoch": 1.45937387853328, "grad_norm": 0.1119922548532486, "learning_rate": 2.0654631302016966e-06, "loss": 0.0085, "step": 172830 }, { "epoch": 1.4594583183804437, "grad_norm": 0.3775191903114319, "learning_rate": 2.0648665426645464e-06, "loss": 0.0109, "step": 172840 }, { "epoch": 1.4595427582276077, "grad_norm": 0.1535586416721344, "learning_rate": 2.0642700188778935e-06, "loss": 0.0057, "step": 172850 }, { "epoch": 1.4596271980747715, "grad_norm": 0.052149154245853424, "learning_rate": 2.0636735588546946e-06, "loss": 0.01, "step": 172860 }, { "epoch": 1.4597116379219353, "grad_norm": 0.01039521861821413, "learning_rate": 2.0630771626079026e-06, "loss": 0.0123, "step": 172870 }, { "epoch": 1.4597960777690993, "grad_norm": 0.37324002385139465, "learning_rate": 2.062480830150474e-06, "loss": 0.0078, "step": 172880 }, { "epoch": 1.4598805176162633, "grad_norm": 0.022033926099538803, "learning_rate": 2.0618845614953588e-06, "loss": 0.0125, "step": 172890 }, { "epoch": 1.459964957463427, "grad_norm": 0.23297694325447083, "learning_rate": 2.0612883566555097e-06, "loss": 0.0046, "step": 172900 }, { "epoch": 1.4600493973105908, "grad_norm": 0.15261372923851013, "learning_rate": 2.060692215643874e-06, "loss": 0.0085, "step": 172910 }, { "epoch": 1.4601338371577548, "grad_norm": 0.3235227167606354, "learning_rate": 2.060096138473403e-06, "loss": 0.0072, "step": 172920 }, { "epoch": 1.4602182770049186, "grad_norm": 0.17097537219524384, "learning_rate": 2.059500125157041e-06, "loss": 0.0072, "step": 172930 }, { "epoch": 1.4603027168520826, "grad_norm": 0.006340791471302509, "learning_rate": 2.0589041757077345e-06, "loss": 0.0055, "step": 172940 }, { "epoch": 1.4603871566992463, "grad_norm": 0.22984161972999573, "learning_rate": 2.058308290138426e-06, "loss": 0.0053, "step": 172950 }, { "epoch": 1.4604715965464101, "grad_norm": 0.017433080822229385, "learning_rate": 2.057712468462057e-06, "loss": 0.0085, "step": 172960 }, { "epoch": 1.4605560363935741, "grad_norm": 0.40674835443496704, "learning_rate": 2.057116710691572e-06, "loss": 0.0088, "step": 172970 }, { "epoch": 1.4606404762407381, "grad_norm": 0.41389310359954834, "learning_rate": 2.056521016839907e-06, "loss": 0.0081, "step": 172980 }, { "epoch": 1.460724916087902, "grad_norm": 0.6173913478851318, "learning_rate": 2.055925386920004e-06, "loss": 0.0081, "step": 172990 }, { "epoch": 1.4608093559350657, "grad_norm": 0.22428685426712036, "learning_rate": 2.055329820944797e-06, "loss": 0.0099, "step": 173000 }, { "epoch": 1.4608937957822297, "grad_norm": 0.6600457429885864, "learning_rate": 2.0547343189272247e-06, "loss": 0.0056, "step": 173010 }, { "epoch": 1.4609782356293934, "grad_norm": 0.0756501778960228, "learning_rate": 2.0541388808802192e-06, "loss": 0.0039, "step": 173020 }, { "epoch": 1.4610626754765574, "grad_norm": 0.2685580849647522, "learning_rate": 2.053543506816714e-06, "loss": 0.0129, "step": 173030 }, { "epoch": 1.4611471153237212, "grad_norm": 0.5003408789634705, "learning_rate": 2.0529481967496375e-06, "loss": 0.0065, "step": 173040 }, { "epoch": 1.4612315551708852, "grad_norm": 0.8583136200904846, "learning_rate": 2.052352950691924e-06, "loss": 0.0069, "step": 173050 }, { "epoch": 1.461315995018049, "grad_norm": 0.283983051776886, "learning_rate": 2.0517577686564993e-06, "loss": 0.0036, "step": 173060 }, { "epoch": 1.461400434865213, "grad_norm": 0.3300277888774872, "learning_rate": 2.051162650656293e-06, "loss": 0.0071, "step": 173070 }, { "epoch": 1.4614848747123768, "grad_norm": 0.16695234179496765, "learning_rate": 2.05056759670423e-06, "loss": 0.0056, "step": 173080 }, { "epoch": 1.4615693145595405, "grad_norm": 0.14911313354969025, "learning_rate": 2.049972606813234e-06, "loss": 0.0031, "step": 173090 }, { "epoch": 1.4616537544067045, "grad_norm": 0.535065770149231, "learning_rate": 2.0493776809962286e-06, "loss": 0.0085, "step": 173100 }, { "epoch": 1.4617381942538685, "grad_norm": 0.2998661994934082, "learning_rate": 2.0487828192661337e-06, "loss": 0.0089, "step": 173110 }, { "epoch": 1.4618226341010323, "grad_norm": 0.3638426661491394, "learning_rate": 2.048188021635873e-06, "loss": 0.0062, "step": 173120 }, { "epoch": 1.461907073948196, "grad_norm": 0.0998559519648552, "learning_rate": 2.0475932881183613e-06, "loss": 0.0084, "step": 173130 }, { "epoch": 1.46199151379536, "grad_norm": 0.4823794960975647, "learning_rate": 2.046998618726521e-06, "loss": 0.0079, "step": 173140 }, { "epoch": 1.4620759536425239, "grad_norm": 0.47992604970932007, "learning_rate": 2.046404013473263e-06, "loss": 0.0075, "step": 173150 }, { "epoch": 1.4621603934896878, "grad_norm": 0.20203544199466705, "learning_rate": 2.045809472371507e-06, "loss": 0.0064, "step": 173160 }, { "epoch": 1.4622448333368516, "grad_norm": 0.3141428828239441, "learning_rate": 2.045214995434164e-06, "loss": 0.0069, "step": 173170 }, { "epoch": 1.4623292731840154, "grad_norm": 0.41439124941825867, "learning_rate": 2.044620582674145e-06, "loss": 0.0122, "step": 173180 }, { "epoch": 1.4624137130311794, "grad_norm": 0.24046988785266876, "learning_rate": 2.0440262341043616e-06, "loss": 0.0059, "step": 173190 }, { "epoch": 1.4624981528783434, "grad_norm": 0.35113173723220825, "learning_rate": 2.0434319497377216e-06, "loss": 0.0078, "step": 173200 }, { "epoch": 1.4625825927255072, "grad_norm": 0.24465367197990417, "learning_rate": 2.042837729587134e-06, "loss": 0.0065, "step": 173210 }, { "epoch": 1.462667032572671, "grad_norm": 0.10946530103683472, "learning_rate": 2.0422435736655046e-06, "loss": 0.0092, "step": 173220 }, { "epoch": 1.462751472419835, "grad_norm": 0.25685641169548035, "learning_rate": 2.0416494819857404e-06, "loss": 0.0095, "step": 173230 }, { "epoch": 1.4628359122669987, "grad_norm": 0.3812575340270996, "learning_rate": 2.0410554545607407e-06, "loss": 0.0048, "step": 173240 }, { "epoch": 1.4629203521141627, "grad_norm": 0.17541901767253876, "learning_rate": 2.040461491403412e-06, "loss": 0.0071, "step": 173250 }, { "epoch": 1.4630047919613265, "grad_norm": 0.3309074640274048, "learning_rate": 2.0398675925266537e-06, "loss": 0.0074, "step": 173260 }, { "epoch": 1.4630892318084905, "grad_norm": 0.8577284812927246, "learning_rate": 2.039273757943365e-06, "loss": 0.0058, "step": 173270 }, { "epoch": 1.4631736716556543, "grad_norm": 0.158359557390213, "learning_rate": 2.038679987666441e-06, "loss": 0.0083, "step": 173280 }, { "epoch": 1.4632581115028183, "grad_norm": 0.42889559268951416, "learning_rate": 2.0380862817087833e-06, "loss": 0.0084, "step": 173290 }, { "epoch": 1.463342551349982, "grad_norm": 0.1874287724494934, "learning_rate": 2.0374926400832824e-06, "loss": 0.0045, "step": 173300 }, { "epoch": 1.4634269911971458, "grad_norm": 0.29365965723991394, "learning_rate": 2.036899062802836e-06, "loss": 0.0077, "step": 173310 }, { "epoch": 1.4635114310443098, "grad_norm": 0.1868634968996048, "learning_rate": 2.036305549880335e-06, "loss": 0.0084, "step": 173320 }, { "epoch": 1.4635958708914738, "grad_norm": 0.0050928290002048016, "learning_rate": 2.0357121013286683e-06, "loss": 0.0064, "step": 173330 }, { "epoch": 1.4636803107386376, "grad_norm": 0.059121496975421906, "learning_rate": 2.035118717160729e-06, "loss": 0.0128, "step": 173340 }, { "epoch": 1.4637647505858014, "grad_norm": 0.13786567747592926, "learning_rate": 2.0345253973894038e-06, "loss": 0.0022, "step": 173350 }, { "epoch": 1.4638491904329654, "grad_norm": 0.6090666651725769, "learning_rate": 2.0339321420275794e-06, "loss": 0.0067, "step": 173360 }, { "epoch": 1.4639336302801291, "grad_norm": 0.13776810467243195, "learning_rate": 2.0333389510881385e-06, "loss": 0.0066, "step": 173370 }, { "epoch": 1.4640180701272931, "grad_norm": 0.18594248592853546, "learning_rate": 2.0327458245839706e-06, "loss": 0.0057, "step": 173380 }, { "epoch": 1.464102509974457, "grad_norm": 0.2646363079547882, "learning_rate": 2.0321527625279524e-06, "loss": 0.0069, "step": 173390 }, { "epoch": 1.464186949821621, "grad_norm": 0.037273555994033813, "learning_rate": 2.0315597649329703e-06, "loss": 0.0054, "step": 173400 }, { "epoch": 1.4642713896687847, "grad_norm": 0.00856708362698555, "learning_rate": 2.030966831811902e-06, "loss": 0.0052, "step": 173410 }, { "epoch": 1.4643558295159487, "grad_norm": 0.35489732027053833, "learning_rate": 2.0303739631776255e-06, "loss": 0.0044, "step": 173420 }, { "epoch": 1.4644402693631124, "grad_norm": 0.305996298789978, "learning_rate": 2.0297811590430182e-06, "loss": 0.007, "step": 173430 }, { "epoch": 1.4645247092102762, "grad_norm": 0.2894350290298462, "learning_rate": 2.0291884194209534e-06, "loss": 0.0047, "step": 173440 }, { "epoch": 1.4646091490574402, "grad_norm": 0.43876001238822937, "learning_rate": 2.028595744324309e-06, "loss": 0.0089, "step": 173450 }, { "epoch": 1.4646935889046042, "grad_norm": 0.47737792134284973, "learning_rate": 2.028003133765955e-06, "loss": 0.0063, "step": 173460 }, { "epoch": 1.464778028751768, "grad_norm": 0.3068501055240631, "learning_rate": 2.027410587758765e-06, "loss": 0.0061, "step": 173470 }, { "epoch": 1.4648624685989318, "grad_norm": 0.056708674877882004, "learning_rate": 2.0268181063156066e-06, "loss": 0.0096, "step": 173480 }, { "epoch": 1.4649469084460958, "grad_norm": 0.0017997750546783209, "learning_rate": 2.0262256894493515e-06, "loss": 0.0066, "step": 173490 }, { "epoch": 1.4650313482932595, "grad_norm": 0.14151878654956818, "learning_rate": 2.025633337172865e-06, "loss": 0.0055, "step": 173500 }, { "epoch": 1.4651157881404235, "grad_norm": 0.21168093383312225, "learning_rate": 2.0250410494990123e-06, "loss": 0.0051, "step": 173510 }, { "epoch": 1.4652002279875873, "grad_norm": 0.26086366176605225, "learning_rate": 2.024448826440657e-06, "loss": 0.0048, "step": 173520 }, { "epoch": 1.465284667834751, "grad_norm": 0.008611399680376053, "learning_rate": 2.023856668010665e-06, "loss": 0.0022, "step": 173530 }, { "epoch": 1.465369107681915, "grad_norm": 0.11082812398672104, "learning_rate": 2.023264574221897e-06, "loss": 0.006, "step": 173540 }, { "epoch": 1.465453547529079, "grad_norm": 0.057884715497493744, "learning_rate": 2.0226725450872105e-06, "loss": 0.0047, "step": 173550 }, { "epoch": 1.4655379873762429, "grad_norm": 0.1921992301940918, "learning_rate": 2.0220805806194678e-06, "loss": 0.006, "step": 173560 }, { "epoch": 1.4656224272234066, "grad_norm": 0.3771894574165344, "learning_rate": 2.0214886808315226e-06, "loss": 0.0114, "step": 173570 }, { "epoch": 1.4657068670705706, "grad_norm": 0.09147349745035172, "learning_rate": 2.020896845736235e-06, "loss": 0.0055, "step": 173580 }, { "epoch": 1.4657913069177344, "grad_norm": 0.3816736042499542, "learning_rate": 2.020305075346457e-06, "loss": 0.0082, "step": 173590 }, { "epoch": 1.4658757467648984, "grad_norm": 0.06816022843122482, "learning_rate": 2.0197133696750425e-06, "loss": 0.0088, "step": 173600 }, { "epoch": 1.4659601866120622, "grad_norm": 0.4043010473251343, "learning_rate": 2.0191217287348412e-06, "loss": 0.0078, "step": 173610 }, { "epoch": 1.4660446264592262, "grad_norm": 0.2609887719154358, "learning_rate": 2.018530152538707e-06, "loss": 0.0049, "step": 173620 }, { "epoch": 1.46612906630639, "grad_norm": 0.5687957406044006, "learning_rate": 2.0179386410994845e-06, "loss": 0.0073, "step": 173630 }, { "epoch": 1.466213506153554, "grad_norm": 0.05643169954419136, "learning_rate": 2.017347194430026e-06, "loss": 0.005, "step": 173640 }, { "epoch": 1.4662979460007177, "grad_norm": 0.19321532547473907, "learning_rate": 2.0167558125431736e-06, "loss": 0.0104, "step": 173650 }, { "epoch": 1.4663823858478815, "grad_norm": 0.09432077407836914, "learning_rate": 2.0161644954517747e-06, "loss": 0.0057, "step": 173660 }, { "epoch": 1.4664668256950455, "grad_norm": 0.290865957736969, "learning_rate": 2.0155732431686737e-06, "loss": 0.011, "step": 173670 }, { "epoch": 1.4665512655422095, "grad_norm": 0.11178954690694809, "learning_rate": 2.0149820557067067e-06, "loss": 0.0042, "step": 173680 }, { "epoch": 1.4666357053893733, "grad_norm": 0.19661808013916016, "learning_rate": 2.0143909330787196e-06, "loss": 0.0036, "step": 173690 }, { "epoch": 1.466720145236537, "grad_norm": 0.3487982451915741, "learning_rate": 2.013799875297548e-06, "loss": 0.0065, "step": 173700 }, { "epoch": 1.466804585083701, "grad_norm": 0.18355397880077362, "learning_rate": 2.0132088823760325e-06, "loss": 0.0044, "step": 173710 }, { "epoch": 1.4668890249308648, "grad_norm": 0.15964555740356445, "learning_rate": 2.0126179543270065e-06, "loss": 0.0061, "step": 173720 }, { "epoch": 1.4669734647780288, "grad_norm": 0.1855044811964035, "learning_rate": 2.0120270911633085e-06, "loss": 0.0066, "step": 173730 }, { "epoch": 1.4670579046251926, "grad_norm": 0.27386078238487244, "learning_rate": 2.0114362928977692e-06, "loss": 0.0049, "step": 173740 }, { "epoch": 1.4671423444723566, "grad_norm": 0.20162655413150787, "learning_rate": 2.0108455595432214e-06, "loss": 0.0061, "step": 173750 }, { "epoch": 1.4672267843195204, "grad_norm": 0.11088589578866959, "learning_rate": 2.0102548911124936e-06, "loss": 0.0076, "step": 173760 }, { "epoch": 1.4673112241666844, "grad_norm": 0.21730530261993408, "learning_rate": 2.0096642876184193e-06, "loss": 0.0049, "step": 173770 }, { "epoch": 1.4673956640138481, "grad_norm": 0.6016883254051208, "learning_rate": 2.009073749073823e-06, "loss": 0.0092, "step": 173780 }, { "epoch": 1.467480103861012, "grad_norm": 0.012802965007722378, "learning_rate": 2.0084832754915306e-06, "loss": 0.0027, "step": 173790 }, { "epoch": 1.467564543708176, "grad_norm": 0.1732356995344162, "learning_rate": 2.0078928668843706e-06, "loss": 0.0135, "step": 173800 }, { "epoch": 1.46764898355534, "grad_norm": 0.4000850021839142, "learning_rate": 2.007302523265162e-06, "loss": 0.0049, "step": 173810 }, { "epoch": 1.4677334234025037, "grad_norm": 0.5002107620239258, "learning_rate": 2.0067122446467314e-06, "loss": 0.011, "step": 173820 }, { "epoch": 1.4678178632496675, "grad_norm": 0.18713930249214172, "learning_rate": 2.0061220310418978e-06, "loss": 0.0045, "step": 173830 }, { "epoch": 1.4679023030968315, "grad_norm": 0.40962356328964233, "learning_rate": 2.0055318824634794e-06, "loss": 0.0095, "step": 173840 }, { "epoch": 1.4679867429439952, "grad_norm": 0.1874774545431137, "learning_rate": 2.0049417989242936e-06, "loss": 0.0096, "step": 173850 }, { "epoch": 1.4680711827911592, "grad_norm": 0.21965594589710236, "learning_rate": 2.00435178043716e-06, "loss": 0.0043, "step": 173860 }, { "epoch": 1.468155622638323, "grad_norm": 0.1710928976535797, "learning_rate": 2.00376182701489e-06, "loss": 0.0077, "step": 173870 }, { "epoch": 1.4682400624854868, "grad_norm": 0.1318407505750656, "learning_rate": 2.003171938670301e-06, "loss": 0.006, "step": 173880 }, { "epoch": 1.4683245023326508, "grad_norm": 0.18442632257938385, "learning_rate": 2.002582115416201e-06, "loss": 0.0058, "step": 173890 }, { "epoch": 1.4684089421798148, "grad_norm": 0.18673047423362732, "learning_rate": 2.001992357265406e-06, "loss": 0.0119, "step": 173900 }, { "epoch": 1.4684933820269785, "grad_norm": 0.26504045724868774, "learning_rate": 2.001402664230722e-06, "loss": 0.0081, "step": 173910 }, { "epoch": 1.4685778218741423, "grad_norm": 0.12987594306468964, "learning_rate": 2.0008130363249577e-06, "loss": 0.008, "step": 173920 }, { "epoch": 1.4686622617213063, "grad_norm": 0.1242595762014389, "learning_rate": 2.00022347356092e-06, "loss": 0.009, "step": 173930 }, { "epoch": 1.46874670156847, "grad_norm": 0.2829039394855499, "learning_rate": 1.9996339759514115e-06, "loss": 0.0096, "step": 173940 }, { "epoch": 1.468831141415634, "grad_norm": 0.3629155457019806, "learning_rate": 1.99904454350924e-06, "loss": 0.0119, "step": 173950 }, { "epoch": 1.4689155812627979, "grad_norm": 0.3201526999473572, "learning_rate": 1.998455176247205e-06, "loss": 0.0065, "step": 173960 }, { "epoch": 1.4690000211099619, "grad_norm": 0.21771444380283356, "learning_rate": 1.99786587417811e-06, "loss": 0.0049, "step": 173970 }, { "epoch": 1.4690844609571256, "grad_norm": 0.3352484107017517, "learning_rate": 1.9972766373147504e-06, "loss": 0.0063, "step": 173980 }, { "epoch": 1.4691689008042896, "grad_norm": 0.3376912474632263, "learning_rate": 1.9966874656699315e-06, "loss": 0.0093, "step": 173990 }, { "epoch": 1.4692533406514534, "grad_norm": 0.4352252781391144, "learning_rate": 1.9960983592564413e-06, "loss": 0.009, "step": 174000 }, { "epoch": 1.4693377804986172, "grad_norm": 0.31488925218582153, "learning_rate": 1.995509318087081e-06, "loss": 0.0053, "step": 174010 }, { "epoch": 1.4694222203457812, "grad_norm": 0.32814982533454895, "learning_rate": 1.9949203421746426e-06, "loss": 0.0061, "step": 174020 }, { "epoch": 1.4695066601929452, "grad_norm": 0.14584071934223175, "learning_rate": 1.9943314315319175e-06, "loss": 0.0066, "step": 174030 }, { "epoch": 1.469591100040109, "grad_norm": 0.25827327370643616, "learning_rate": 1.9937425861716984e-06, "loss": 0.0063, "step": 174040 }, { "epoch": 1.4696755398872727, "grad_norm": 0.2404761016368866, "learning_rate": 1.9931538061067733e-06, "loss": 0.0108, "step": 174050 }, { "epoch": 1.4697599797344367, "grad_norm": 0.35123470425605774, "learning_rate": 1.992565091349933e-06, "loss": 0.0054, "step": 174060 }, { "epoch": 1.4698444195816005, "grad_norm": 0.26705437898635864, "learning_rate": 1.9919764419139627e-06, "loss": 0.0033, "step": 174070 }, { "epoch": 1.4699288594287645, "grad_norm": 0.10997213423252106, "learning_rate": 1.9913878578116473e-06, "loss": 0.009, "step": 174080 }, { "epoch": 1.4700132992759283, "grad_norm": 0.07672824710607529, "learning_rate": 1.9907993390557694e-06, "loss": 0.0075, "step": 174090 }, { "epoch": 1.470097739123092, "grad_norm": 0.12592853605747223, "learning_rate": 1.9902108856591157e-06, "loss": 0.0069, "step": 174100 }, { "epoch": 1.470182178970256, "grad_norm": 0.4516276717185974, "learning_rate": 1.9896224976344626e-06, "loss": 0.0114, "step": 174110 }, { "epoch": 1.47026661881742, "grad_norm": 0.2325809746980667, "learning_rate": 1.9890341749945933e-06, "loss": 0.0061, "step": 174120 }, { "epoch": 1.4703510586645838, "grad_norm": 0.4201975464820862, "learning_rate": 1.9884459177522854e-06, "loss": 0.0105, "step": 174130 }, { "epoch": 1.4704354985117476, "grad_norm": 0.183625265955925, "learning_rate": 1.9878577259203126e-06, "loss": 0.0113, "step": 174140 }, { "epoch": 1.4705199383589116, "grad_norm": 0.06704694032669067, "learning_rate": 1.987269599511455e-06, "loss": 0.0026, "step": 174150 }, { "epoch": 1.4706043782060754, "grad_norm": 0.22952203452587128, "learning_rate": 1.986681538538484e-06, "loss": 0.0062, "step": 174160 }, { "epoch": 1.4706888180532394, "grad_norm": 0.11910749226808548, "learning_rate": 1.9860935430141725e-06, "loss": 0.0086, "step": 174170 }, { "epoch": 1.4707732579004031, "grad_norm": 0.4376595616340637, "learning_rate": 1.98550561295129e-06, "loss": 0.0057, "step": 174180 }, { "epoch": 1.4708576977475671, "grad_norm": 0.7222533226013184, "learning_rate": 1.9849177483626096e-06, "loss": 0.0151, "step": 174190 }, { "epoch": 1.470942137594731, "grad_norm": 0.010848072357475758, "learning_rate": 1.9843299492608962e-06, "loss": 0.0037, "step": 174200 }, { "epoch": 1.471026577441895, "grad_norm": 0.14944970607757568, "learning_rate": 1.98374221565892e-06, "loss": 0.0056, "step": 174210 }, { "epoch": 1.4711110172890587, "grad_norm": 0.03800336271524429, "learning_rate": 1.9831545475694436e-06, "loss": 0.0074, "step": 174220 }, { "epoch": 1.4711954571362225, "grad_norm": 0.4680652916431427, "learning_rate": 1.982566945005233e-06, "loss": 0.0108, "step": 174230 }, { "epoch": 1.4712798969833865, "grad_norm": 0.17574214935302734, "learning_rate": 1.9819794079790504e-06, "loss": 0.0068, "step": 174240 }, { "epoch": 1.4713643368305505, "grad_norm": 0.29455211758613586, "learning_rate": 1.981391936503657e-06, "loss": 0.0087, "step": 174250 }, { "epoch": 1.4714487766777142, "grad_norm": 0.3831353485584259, "learning_rate": 1.9808045305918116e-06, "loss": 0.0124, "step": 174260 }, { "epoch": 1.471533216524878, "grad_norm": 0.09343642741441727, "learning_rate": 1.9802171902562715e-06, "loss": 0.0086, "step": 174270 }, { "epoch": 1.471617656372042, "grad_norm": 0.6546886563301086, "learning_rate": 1.9796299155097976e-06, "loss": 0.0087, "step": 174280 }, { "epoch": 1.4717020962192058, "grad_norm": 0.17329521477222443, "learning_rate": 1.979042706365141e-06, "loss": 0.0112, "step": 174290 }, { "epoch": 1.4717865360663698, "grad_norm": 0.3985859751701355, "learning_rate": 1.9784555628350586e-06, "loss": 0.0106, "step": 174300 }, { "epoch": 1.4718709759135336, "grad_norm": 0.2701367735862732, "learning_rate": 1.977868484932303e-06, "loss": 0.0018, "step": 174310 }, { "epoch": 1.4719554157606975, "grad_norm": 0.1991063505411148, "learning_rate": 1.9772814726696243e-06, "loss": 0.0045, "step": 174320 }, { "epoch": 1.4720398556078613, "grad_norm": 0.019560618326067924, "learning_rate": 1.9766945260597708e-06, "loss": 0.0127, "step": 174330 }, { "epoch": 1.4721242954550253, "grad_norm": 0.39140427112579346, "learning_rate": 1.9761076451154948e-06, "loss": 0.0091, "step": 174340 }, { "epoch": 1.472208735302189, "grad_norm": 0.07864102721214294, "learning_rate": 1.9755208298495394e-06, "loss": 0.0023, "step": 174350 }, { "epoch": 1.4722931751493529, "grad_norm": 1.5210309028625488, "learning_rate": 1.974934080274653e-06, "loss": 0.0083, "step": 174360 }, { "epoch": 1.4723776149965169, "grad_norm": 0.002122990321367979, "learning_rate": 1.9743473964035793e-06, "loss": 0.0075, "step": 174370 }, { "epoch": 1.4724620548436809, "grad_norm": 0.19025570154190063, "learning_rate": 1.9737607782490576e-06, "loss": 0.0115, "step": 174380 }, { "epoch": 1.4725464946908446, "grad_norm": 0.2556372582912445, "learning_rate": 1.9731742258238336e-06, "loss": 0.0061, "step": 174390 }, { "epoch": 1.4726309345380084, "grad_norm": 0.07533920556306839, "learning_rate": 1.9725877391406452e-06, "loss": 0.0037, "step": 174400 }, { "epoch": 1.4727153743851724, "grad_norm": 0.5479491353034973, "learning_rate": 1.972001318212231e-06, "loss": 0.0043, "step": 174410 }, { "epoch": 1.4727998142323362, "grad_norm": 0.16675154864788055, "learning_rate": 1.9714149630513256e-06, "loss": 0.0063, "step": 174420 }, { "epoch": 1.4728842540795002, "grad_norm": 0.047989144921302795, "learning_rate": 1.9708286736706684e-06, "loss": 0.0044, "step": 174430 }, { "epoch": 1.472968693926664, "grad_norm": 1.0064961910247803, "learning_rate": 1.9702424500829896e-06, "loss": 0.0086, "step": 174440 }, { "epoch": 1.4730531337738277, "grad_norm": 0.44159507751464844, "learning_rate": 1.9696562923010264e-06, "loss": 0.0135, "step": 174450 }, { "epoch": 1.4731375736209917, "grad_norm": 0.4863753616809845, "learning_rate": 1.9690702003375056e-06, "loss": 0.0077, "step": 174460 }, { "epoch": 1.4732220134681557, "grad_norm": 0.11435318738222122, "learning_rate": 1.96848417420516e-06, "loss": 0.0082, "step": 174470 }, { "epoch": 1.4733064533153195, "grad_norm": 0.2484499216079712, "learning_rate": 1.967898213916718e-06, "loss": 0.0086, "step": 174480 }, { "epoch": 1.4733908931624833, "grad_norm": 0.06677419692277908, "learning_rate": 1.967312319484905e-06, "loss": 0.0079, "step": 174490 }, { "epoch": 1.4734753330096473, "grad_norm": 0.23034033179283142, "learning_rate": 1.9667264909224463e-06, "loss": 0.0053, "step": 174500 }, { "epoch": 1.473559772856811, "grad_norm": 0.22669488191604614, "learning_rate": 1.966140728242066e-06, "loss": 0.0098, "step": 174510 }, { "epoch": 1.473644212703975, "grad_norm": 0.3274877369403839, "learning_rate": 1.965555031456488e-06, "loss": 0.006, "step": 174520 }, { "epoch": 1.4737286525511388, "grad_norm": 0.0014048614539206028, "learning_rate": 1.964969400578432e-06, "loss": 0.0046, "step": 174530 }, { "epoch": 1.4738130923983028, "grad_norm": 0.061305515468120575, "learning_rate": 1.9643838356206203e-06, "loss": 0.003, "step": 174540 }, { "epoch": 1.4738975322454666, "grad_norm": 0.1496872752904892, "learning_rate": 1.9637983365957674e-06, "loss": 0.0138, "step": 174550 }, { "epoch": 1.4739819720926306, "grad_norm": 0.14614993333816528, "learning_rate": 1.963212903516595e-06, "loss": 0.0096, "step": 174560 }, { "epoch": 1.4740664119397944, "grad_norm": 0.7398226857185364, "learning_rate": 1.962627536395815e-06, "loss": 0.0104, "step": 174570 }, { "epoch": 1.4741508517869581, "grad_norm": 0.06434648483991623, "learning_rate": 1.962042235246144e-06, "loss": 0.0099, "step": 174580 }, { "epoch": 1.4742352916341221, "grad_norm": 0.29587844014167786, "learning_rate": 1.9614570000802903e-06, "loss": 0.0086, "step": 174590 }, { "epoch": 1.4743197314812861, "grad_norm": 0.39150241017341614, "learning_rate": 1.9608718309109698e-06, "loss": 0.0056, "step": 174600 }, { "epoch": 1.47440417132845, "grad_norm": 0.6721009016036987, "learning_rate": 1.9602867277508904e-06, "loss": 0.0068, "step": 174610 }, { "epoch": 1.4744886111756137, "grad_norm": 0.1855795681476593, "learning_rate": 1.9597016906127585e-06, "loss": 0.0044, "step": 174620 }, { "epoch": 1.4745730510227777, "grad_norm": 0.31320521235466003, "learning_rate": 1.9591167195092847e-06, "loss": 0.0069, "step": 174630 }, { "epoch": 1.4746574908699415, "grad_norm": 0.12589216232299805, "learning_rate": 1.9585318144531723e-06, "loss": 0.0076, "step": 174640 }, { "epoch": 1.4747419307171055, "grad_norm": 0.5324447154998779, "learning_rate": 1.957946975457126e-06, "loss": 0.006, "step": 174650 }, { "epoch": 1.4748263705642692, "grad_norm": 0.05927345156669617, "learning_rate": 1.9573622025338463e-06, "loss": 0.0058, "step": 174660 }, { "epoch": 1.4749108104114332, "grad_norm": 0.13366949558258057, "learning_rate": 1.9567774956960377e-06, "loss": 0.0055, "step": 174670 }, { "epoch": 1.474995250258597, "grad_norm": 0.6564876437187195, "learning_rate": 1.956192854956397e-06, "loss": 0.0108, "step": 174680 }, { "epoch": 1.475079690105761, "grad_norm": 0.057720717042684555, "learning_rate": 1.955608280327625e-06, "loss": 0.0118, "step": 174690 }, { "epoch": 1.4751641299529248, "grad_norm": 0.0816318690776825, "learning_rate": 1.955023771822415e-06, "loss": 0.0071, "step": 174700 }, { "epoch": 1.4752485698000886, "grad_norm": 0.19966623187065125, "learning_rate": 1.9544393294534677e-06, "loss": 0.0051, "step": 174710 }, { "epoch": 1.4753330096472526, "grad_norm": 0.31645628809928894, "learning_rate": 1.953854953233473e-06, "loss": 0.0059, "step": 174720 }, { "epoch": 1.4754174494944163, "grad_norm": 0.05596546828746796, "learning_rate": 1.9532706431751252e-06, "loss": 0.0072, "step": 174730 }, { "epoch": 1.4755018893415803, "grad_norm": 0.0708077996969223, "learning_rate": 1.9526863992911145e-06, "loss": 0.0054, "step": 174740 }, { "epoch": 1.475586329188744, "grad_norm": 0.02551659196615219, "learning_rate": 1.9521022215941286e-06, "loss": 0.0066, "step": 174750 }, { "epoch": 1.475670769035908, "grad_norm": 0.17948129773139954, "learning_rate": 1.9515181100968605e-06, "loss": 0.0047, "step": 174760 }, { "epoch": 1.4757552088830719, "grad_norm": 0.03394879400730133, "learning_rate": 1.9509340648119918e-06, "loss": 0.0091, "step": 174770 }, { "epoch": 1.4758396487302359, "grad_norm": 0.13725654780864716, "learning_rate": 1.9503500857522124e-06, "loss": 0.0065, "step": 174780 }, { "epoch": 1.4759240885773997, "grad_norm": 0.008945683017373085, "learning_rate": 1.9497661729302027e-06, "loss": 0.0097, "step": 174790 }, { "epoch": 1.4760085284245634, "grad_norm": 0.08302681148052216, "learning_rate": 1.949182326358648e-06, "loss": 0.0076, "step": 174800 }, { "epoch": 1.4760929682717274, "grad_norm": 0.14491190016269684, "learning_rate": 1.948598546050228e-06, "loss": 0.0053, "step": 174810 }, { "epoch": 1.4761774081188914, "grad_norm": 0.2869992256164551, "learning_rate": 1.9480148320176228e-06, "loss": 0.013, "step": 174820 }, { "epoch": 1.4762618479660552, "grad_norm": 0.41240352392196655, "learning_rate": 1.9474311842735076e-06, "loss": 0.0094, "step": 174830 }, { "epoch": 1.476346287813219, "grad_norm": 0.28059300780296326, "learning_rate": 1.946847602830564e-06, "loss": 0.0067, "step": 174840 }, { "epoch": 1.476430727660383, "grad_norm": 0.23705388605594635, "learning_rate": 1.946264087701464e-06, "loss": 0.0068, "step": 174850 }, { "epoch": 1.4765151675075467, "grad_norm": 0.10391578078269958, "learning_rate": 1.9456806388988812e-06, "loss": 0.003, "step": 174860 }, { "epoch": 1.4765996073547107, "grad_norm": 0.35731032490730286, "learning_rate": 1.945097256435491e-06, "loss": 0.0034, "step": 174870 }, { "epoch": 1.4766840472018745, "grad_norm": 0.5218428373336792, "learning_rate": 1.94451394032396e-06, "loss": 0.0075, "step": 174880 }, { "epoch": 1.4767684870490385, "grad_norm": 0.16224545240402222, "learning_rate": 1.943930690576964e-06, "loss": 0.0091, "step": 174890 }, { "epoch": 1.4768529268962023, "grad_norm": 0.10263372957706451, "learning_rate": 1.9433475072071632e-06, "loss": 0.0036, "step": 174900 }, { "epoch": 1.4769373667433663, "grad_norm": 0.24376006424427032, "learning_rate": 1.94276439022723e-06, "loss": 0.0121, "step": 174910 }, { "epoch": 1.47702180659053, "grad_norm": 0.14316728711128235, "learning_rate": 1.9421813396498263e-06, "loss": 0.0049, "step": 174920 }, { "epoch": 1.4771062464376938, "grad_norm": 0.4789392948150635, "learning_rate": 1.941598355487619e-06, "loss": 0.0035, "step": 174930 }, { "epoch": 1.4771906862848578, "grad_norm": 0.12837859988212585, "learning_rate": 1.941015437753267e-06, "loss": 0.004, "step": 174940 }, { "epoch": 1.4772751261320218, "grad_norm": 0.13512560725212097, "learning_rate": 1.9404325864594344e-06, "loss": 0.0043, "step": 174950 }, { "epoch": 1.4773595659791856, "grad_norm": 0.01873861812055111, "learning_rate": 1.939849801618779e-06, "loss": 0.0048, "step": 174960 }, { "epoch": 1.4774440058263494, "grad_norm": 0.11569485068321228, "learning_rate": 1.9392670832439594e-06, "loss": 0.0081, "step": 174970 }, { "epoch": 1.4775284456735134, "grad_norm": 0.1838693916797638, "learning_rate": 1.9386844313476305e-06, "loss": 0.0036, "step": 174980 }, { "epoch": 1.4776128855206772, "grad_norm": 0.17306149005889893, "learning_rate": 1.9381018459424473e-06, "loss": 0.0072, "step": 174990 }, { "epoch": 1.4776973253678412, "grad_norm": 0.1739199310541153, "learning_rate": 1.9375193270410663e-06, "loss": 0.0049, "step": 175000 }, { "epoch": 1.477781765215005, "grad_norm": 0.30619123578071594, "learning_rate": 1.936936874656136e-06, "loss": 0.0049, "step": 175010 }, { "epoch": 1.4778662050621687, "grad_norm": 0.4823731482028961, "learning_rate": 1.9363544888003117e-06, "loss": 0.0059, "step": 175020 }, { "epoch": 1.4779506449093327, "grad_norm": 0.684539258480072, "learning_rate": 1.9357721694862375e-06, "loss": 0.0082, "step": 175030 }, { "epoch": 1.4780350847564967, "grad_norm": 0.5749721527099609, "learning_rate": 1.9351899167265658e-06, "loss": 0.0105, "step": 175040 }, { "epoch": 1.4781195246036605, "grad_norm": 0.01943264529109001, "learning_rate": 1.934607730533941e-06, "loss": 0.0065, "step": 175050 }, { "epoch": 1.4782039644508242, "grad_norm": 0.14202675223350525, "learning_rate": 1.934025610921008e-06, "loss": 0.0072, "step": 175060 }, { "epoch": 1.4782884042979882, "grad_norm": 0.3065197765827179, "learning_rate": 1.933443557900409e-06, "loss": 0.0052, "step": 175070 }, { "epoch": 1.478372844145152, "grad_norm": 0.10366871953010559, "learning_rate": 1.9328615714847893e-06, "loss": 0.0049, "step": 175080 }, { "epoch": 1.478457283992316, "grad_norm": 0.25871825218200684, "learning_rate": 1.9322796516867875e-06, "loss": 0.0149, "step": 175090 }, { "epoch": 1.4785417238394798, "grad_norm": 0.16084453463554382, "learning_rate": 1.931697798519042e-06, "loss": 0.0056, "step": 175100 }, { "epoch": 1.4786261636866438, "grad_norm": 0.26711341738700867, "learning_rate": 1.9311160119941924e-06, "loss": 0.0059, "step": 175110 }, { "epoch": 1.4787106035338076, "grad_norm": 0.04896567389369011, "learning_rate": 1.930534292124873e-06, "loss": 0.0061, "step": 175120 }, { "epoch": 1.4787950433809716, "grad_norm": 0.48265039920806885, "learning_rate": 1.9299526389237204e-06, "loss": 0.0079, "step": 175130 }, { "epoch": 1.4788794832281353, "grad_norm": 0.3515864908695221, "learning_rate": 1.9293710524033687e-06, "loss": 0.0112, "step": 175140 }, { "epoch": 1.4789639230752991, "grad_norm": 0.05190810561180115, "learning_rate": 1.9287895325764476e-06, "loss": 0.0092, "step": 175150 }, { "epoch": 1.4790483629224631, "grad_norm": 0.3406674265861511, "learning_rate": 1.9282080794555875e-06, "loss": 0.0088, "step": 175160 }, { "epoch": 1.479132802769627, "grad_norm": 0.8745115995407104, "learning_rate": 1.92762669305342e-06, "loss": 0.0116, "step": 175170 }, { "epoch": 1.4792172426167909, "grad_norm": 0.31839272379875183, "learning_rate": 1.927045373382569e-06, "loss": 0.0038, "step": 175180 }, { "epoch": 1.4793016824639547, "grad_norm": 0.6078656911849976, "learning_rate": 1.926464120455665e-06, "loss": 0.0118, "step": 175190 }, { "epoch": 1.4793861223111187, "grad_norm": 0.08964791148900986, "learning_rate": 1.9258829342853304e-06, "loss": 0.0051, "step": 175200 }, { "epoch": 1.4794705621582824, "grad_norm": 0.27182647585868835, "learning_rate": 1.925301814884188e-06, "loss": 0.0057, "step": 175210 }, { "epoch": 1.4795550020054464, "grad_norm": 0.45466601848602295, "learning_rate": 1.9247207622648605e-06, "loss": 0.006, "step": 175220 }, { "epoch": 1.4796394418526102, "grad_norm": 0.13028709590435028, "learning_rate": 1.924139776439966e-06, "loss": 0.0036, "step": 175230 }, { "epoch": 1.4797238816997742, "grad_norm": 0.1345217376947403, "learning_rate": 1.9235588574221276e-06, "loss": 0.007, "step": 175240 }, { "epoch": 1.479808321546938, "grad_norm": 0.19977231323719025, "learning_rate": 1.922978005223958e-06, "loss": 0.0073, "step": 175250 }, { "epoch": 1.479892761394102, "grad_norm": 0.6725417971611023, "learning_rate": 1.9223972198580776e-06, "loss": 0.0069, "step": 175260 }, { "epoch": 1.4799772012412657, "grad_norm": 0.2005491554737091, "learning_rate": 1.921816501337097e-06, "loss": 0.0139, "step": 175270 }, { "epoch": 1.4800616410884295, "grad_norm": 0.22729380428791046, "learning_rate": 1.921235849673633e-06, "loss": 0.0164, "step": 175280 }, { "epoch": 1.4801460809355935, "grad_norm": 0.07709823548793793, "learning_rate": 1.9206552648802955e-06, "loss": 0.0035, "step": 175290 }, { "epoch": 1.4802305207827575, "grad_norm": 0.21097615361213684, "learning_rate": 1.9200747469696945e-06, "loss": 0.0056, "step": 175300 }, { "epoch": 1.4803149606299213, "grad_norm": 0.1333530843257904, "learning_rate": 1.9194942959544373e-06, "loss": 0.0075, "step": 175310 }, { "epoch": 1.480399400477085, "grad_norm": 0.4029921591281891, "learning_rate": 1.9189139118471343e-06, "loss": 0.0071, "step": 175320 }, { "epoch": 1.480483840324249, "grad_norm": 0.2562624514102936, "learning_rate": 1.9183335946603897e-06, "loss": 0.005, "step": 175330 }, { "epoch": 1.4805682801714128, "grad_norm": 0.1666640341281891, "learning_rate": 1.9177533444068065e-06, "loss": 0.0048, "step": 175340 }, { "epoch": 1.4806527200185768, "grad_norm": 0.2612086832523346, "learning_rate": 1.9171731610989906e-06, "loss": 0.0089, "step": 175350 }, { "epoch": 1.4807371598657406, "grad_norm": 0.6821607947349548, "learning_rate": 1.91659304474954e-06, "loss": 0.0057, "step": 175360 }, { "epoch": 1.4808215997129044, "grad_norm": 0.2204127013683319, "learning_rate": 1.9160129953710586e-06, "loss": 0.011, "step": 175370 }, { "epoch": 1.4809060395600684, "grad_norm": 0.09699241071939468, "learning_rate": 1.9154330129761434e-06, "loss": 0.0078, "step": 175380 }, { "epoch": 1.4809904794072324, "grad_norm": 0.23362714052200317, "learning_rate": 1.9148530975773903e-06, "loss": 0.0028, "step": 175390 }, { "epoch": 1.4810749192543962, "grad_norm": 0.07329986244440079, "learning_rate": 1.9142732491873945e-06, "loss": 0.0143, "step": 175400 }, { "epoch": 1.48115935910156, "grad_norm": 0.29082778096199036, "learning_rate": 1.913693467818753e-06, "loss": 0.0045, "step": 175410 }, { "epoch": 1.481243798948724, "grad_norm": 0.09982442855834961, "learning_rate": 1.913113753484055e-06, "loss": 0.0075, "step": 175420 }, { "epoch": 1.4813282387958877, "grad_norm": 0.4718012511730194, "learning_rate": 1.9125341061958956e-06, "loss": 0.0132, "step": 175430 }, { "epoch": 1.4814126786430517, "grad_norm": 0.1705264300107956, "learning_rate": 1.9119545259668625e-06, "loss": 0.0036, "step": 175440 }, { "epoch": 1.4814971184902155, "grad_norm": 0.24192333221435547, "learning_rate": 1.9113750128095424e-06, "loss": 0.0093, "step": 175450 }, { "epoch": 1.4815815583373795, "grad_norm": 0.308542400598526, "learning_rate": 1.9107955667365267e-06, "loss": 0.0089, "step": 175460 }, { "epoch": 1.4816659981845433, "grad_norm": 0.5445318818092346, "learning_rate": 1.9102161877603975e-06, "loss": 0.006, "step": 175470 }, { "epoch": 1.4817504380317073, "grad_norm": 0.27839601039886475, "learning_rate": 1.90963687589374e-06, "loss": 0.0066, "step": 175480 }, { "epoch": 1.481834877878871, "grad_norm": 0.08933939784765244, "learning_rate": 1.9090576311491343e-06, "loss": 0.0062, "step": 175490 }, { "epoch": 1.4819193177260348, "grad_norm": 0.1284913569688797, "learning_rate": 1.9084784535391654e-06, "loss": 0.0047, "step": 175500 }, { "epoch": 1.4820037575731988, "grad_norm": 0.24954167008399963, "learning_rate": 1.9078993430764094e-06, "loss": 0.0058, "step": 175510 }, { "epoch": 1.4820881974203628, "grad_norm": 0.16916219890117645, "learning_rate": 1.907320299773447e-06, "loss": 0.0086, "step": 175520 }, { "epoch": 1.4821726372675266, "grad_norm": 0.5119155645370483, "learning_rate": 1.9067413236428546e-06, "loss": 0.01, "step": 175530 }, { "epoch": 1.4822570771146903, "grad_norm": 0.03500356525182724, "learning_rate": 1.9061624146972063e-06, "loss": 0.0065, "step": 175540 }, { "epoch": 1.4823415169618543, "grad_norm": 0.2870025932788849, "learning_rate": 1.9055835729490768e-06, "loss": 0.0123, "step": 175550 }, { "epoch": 1.4824259568090181, "grad_norm": 0.12890252470970154, "learning_rate": 1.9050047984110359e-06, "loss": 0.008, "step": 175560 }, { "epoch": 1.4825103966561821, "grad_norm": 0.3510376811027527, "learning_rate": 1.9044260910956586e-06, "loss": 0.0081, "step": 175570 }, { "epoch": 1.482594836503346, "grad_norm": 0.3026556968688965, "learning_rate": 1.9038474510155102e-06, "loss": 0.0033, "step": 175580 }, { "epoch": 1.4826792763505097, "grad_norm": 0.2513856887817383, "learning_rate": 1.9032688781831626e-06, "loss": 0.0065, "step": 175590 }, { "epoch": 1.4827637161976737, "grad_norm": 0.20641633868217468, "learning_rate": 1.902690372611179e-06, "loss": 0.0116, "step": 175600 }, { "epoch": 1.4828481560448377, "grad_norm": 0.05757198482751846, "learning_rate": 1.9021119343121268e-06, "loss": 0.0078, "step": 175610 }, { "epoch": 1.4829325958920014, "grad_norm": 0.2010006308555603, "learning_rate": 1.9015335632985694e-06, "loss": 0.0052, "step": 175620 }, { "epoch": 1.4830170357391652, "grad_norm": 0.17202772200107574, "learning_rate": 1.9009552595830677e-06, "loss": 0.009, "step": 175630 }, { "epoch": 1.4831014755863292, "grad_norm": 0.125372514128685, "learning_rate": 1.900377023178181e-06, "loss": 0.0125, "step": 175640 }, { "epoch": 1.483185915433493, "grad_norm": 0.28453555703163147, "learning_rate": 1.8997988540964718e-06, "loss": 0.0061, "step": 175650 }, { "epoch": 1.483270355280657, "grad_norm": 0.2216700315475464, "learning_rate": 1.8992207523504947e-06, "loss": 0.0059, "step": 175660 }, { "epoch": 1.4833547951278208, "grad_norm": 0.6899259686470032, "learning_rate": 1.898642717952809e-06, "loss": 0.0061, "step": 175670 }, { "epoch": 1.4834392349749848, "grad_norm": 0.6490880846977234, "learning_rate": 1.8980647509159678e-06, "loss": 0.0076, "step": 175680 }, { "epoch": 1.4835236748221485, "grad_norm": 0.3719674050807953, "learning_rate": 1.897486851252523e-06, "loss": 0.005, "step": 175690 }, { "epoch": 1.4836081146693125, "grad_norm": 0.23262707889080048, "learning_rate": 1.8969090189750295e-06, "loss": 0.0071, "step": 175700 }, { "epoch": 1.4836925545164763, "grad_norm": 0.21772529184818268, "learning_rate": 1.8963312540960366e-06, "loss": 0.0063, "step": 175710 }, { "epoch": 1.48377699436364, "grad_norm": 0.4752659499645233, "learning_rate": 1.8957535566280927e-06, "loss": 0.0064, "step": 175720 }, { "epoch": 1.483861434210804, "grad_norm": 0.38872456550598145, "learning_rate": 1.8951759265837438e-06, "loss": 0.0049, "step": 175730 }, { "epoch": 1.483945874057968, "grad_norm": 0.041432131081819534, "learning_rate": 1.8945983639755388e-06, "loss": 0.0099, "step": 175740 }, { "epoch": 1.4840303139051318, "grad_norm": 0.14003272354602814, "learning_rate": 1.89402086881602e-06, "loss": 0.0035, "step": 175750 }, { "epoch": 1.4841147537522956, "grad_norm": 0.11617660522460938, "learning_rate": 1.8934434411177328e-06, "loss": 0.0086, "step": 175760 }, { "epoch": 1.4841991935994596, "grad_norm": 0.3409498929977417, "learning_rate": 1.8928660808932154e-06, "loss": 0.01, "step": 175770 }, { "epoch": 1.4842836334466234, "grad_norm": 0.0008266603108495474, "learning_rate": 1.8922887881550122e-06, "loss": 0.0058, "step": 175780 }, { "epoch": 1.4843680732937874, "grad_norm": 0.5724523663520813, "learning_rate": 1.8917115629156613e-06, "loss": 0.0079, "step": 175790 }, { "epoch": 1.4844525131409512, "grad_norm": 0.2536029815673828, "learning_rate": 1.891134405187695e-06, "loss": 0.0059, "step": 175800 }, { "epoch": 1.4845369529881152, "grad_norm": 0.31580039858818054, "learning_rate": 1.8905573149836538e-06, "loss": 0.0092, "step": 175810 }, { "epoch": 1.484621392835279, "grad_norm": 0.4541650414466858, "learning_rate": 1.8899802923160686e-06, "loss": 0.0114, "step": 175820 }, { "epoch": 1.484705832682443, "grad_norm": 0.3761887848377228, "learning_rate": 1.8894033371974762e-06, "loss": 0.008, "step": 175830 }, { "epoch": 1.4847902725296067, "grad_norm": 0.053066596388816833, "learning_rate": 1.8888264496404047e-06, "loss": 0.0056, "step": 175840 }, { "epoch": 1.4848747123767705, "grad_norm": 0.6134157776832581, "learning_rate": 1.888249629657386e-06, "loss": 0.0079, "step": 175850 }, { "epoch": 1.4849591522239345, "grad_norm": 0.03209901228547096, "learning_rate": 1.8876728772609481e-06, "loss": 0.0054, "step": 175860 }, { "epoch": 1.4850435920710985, "grad_norm": 0.17765289545059204, "learning_rate": 1.8870961924636178e-06, "loss": 0.0052, "step": 175870 }, { "epoch": 1.4851280319182623, "grad_norm": 0.07280891388654709, "learning_rate": 1.886519575277918e-06, "loss": 0.0109, "step": 175880 }, { "epoch": 1.485212471765426, "grad_norm": 0.5129287242889404, "learning_rate": 1.885943025716377e-06, "loss": 0.0076, "step": 175890 }, { "epoch": 1.48529691161259, "grad_norm": 0.06380970031023026, "learning_rate": 1.8853665437915136e-06, "loss": 0.007, "step": 175900 }, { "epoch": 1.4853813514597538, "grad_norm": 0.26829275488853455, "learning_rate": 1.8847901295158526e-06, "loss": 0.0047, "step": 175910 }, { "epoch": 1.4854657913069178, "grad_norm": 0.014447527006268501, "learning_rate": 1.8842137829019113e-06, "loss": 0.0084, "step": 175920 }, { "epoch": 1.4855502311540816, "grad_norm": 0.07191641628742218, "learning_rate": 1.8836375039622068e-06, "loss": 0.0053, "step": 175930 }, { "epoch": 1.4856346710012454, "grad_norm": 0.6996273994445801, "learning_rate": 1.8830612927092584e-06, "loss": 0.0051, "step": 175940 }, { "epoch": 1.4857191108484094, "grad_norm": 0.09435036033391953, "learning_rate": 1.88248514915558e-06, "loss": 0.0053, "step": 175950 }, { "epoch": 1.4858035506955733, "grad_norm": 0.2350306361913681, "learning_rate": 1.8819090733136859e-06, "loss": 0.0042, "step": 175960 }, { "epoch": 1.4858879905427371, "grad_norm": 0.28483647108078003, "learning_rate": 1.8813330651960855e-06, "loss": 0.0055, "step": 175970 }, { "epoch": 1.485972430389901, "grad_norm": 0.26495814323425293, "learning_rate": 1.8807571248152933e-06, "loss": 0.0053, "step": 175980 }, { "epoch": 1.486056870237065, "grad_norm": 0.28908807039260864, "learning_rate": 1.8801812521838159e-06, "loss": 0.0058, "step": 175990 }, { "epoch": 1.4861413100842287, "grad_norm": 0.20592033863067627, "learning_rate": 1.879605447314164e-06, "loss": 0.0081, "step": 176000 }, { "epoch": 1.4862257499313927, "grad_norm": 0.28620657324790955, "learning_rate": 1.879029710218841e-06, "loss": 0.0114, "step": 176010 }, { "epoch": 1.4863101897785564, "grad_norm": 0.4294893443584442, "learning_rate": 1.8784540409103542e-06, "loss": 0.0056, "step": 176020 }, { "epoch": 1.4863946296257204, "grad_norm": 0.20276734232902527, "learning_rate": 1.8778784394012068e-06, "loss": 0.0047, "step": 176030 }, { "epoch": 1.4864790694728842, "grad_norm": 0.31707948446273804, "learning_rate": 1.8773029057038988e-06, "loss": 0.0047, "step": 176040 }, { "epoch": 1.4865635093200482, "grad_norm": 0.2039443999528885, "learning_rate": 1.8767274398309326e-06, "loss": 0.0058, "step": 176050 }, { "epoch": 1.486647949167212, "grad_norm": 0.16876648366451263, "learning_rate": 1.8761520417948048e-06, "loss": 0.0072, "step": 176060 }, { "epoch": 1.4867323890143758, "grad_norm": 0.18474741280078888, "learning_rate": 1.875576711608016e-06, "loss": 0.0053, "step": 176070 }, { "epoch": 1.4868168288615398, "grad_norm": 0.30756887793540955, "learning_rate": 1.8750014492830586e-06, "loss": 0.0095, "step": 176080 }, { "epoch": 1.4869012687087038, "grad_norm": 0.40457773208618164, "learning_rate": 1.8744262548324315e-06, "loss": 0.0057, "step": 176090 }, { "epoch": 1.4869857085558675, "grad_norm": 0.14230197668075562, "learning_rate": 1.873851128268624e-06, "loss": 0.0059, "step": 176100 }, { "epoch": 1.4870701484030313, "grad_norm": 0.11180715262889862, "learning_rate": 1.8732760696041325e-06, "loss": 0.0034, "step": 176110 }, { "epoch": 1.4871545882501953, "grad_norm": 0.3020135462284088, "learning_rate": 1.872701078851441e-06, "loss": 0.0054, "step": 176120 }, { "epoch": 1.487239028097359, "grad_norm": 0.33399873971939087, "learning_rate": 1.8721261560230425e-06, "loss": 0.0104, "step": 176130 }, { "epoch": 1.487323467944523, "grad_norm": 0.0734756663441658, "learning_rate": 1.8715513011314236e-06, "loss": 0.0029, "step": 176140 }, { "epoch": 1.4874079077916869, "grad_norm": 0.28031376004219055, "learning_rate": 1.8709765141890667e-06, "loss": 0.006, "step": 176150 }, { "epoch": 1.4874923476388509, "grad_norm": 0.3449932932853699, "learning_rate": 1.8704017952084613e-06, "loss": 0.0062, "step": 176160 }, { "epoch": 1.4875767874860146, "grad_norm": 0.174894779920578, "learning_rate": 1.8698271442020854e-06, "loss": 0.0047, "step": 176170 }, { "epoch": 1.4876612273331786, "grad_norm": 0.4790152907371521, "learning_rate": 1.8692525611824242e-06, "loss": 0.0064, "step": 176180 }, { "epoch": 1.4877456671803424, "grad_norm": 0.5437216758728027, "learning_rate": 1.8686780461619559e-06, "loss": 0.0047, "step": 176190 }, { "epoch": 1.4878301070275062, "grad_norm": 0.2042780965566635, "learning_rate": 1.8681035991531588e-06, "loss": 0.0037, "step": 176200 }, { "epoch": 1.4879145468746702, "grad_norm": 0.2248106449842453, "learning_rate": 1.867529220168508e-06, "loss": 0.0057, "step": 176210 }, { "epoch": 1.4879989867218342, "grad_norm": 0.12696565687656403, "learning_rate": 1.8669549092204826e-06, "loss": 0.0054, "step": 176220 }, { "epoch": 1.488083426568998, "grad_norm": 0.5393412709236145, "learning_rate": 1.8663806663215523e-06, "loss": 0.0092, "step": 176230 }, { "epoch": 1.4881678664161617, "grad_norm": 0.21065936982631683, "learning_rate": 1.8658064914841938e-06, "loss": 0.0098, "step": 176240 }, { "epoch": 1.4882523062633257, "grad_norm": 0.7732351422309875, "learning_rate": 1.8652323847208747e-06, "loss": 0.0078, "step": 176250 }, { "epoch": 1.4883367461104895, "grad_norm": 0.24986420571804047, "learning_rate": 1.864658346044067e-06, "loss": 0.0045, "step": 176260 }, { "epoch": 1.4884211859576535, "grad_norm": 0.15617235004901886, "learning_rate": 1.8640843754662379e-06, "loss": 0.0045, "step": 176270 }, { "epoch": 1.4885056258048173, "grad_norm": 0.15519000589847565, "learning_rate": 1.8635104729998533e-06, "loss": 0.0061, "step": 176280 }, { "epoch": 1.488590065651981, "grad_norm": 0.1927206814289093, "learning_rate": 1.8629366386573788e-06, "loss": 0.0039, "step": 176290 }, { "epoch": 1.488674505499145, "grad_norm": 0.0682678371667862, "learning_rate": 1.8623628724512754e-06, "loss": 0.0101, "step": 176300 }, { "epoch": 1.488758945346309, "grad_norm": 0.04303274303674698, "learning_rate": 1.8617891743940097e-06, "loss": 0.0054, "step": 176310 }, { "epoch": 1.4888433851934728, "grad_norm": 0.34558501839637756, "learning_rate": 1.861215544498038e-06, "loss": 0.008, "step": 176320 }, { "epoch": 1.4889278250406366, "grad_norm": 0.31569188833236694, "learning_rate": 1.8606419827758232e-06, "loss": 0.0066, "step": 176330 }, { "epoch": 1.4890122648878006, "grad_norm": 0.019175654277205467, "learning_rate": 1.8600684892398197e-06, "loss": 0.0058, "step": 176340 }, { "epoch": 1.4890967047349644, "grad_norm": 0.328366756439209, "learning_rate": 1.8594950639024866e-06, "loss": 0.0073, "step": 176350 }, { "epoch": 1.4891811445821284, "grad_norm": 0.4151538014411926, "learning_rate": 1.8589217067762776e-06, "loss": 0.0069, "step": 176360 }, { "epoch": 1.4892655844292921, "grad_norm": 0.1911289244890213, "learning_rate": 1.8583484178736455e-06, "loss": 0.0058, "step": 176370 }, { "epoch": 1.4893500242764561, "grad_norm": 0.11684004962444305, "learning_rate": 1.857775197207042e-06, "loss": 0.0062, "step": 176380 }, { "epoch": 1.48943446412362, "grad_norm": 0.4258705973625183, "learning_rate": 1.857202044788915e-06, "loss": 0.0051, "step": 176390 }, { "epoch": 1.489518903970784, "grad_norm": 0.5268795490264893, "learning_rate": 1.8566289606317173e-06, "loss": 0.0073, "step": 176400 }, { "epoch": 1.4896033438179477, "grad_norm": 0.17808513343334198, "learning_rate": 1.8560559447478932e-06, "loss": 0.0042, "step": 176410 }, { "epoch": 1.4896877836651115, "grad_norm": 0.24900569021701813, "learning_rate": 1.855482997149891e-06, "loss": 0.0054, "step": 176420 }, { "epoch": 1.4897722235122755, "grad_norm": 0.12694256007671356, "learning_rate": 1.8549101178501545e-06, "loss": 0.003, "step": 176430 }, { "epoch": 1.4898566633594394, "grad_norm": 0.663051187992096, "learning_rate": 1.8543373068611248e-06, "loss": 0.007, "step": 176440 }, { "epoch": 1.4899411032066032, "grad_norm": 0.24109862744808197, "learning_rate": 1.853764564195243e-06, "loss": 0.0079, "step": 176450 }, { "epoch": 1.490025543053767, "grad_norm": 0.14300672709941864, "learning_rate": 1.853191889864952e-06, "loss": 0.0062, "step": 176460 }, { "epoch": 1.490109982900931, "grad_norm": 0.24296945333480835, "learning_rate": 1.8526192838826863e-06, "loss": 0.0045, "step": 176470 }, { "epoch": 1.4901944227480948, "grad_norm": 0.2065054178237915, "learning_rate": 1.8520467462608865e-06, "loss": 0.0173, "step": 176480 }, { "epoch": 1.4902788625952588, "grad_norm": 0.18281027674674988, "learning_rate": 1.8514742770119848e-06, "loss": 0.004, "step": 176490 }, { "epoch": 1.4903633024424225, "grad_norm": 0.18338719010353088, "learning_rate": 1.8509018761484181e-06, "loss": 0.0024, "step": 176500 }, { "epoch": 1.4904477422895863, "grad_norm": 0.07219190895557404, "learning_rate": 1.8503295436826175e-06, "loss": 0.006, "step": 176510 }, { "epoch": 1.4905321821367503, "grad_norm": 0.08199362456798553, "learning_rate": 1.8497572796270136e-06, "loss": 0.0144, "step": 176520 }, { "epoch": 1.4906166219839143, "grad_norm": 0.3769758641719818, "learning_rate": 1.8491850839940362e-06, "loss": 0.0091, "step": 176530 }, { "epoch": 1.490701061831078, "grad_norm": 0.3476056158542633, "learning_rate": 1.8486129567961114e-06, "loss": 0.0071, "step": 176540 }, { "epoch": 1.4907855016782419, "grad_norm": 0.17519010603427887, "learning_rate": 1.8480408980456687e-06, "loss": 0.0066, "step": 176550 }, { "epoch": 1.4908699415254059, "grad_norm": 0.20988591015338898, "learning_rate": 1.8474689077551306e-06, "loss": 0.0051, "step": 176560 }, { "epoch": 1.4909543813725696, "grad_norm": 0.07428756356239319, "learning_rate": 1.8468969859369235e-06, "loss": 0.0107, "step": 176570 }, { "epoch": 1.4910388212197336, "grad_norm": 0.2539692521095276, "learning_rate": 1.846325132603466e-06, "loss": 0.0085, "step": 176580 }, { "epoch": 1.4911232610668974, "grad_norm": 0.3305675983428955, "learning_rate": 1.845753347767182e-06, "loss": 0.0063, "step": 176590 }, { "epoch": 1.4912077009140614, "grad_norm": 0.1033259704709053, "learning_rate": 1.8451816314404886e-06, "loss": 0.0097, "step": 176600 }, { "epoch": 1.4912921407612252, "grad_norm": 0.09674569219350815, "learning_rate": 1.8446099836358039e-06, "loss": 0.0048, "step": 176610 }, { "epoch": 1.4913765806083892, "grad_norm": 0.3259795308113098, "learning_rate": 1.844038404365544e-06, "loss": 0.0035, "step": 176620 }, { "epoch": 1.491461020455553, "grad_norm": 0.12969821691513062, "learning_rate": 1.8434668936421214e-06, "loss": 0.0069, "step": 176630 }, { "epoch": 1.4915454603027167, "grad_norm": 0.36223089694976807, "learning_rate": 1.8428954514779524e-06, "loss": 0.0084, "step": 176640 }, { "epoch": 1.4916299001498807, "grad_norm": 0.19845740497112274, "learning_rate": 1.8423240778854456e-06, "loss": 0.009, "step": 176650 }, { "epoch": 1.4917143399970447, "grad_norm": 0.14224602282047272, "learning_rate": 1.8417527728770152e-06, "loss": 0.0048, "step": 176660 }, { "epoch": 1.4917987798442085, "grad_norm": 0.5060102939605713, "learning_rate": 1.841181536465065e-06, "loss": 0.0081, "step": 176670 }, { "epoch": 1.4918832196913723, "grad_norm": 0.42155885696411133, "learning_rate": 1.8406103686620064e-06, "loss": 0.008, "step": 176680 }, { "epoch": 1.4919676595385363, "grad_norm": 0.3921205997467041, "learning_rate": 1.8400392694802433e-06, "loss": 0.0045, "step": 176690 }, { "epoch": 1.4920520993857, "grad_norm": 0.5338680744171143, "learning_rate": 1.8394682389321794e-06, "loss": 0.0086, "step": 176700 }, { "epoch": 1.492136539232864, "grad_norm": 0.36904749274253845, "learning_rate": 1.838897277030216e-06, "loss": 0.0063, "step": 176710 }, { "epoch": 1.4922209790800278, "grad_norm": 0.17848561704158783, "learning_rate": 1.8383263837867582e-06, "loss": 0.0036, "step": 176720 }, { "epoch": 1.4923054189271918, "grad_norm": 0.10593569278717041, "learning_rate": 1.8377555592142032e-06, "loss": 0.0048, "step": 176730 }, { "epoch": 1.4923898587743556, "grad_norm": 0.22444255650043488, "learning_rate": 1.8371848033249472e-06, "loss": 0.0074, "step": 176740 }, { "epoch": 1.4924742986215196, "grad_norm": 0.2654896080493927, "learning_rate": 1.8366141161313916e-06, "loss": 0.0053, "step": 176750 }, { "epoch": 1.4925587384686834, "grad_norm": 0.5376129746437073, "learning_rate": 1.8360434976459291e-06, "loss": 0.0079, "step": 176760 }, { "epoch": 1.4926431783158471, "grad_norm": 0.1806333363056183, "learning_rate": 1.835472947880953e-06, "loss": 0.0071, "step": 176770 }, { "epoch": 1.4927276181630111, "grad_norm": 0.7868080735206604, "learning_rate": 1.8349024668488545e-06, "loss": 0.0131, "step": 176780 }, { "epoch": 1.4928120580101751, "grad_norm": 0.9302087426185608, "learning_rate": 1.8343320545620275e-06, "loss": 0.0064, "step": 176790 }, { "epoch": 1.492896497857339, "grad_norm": 0.37966403365135193, "learning_rate": 1.8337617110328581e-06, "loss": 0.0095, "step": 176800 }, { "epoch": 1.4929809377045027, "grad_norm": 0.03402959555387497, "learning_rate": 1.8331914362737368e-06, "loss": 0.0029, "step": 176810 }, { "epoch": 1.4930653775516667, "grad_norm": 0.19667859375476837, "learning_rate": 1.8326212302970474e-06, "loss": 0.0105, "step": 176820 }, { "epoch": 1.4931498173988305, "grad_norm": 0.017970172688364983, "learning_rate": 1.8320510931151776e-06, "loss": 0.0093, "step": 176830 }, { "epoch": 1.4932342572459945, "grad_norm": 0.20462949573993683, "learning_rate": 1.831481024740508e-06, "loss": 0.0047, "step": 176840 }, { "epoch": 1.4933186970931582, "grad_norm": 0.1370173990726471, "learning_rate": 1.8309110251854216e-06, "loss": 0.0052, "step": 176850 }, { "epoch": 1.493403136940322, "grad_norm": 0.05571551248431206, "learning_rate": 1.830341094462299e-06, "loss": 0.0036, "step": 176860 }, { "epoch": 1.493487576787486, "grad_norm": 0.7117086052894592, "learning_rate": 1.829771232583516e-06, "loss": 0.0102, "step": 176870 }, { "epoch": 1.49357201663465, "grad_norm": 0.41439390182495117, "learning_rate": 1.8292014395614537e-06, "loss": 0.0103, "step": 176880 }, { "epoch": 1.4936564564818138, "grad_norm": 0.3291180729866028, "learning_rate": 1.8286317154084854e-06, "loss": 0.0042, "step": 176890 }, { "epoch": 1.4937408963289776, "grad_norm": 0.037804991006851196, "learning_rate": 1.8280620601369876e-06, "loss": 0.0035, "step": 176900 }, { "epoch": 1.4938253361761415, "grad_norm": 0.15515655279159546, "learning_rate": 1.8274924737593303e-06, "loss": 0.0109, "step": 176910 }, { "epoch": 1.4939097760233053, "grad_norm": 0.4345700740814209, "learning_rate": 1.8269229562878875e-06, "loss": 0.0062, "step": 176920 }, { "epoch": 1.4939942158704693, "grad_norm": 0.2843278646469116, "learning_rate": 1.8263535077350285e-06, "loss": 0.0052, "step": 176930 }, { "epoch": 1.494078655717633, "grad_norm": 0.01954762451350689, "learning_rate": 1.825784128113121e-06, "loss": 0.0052, "step": 176940 }, { "epoch": 1.494163095564797, "grad_norm": 0.1294500231742859, "learning_rate": 1.8252148174345296e-06, "loss": 0.0065, "step": 176950 }, { "epoch": 1.4942475354119609, "grad_norm": 0.0009647057740949094, "learning_rate": 1.8246455757116238e-06, "loss": 0.0036, "step": 176960 }, { "epoch": 1.4943319752591249, "grad_norm": 0.6477166414260864, "learning_rate": 1.8240764029567654e-06, "loss": 0.0118, "step": 176970 }, { "epoch": 1.4944164151062886, "grad_norm": 0.0022123500239104033, "learning_rate": 1.823507299182315e-06, "loss": 0.0078, "step": 176980 }, { "epoch": 1.4945008549534524, "grad_norm": 0.3854433298110962, "learning_rate": 1.8229382644006367e-06, "loss": 0.0036, "step": 176990 }, { "epoch": 1.4945852948006164, "grad_norm": 0.2520790100097656, "learning_rate": 1.8223692986240887e-06, "loss": 0.0034, "step": 177000 }, { "epoch": 1.4946697346477804, "grad_norm": 0.08631137758493423, "learning_rate": 1.8218004018650277e-06, "loss": 0.0095, "step": 177010 }, { "epoch": 1.4947541744949442, "grad_norm": 0.16456376016139984, "learning_rate": 1.8212315741358095e-06, "loss": 0.0044, "step": 177020 }, { "epoch": 1.494838614342108, "grad_norm": 0.21660608053207397, "learning_rate": 1.8206628154487916e-06, "loss": 0.0087, "step": 177030 }, { "epoch": 1.494923054189272, "grad_norm": 0.41978439688682556, "learning_rate": 1.8200941258163241e-06, "loss": 0.0064, "step": 177040 }, { "epoch": 1.4950074940364357, "grad_norm": 0.09377728402614594, "learning_rate": 1.8195255052507626e-06, "loss": 0.005, "step": 177050 }, { "epoch": 1.4950919338835997, "grad_norm": 0.6167098879814148, "learning_rate": 1.8189569537644537e-06, "loss": 0.006, "step": 177060 }, { "epoch": 1.4951763737307635, "grad_norm": 0.025516334921121597, "learning_rate": 1.8183884713697492e-06, "loss": 0.0033, "step": 177070 }, { "epoch": 1.4952608135779275, "grad_norm": 0.11643707752227783, "learning_rate": 1.817820058078995e-06, "loss": 0.0055, "step": 177080 }, { "epoch": 1.4953452534250913, "grad_norm": 0.25893697142601013, "learning_rate": 1.8172517139045376e-06, "loss": 0.0057, "step": 177090 }, { "epoch": 1.4954296932722553, "grad_norm": 0.08727622032165527, "learning_rate": 1.8166834388587207e-06, "loss": 0.0094, "step": 177100 }, { "epoch": 1.495514133119419, "grad_norm": 0.021924281492829323, "learning_rate": 1.816115232953885e-06, "loss": 0.0049, "step": 177110 }, { "epoch": 1.4955985729665828, "grad_norm": 0.2772539556026459, "learning_rate": 1.8155470962023758e-06, "loss": 0.0053, "step": 177120 }, { "epoch": 1.4956830128137468, "grad_norm": 0.20327036082744598, "learning_rate": 1.8149790286165298e-06, "loss": 0.0066, "step": 177130 }, { "epoch": 1.4957674526609106, "grad_norm": 0.2234804630279541, "learning_rate": 1.814411030208688e-06, "loss": 0.0091, "step": 177140 }, { "epoch": 1.4958518925080746, "grad_norm": 0.34316545724868774, "learning_rate": 1.8138431009911844e-06, "loss": 0.0073, "step": 177150 }, { "epoch": 1.4959363323552384, "grad_norm": 0.42421984672546387, "learning_rate": 1.8132752409763571e-06, "loss": 0.0078, "step": 177160 }, { "epoch": 1.4960207722024024, "grad_norm": 0.05719650536775589, "learning_rate": 1.812707450176539e-06, "loss": 0.0078, "step": 177170 }, { "epoch": 1.4961052120495661, "grad_norm": 0.14940178394317627, "learning_rate": 1.8121397286040614e-06, "loss": 0.0065, "step": 177180 }, { "epoch": 1.4961896518967301, "grad_norm": 0.2296491414308548, "learning_rate": 1.8115720762712545e-06, "loss": 0.0135, "step": 177190 }, { "epoch": 1.496274091743894, "grad_norm": 0.22110970318317413, "learning_rate": 1.8110044931904503e-06, "loss": 0.0063, "step": 177200 }, { "epoch": 1.4963585315910577, "grad_norm": 0.2708217203617096, "learning_rate": 1.8104369793739746e-06, "loss": 0.0047, "step": 177210 }, { "epoch": 1.4964429714382217, "grad_norm": 0.39164918661117554, "learning_rate": 1.8098695348341528e-06, "loss": 0.0063, "step": 177220 }, { "epoch": 1.4965274112853857, "grad_norm": 0.9462864995002747, "learning_rate": 1.8093021595833127e-06, "loss": 0.005, "step": 177230 }, { "epoch": 1.4966118511325495, "grad_norm": 0.22334709763526917, "learning_rate": 1.808734853633774e-06, "loss": 0.0091, "step": 177240 }, { "epoch": 1.4966962909797132, "grad_norm": 0.09363444149494171, "learning_rate": 1.8081676169978617e-06, "loss": 0.0045, "step": 177250 }, { "epoch": 1.4967807308268772, "grad_norm": 0.38183602690696716, "learning_rate": 1.8076004496878953e-06, "loss": 0.0057, "step": 177260 }, { "epoch": 1.496865170674041, "grad_norm": 0.06738314032554626, "learning_rate": 1.8070333517161926e-06, "loss": 0.0055, "step": 177270 }, { "epoch": 1.496949610521205, "grad_norm": 0.1875198930501938, "learning_rate": 1.8064663230950696e-06, "loss": 0.0054, "step": 177280 }, { "epoch": 1.4970340503683688, "grad_norm": 0.3854020833969116, "learning_rate": 1.8058993638368455e-06, "loss": 0.0135, "step": 177290 }, { "epoch": 1.4971184902155328, "grad_norm": 0.0558084174990654, "learning_rate": 1.8053324739538307e-06, "loss": 0.0082, "step": 177300 }, { "epoch": 1.4972029300626966, "grad_norm": 0.0009157112799584866, "learning_rate": 1.804765653458342e-06, "loss": 0.0094, "step": 177310 }, { "epoch": 1.4972873699098606, "grad_norm": 0.21126198768615723, "learning_rate": 1.8041989023626883e-06, "loss": 0.0059, "step": 177320 }, { "epoch": 1.4973718097570243, "grad_norm": 0.358438640832901, "learning_rate": 1.8036322206791796e-06, "loss": 0.0044, "step": 177330 }, { "epoch": 1.497456249604188, "grad_norm": 0.2514943480491638, "learning_rate": 1.8030656084201236e-06, "loss": 0.0056, "step": 177340 }, { "epoch": 1.497540689451352, "grad_norm": 0.6273219585418701, "learning_rate": 1.8024990655978263e-06, "loss": 0.0102, "step": 177350 }, { "epoch": 1.497625129298516, "grad_norm": 0.3321569859981537, "learning_rate": 1.8019325922245956e-06, "loss": 0.0037, "step": 177360 }, { "epoch": 1.4977095691456799, "grad_norm": 0.3224666118621826, "learning_rate": 1.801366188312732e-06, "loss": 0.0054, "step": 177370 }, { "epoch": 1.4977940089928437, "grad_norm": 0.49755051732063293, "learning_rate": 1.8007998538745407e-06, "loss": 0.0039, "step": 177380 }, { "epoch": 1.4978784488400076, "grad_norm": 0.2693933844566345, "learning_rate": 1.80023358892232e-06, "loss": 0.0091, "step": 177390 }, { "epoch": 1.4979628886871714, "grad_norm": 0.25379347801208496, "learning_rate": 1.7996673934683712e-06, "loss": 0.0067, "step": 177400 }, { "epoch": 1.4980473285343354, "grad_norm": 0.17392617464065552, "learning_rate": 1.7991012675249913e-06, "loss": 0.0105, "step": 177410 }, { "epoch": 1.4981317683814992, "grad_norm": 0.18529777228832245, "learning_rate": 1.7985352111044757e-06, "loss": 0.0066, "step": 177420 }, { "epoch": 1.498216208228663, "grad_norm": 0.23832449316978455, "learning_rate": 1.7979692242191172e-06, "loss": 0.0084, "step": 177430 }, { "epoch": 1.498300648075827, "grad_norm": 0.05279122665524483, "learning_rate": 1.797403306881213e-06, "loss": 0.0073, "step": 177440 }, { "epoch": 1.498385087922991, "grad_norm": 0.2649533748626709, "learning_rate": 1.796837459103053e-06, "loss": 0.0059, "step": 177450 }, { "epoch": 1.4984695277701547, "grad_norm": 0.42859381437301636, "learning_rate": 1.796271680896925e-06, "loss": 0.0084, "step": 177460 }, { "epoch": 1.4985539676173185, "grad_norm": 0.13946476578712463, "learning_rate": 1.7957059722751208e-06, "loss": 0.0093, "step": 177470 }, { "epoch": 1.4986384074644825, "grad_norm": 0.27646082639694214, "learning_rate": 1.7951403332499252e-06, "loss": 0.0067, "step": 177480 }, { "epoch": 1.4987228473116463, "grad_norm": 0.4067254960536957, "learning_rate": 1.7945747638336263e-06, "loss": 0.0055, "step": 177490 }, { "epoch": 1.4988072871588103, "grad_norm": 0.2134573608636856, "learning_rate": 1.7940092640385066e-06, "loss": 0.0036, "step": 177500 }, { "epoch": 1.498891727005974, "grad_norm": 0.26709625124931335, "learning_rate": 1.7934438338768489e-06, "loss": 0.0036, "step": 177510 }, { "epoch": 1.498976166853138, "grad_norm": 0.42883580923080444, "learning_rate": 1.7928784733609321e-06, "loss": 0.0055, "step": 177520 }, { "epoch": 1.4990606067003018, "grad_norm": 0.3119252026081085, "learning_rate": 1.7923131825030399e-06, "loss": 0.0038, "step": 177530 }, { "epoch": 1.4991450465474658, "grad_norm": 0.08670633286237717, "learning_rate": 1.791747961315446e-06, "loss": 0.0036, "step": 177540 }, { "epoch": 1.4992294863946296, "grad_norm": 0.047412510961294174, "learning_rate": 1.7911828098104306e-06, "loss": 0.0103, "step": 177550 }, { "epoch": 1.4993139262417934, "grad_norm": 0.7850528359413147, "learning_rate": 1.7906177280002668e-06, "loss": 0.0089, "step": 177560 }, { "epoch": 1.4993983660889574, "grad_norm": 0.29923897981643677, "learning_rate": 1.790052715897227e-06, "loss": 0.0142, "step": 177570 }, { "epoch": 1.4994828059361214, "grad_norm": 0.16819946467876434, "learning_rate": 1.789487773513588e-06, "loss": 0.0079, "step": 177580 }, { "epoch": 1.4995672457832852, "grad_norm": 0.44716528058052063, "learning_rate": 1.7889229008616127e-06, "loss": 0.0075, "step": 177590 }, { "epoch": 1.499651685630449, "grad_norm": 0.5724977254867554, "learning_rate": 1.7883580979535758e-06, "loss": 0.0198, "step": 177600 }, { "epoch": 1.499736125477613, "grad_norm": 0.24805974960327148, "learning_rate": 1.7877933648017414e-06, "loss": 0.0044, "step": 177610 }, { "epoch": 1.4998205653247767, "grad_norm": 0.39290371537208557, "learning_rate": 1.7872287014183786e-06, "loss": 0.0061, "step": 177620 }, { "epoch": 1.4999050051719407, "grad_norm": 0.5264554023742676, "learning_rate": 1.7866641078157481e-06, "loss": 0.0097, "step": 177630 }, { "epoch": 1.4999894450191045, "grad_norm": 0.013208514079451561, "learning_rate": 1.7860995840061168e-06, "loss": 0.0113, "step": 177640 }, { "epoch": 1.5000738848662682, "grad_norm": 0.2908952832221985, "learning_rate": 1.7855351300017437e-06, "loss": 0.006, "step": 177650 }, { "epoch": 1.5001583247134322, "grad_norm": 0.19806423783302307, "learning_rate": 1.7849707458148884e-06, "loss": 0.0029, "step": 177660 }, { "epoch": 1.5002427645605962, "grad_norm": 0.33477941155433655, "learning_rate": 1.7844064314578085e-06, "loss": 0.0091, "step": 177670 }, { "epoch": 1.50032720440776, "grad_norm": 0.12884773313999176, "learning_rate": 1.7838421869427637e-06, "loss": 0.0046, "step": 177680 }, { "epoch": 1.5004116442549238, "grad_norm": 0.14063860476016998, "learning_rate": 1.7832780122820076e-06, "loss": 0.0059, "step": 177690 }, { "epoch": 1.5004960841020878, "grad_norm": 0.6109681129455566, "learning_rate": 1.782713907487792e-06, "loss": 0.0207, "step": 177700 }, { "epoch": 1.5005805239492518, "grad_norm": 0.00672922981902957, "learning_rate": 1.782149872572373e-06, "loss": 0.0115, "step": 177710 }, { "epoch": 1.5006649637964156, "grad_norm": 0.26242733001708984, "learning_rate": 1.781585907547997e-06, "loss": 0.0019, "step": 177720 }, { "epoch": 1.5007494036435793, "grad_norm": 0.1687518060207367, "learning_rate": 1.781022012426918e-06, "loss": 0.0038, "step": 177730 }, { "epoch": 1.5008338434907433, "grad_norm": 0.19377483427524567, "learning_rate": 1.7804581872213812e-06, "loss": 0.0054, "step": 177740 }, { "epoch": 1.500918283337907, "grad_norm": 0.4697199761867523, "learning_rate": 1.7798944319436323e-06, "loss": 0.008, "step": 177750 }, { "epoch": 1.501002723185071, "grad_norm": 0.3030054271221161, "learning_rate": 1.779330746605915e-06, "loss": 0.0074, "step": 177760 }, { "epoch": 1.5010871630322349, "grad_norm": 0.11413846164941788, "learning_rate": 1.7787671312204758e-06, "loss": 0.0063, "step": 177770 }, { "epoch": 1.5011716028793987, "grad_norm": 0.0040780119597911835, "learning_rate": 1.7782035857995527e-06, "loss": 0.0039, "step": 177780 }, { "epoch": 1.5012560427265627, "grad_norm": 0.21088619530200958, "learning_rate": 1.7776401103553891e-06, "loss": 0.0064, "step": 177790 }, { "epoch": 1.5013404825737267, "grad_norm": 0.09663679450750351, "learning_rate": 1.7770767049002218e-06, "loss": 0.0046, "step": 177800 }, { "epoch": 1.5014249224208904, "grad_norm": 0.4959263205528259, "learning_rate": 1.7765133694462866e-06, "loss": 0.0062, "step": 177810 }, { "epoch": 1.5015093622680542, "grad_norm": 0.2702939808368683, "learning_rate": 1.775950104005822e-06, "loss": 0.0082, "step": 177820 }, { "epoch": 1.5015938021152182, "grad_norm": 0.0532090850174427, "learning_rate": 1.7753869085910608e-06, "loss": 0.0072, "step": 177830 }, { "epoch": 1.5016782419623822, "grad_norm": 0.2310670167207718, "learning_rate": 1.7748237832142344e-06, "loss": 0.0068, "step": 177840 }, { "epoch": 1.501762681809546, "grad_norm": 0.2075091153383255, "learning_rate": 1.774260727887574e-06, "loss": 0.0035, "step": 177850 }, { "epoch": 1.5018471216567097, "grad_norm": 0.18962600827217102, "learning_rate": 1.7736977426233105e-06, "loss": 0.0112, "step": 177860 }, { "epoch": 1.5019315615038735, "grad_norm": 0.0885227620601654, "learning_rate": 1.773134827433669e-06, "loss": 0.008, "step": 177870 }, { "epoch": 1.5020160013510375, "grad_norm": 0.22725877165794373, "learning_rate": 1.77257198233088e-06, "loss": 0.0252, "step": 177880 }, { "epoch": 1.5021004411982015, "grad_norm": 1.8911651372909546, "learning_rate": 1.7720092073271643e-06, "loss": 0.0082, "step": 177890 }, { "epoch": 1.5021848810453653, "grad_norm": 0.12867403030395508, "learning_rate": 1.7714465024347505e-06, "loss": 0.0055, "step": 177900 }, { "epoch": 1.502269320892529, "grad_norm": 0.058805473148822784, "learning_rate": 1.7708838676658535e-06, "loss": 0.0065, "step": 177910 }, { "epoch": 1.502353760739693, "grad_norm": 0.07785753160715103, "learning_rate": 1.7703213030326994e-06, "loss": 0.0042, "step": 177920 }, { "epoch": 1.502438200586857, "grad_norm": 0.030492564663290977, "learning_rate": 1.7697588085475042e-06, "loss": 0.0061, "step": 177930 }, { "epoch": 1.5025226404340208, "grad_norm": 0.7631382346153259, "learning_rate": 1.7691963842224846e-06, "loss": 0.0057, "step": 177940 }, { "epoch": 1.5026070802811846, "grad_norm": 0.1963300257921219, "learning_rate": 1.768634030069859e-06, "loss": 0.0065, "step": 177950 }, { "epoch": 1.5026915201283486, "grad_norm": 0.2955942749977112, "learning_rate": 1.7680717461018382e-06, "loss": 0.0052, "step": 177960 }, { "epoch": 1.5027759599755126, "grad_norm": 0.18044282495975494, "learning_rate": 1.7675095323306385e-06, "loss": 0.0119, "step": 177970 }, { "epoch": 1.5028603998226764, "grad_norm": 0.38125887513160706, "learning_rate": 1.7669473887684695e-06, "loss": 0.0113, "step": 177980 }, { "epoch": 1.5029448396698402, "grad_norm": 0.12212861329317093, "learning_rate": 1.7663853154275406e-06, "loss": 0.0066, "step": 177990 }, { "epoch": 1.503029279517004, "grad_norm": 0.39982369542121887, "learning_rate": 1.7658233123200586e-06, "loss": 0.0067, "step": 178000 }, { "epoch": 1.503113719364168, "grad_norm": 0.36057260632514954, "learning_rate": 1.765261379458233e-06, "loss": 0.0064, "step": 178010 }, { "epoch": 1.503198159211332, "grad_norm": 0.7422138452529907, "learning_rate": 1.7646995168542653e-06, "loss": 0.0167, "step": 178020 }, { "epoch": 1.5032825990584957, "grad_norm": 0.4277142584323883, "learning_rate": 1.7641377245203633e-06, "loss": 0.0078, "step": 178030 }, { "epoch": 1.5033670389056595, "grad_norm": 0.25009259581565857, "learning_rate": 1.7635760024687266e-06, "loss": 0.0058, "step": 178040 }, { "epoch": 1.5034514787528235, "grad_norm": 0.13236060738563538, "learning_rate": 1.7630143507115543e-06, "loss": 0.0054, "step": 178050 }, { "epoch": 1.5035359185999875, "grad_norm": 0.23999102413654327, "learning_rate": 1.7624527692610487e-06, "loss": 0.0036, "step": 178060 }, { "epoch": 1.5036203584471513, "grad_norm": 0.016143180429935455, "learning_rate": 1.7618912581294057e-06, "loss": 0.005, "step": 178070 }, { "epoch": 1.503704798294315, "grad_norm": 0.35850971937179565, "learning_rate": 1.7613298173288208e-06, "loss": 0.0054, "step": 178080 }, { "epoch": 1.5037892381414788, "grad_norm": 0.3217639625072479, "learning_rate": 1.760768446871487e-06, "loss": 0.0044, "step": 178090 }, { "epoch": 1.5038736779886428, "grad_norm": 0.37029680609703064, "learning_rate": 1.7602071467696004e-06, "loss": 0.0052, "step": 178100 }, { "epoch": 1.5039581178358068, "grad_norm": 0.9143804907798767, "learning_rate": 1.7596459170353487e-06, "loss": 0.0058, "step": 178110 }, { "epoch": 1.5040425576829706, "grad_norm": 0.27345311641693115, "learning_rate": 1.7590847576809257e-06, "loss": 0.0048, "step": 178120 }, { "epoch": 1.5041269975301343, "grad_norm": 0.19979703426361084, "learning_rate": 1.7585236687185159e-06, "loss": 0.0061, "step": 178130 }, { "epoch": 1.5042114373772983, "grad_norm": 1.1803957223892212, "learning_rate": 1.7579626501603092e-06, "loss": 0.0117, "step": 178140 }, { "epoch": 1.5042958772244623, "grad_norm": 0.28651362657546997, "learning_rate": 1.7574017020184896e-06, "loss": 0.0115, "step": 178150 }, { "epoch": 1.5043803170716261, "grad_norm": 0.16006416082382202, "learning_rate": 1.756840824305241e-06, "loss": 0.0044, "step": 178160 }, { "epoch": 1.50446475691879, "grad_norm": 0.1270243227481842, "learning_rate": 1.7562800170327442e-06, "loss": 0.0046, "step": 178170 }, { "epoch": 1.504549196765954, "grad_norm": 0.1770380586385727, "learning_rate": 1.75571928021318e-06, "loss": 0.0076, "step": 178180 }, { "epoch": 1.5046336366131179, "grad_norm": 0.21863645315170288, "learning_rate": 1.755158613858729e-06, "loss": 0.0041, "step": 178190 }, { "epoch": 1.5047180764602817, "grad_norm": 0.08087597042322159, "learning_rate": 1.754598017981567e-06, "loss": 0.0105, "step": 178200 }, { "epoch": 1.5048025163074454, "grad_norm": 0.43173113465309143, "learning_rate": 1.7540374925938725e-06, "loss": 0.0097, "step": 178210 }, { "epoch": 1.5048869561546092, "grad_norm": 0.17255710065364838, "learning_rate": 1.753477037707818e-06, "loss": 0.0069, "step": 178220 }, { "epoch": 1.5049713960017732, "grad_norm": 0.44346240162849426, "learning_rate": 1.7529166533355779e-06, "loss": 0.0038, "step": 178230 }, { "epoch": 1.5050558358489372, "grad_norm": 0.0037693374324589968, "learning_rate": 1.75235633948932e-06, "loss": 0.0051, "step": 178240 }, { "epoch": 1.505140275696101, "grad_norm": 0.12907522916793823, "learning_rate": 1.7517960961812192e-06, "loss": 0.0051, "step": 178250 }, { "epoch": 1.5052247155432648, "grad_norm": 0.22496634721755981, "learning_rate": 1.7512359234234399e-06, "loss": 0.0075, "step": 178260 }, { "epoch": 1.5053091553904288, "grad_norm": 0.5104352831840515, "learning_rate": 1.7506758212281522e-06, "loss": 0.0067, "step": 178270 }, { "epoch": 1.5053935952375928, "grad_norm": 0.13137726485729218, "learning_rate": 1.75011578960752e-06, "loss": 0.0061, "step": 178280 }, { "epoch": 1.5054780350847565, "grad_norm": 0.785013735294342, "learning_rate": 1.7495558285737047e-06, "loss": 0.0095, "step": 178290 }, { "epoch": 1.5055624749319203, "grad_norm": 0.2670069932937622, "learning_rate": 1.7489959381388727e-06, "loss": 0.0054, "step": 178300 }, { "epoch": 1.5056469147790843, "grad_norm": 0.2315712422132492, "learning_rate": 1.7484361183151827e-06, "loss": 0.0084, "step": 178310 }, { "epoch": 1.505731354626248, "grad_norm": 0.4971472918987274, "learning_rate": 1.7478763691147937e-06, "loss": 0.0069, "step": 178320 }, { "epoch": 1.505815794473412, "grad_norm": 0.3121132254600525, "learning_rate": 1.7473166905498622e-06, "loss": 0.0052, "step": 178330 }, { "epoch": 1.5059002343205758, "grad_norm": 0.3512655794620514, "learning_rate": 1.7467570826325476e-06, "loss": 0.0094, "step": 178340 }, { "epoch": 1.5059846741677396, "grad_norm": 0.0522635243833065, "learning_rate": 1.7461975453750001e-06, "loss": 0.0094, "step": 178350 }, { "epoch": 1.5060691140149036, "grad_norm": 0.6611731052398682, "learning_rate": 1.7456380787893772e-06, "loss": 0.0174, "step": 178360 }, { "epoch": 1.5061535538620676, "grad_norm": 0.428382933139801, "learning_rate": 1.7450786828878263e-06, "loss": 0.0039, "step": 178370 }, { "epoch": 1.5062379937092314, "grad_norm": 0.3294414281845093, "learning_rate": 1.7445193576825015e-06, "loss": 0.0099, "step": 178380 }, { "epoch": 1.5063224335563952, "grad_norm": 0.19326700270175934, "learning_rate": 1.743960103185549e-06, "loss": 0.0071, "step": 178390 }, { "epoch": 1.5064068734035592, "grad_norm": 0.1310228854417801, "learning_rate": 1.7434009194091162e-06, "loss": 0.0059, "step": 178400 }, { "epoch": 1.5064913132507232, "grad_norm": 0.05616351217031479, "learning_rate": 1.7428418063653474e-06, "loss": 0.0077, "step": 178410 }, { "epoch": 1.506575753097887, "grad_norm": 0.00781372282654047, "learning_rate": 1.7422827640663858e-06, "loss": 0.007, "step": 178420 }, { "epoch": 1.5066601929450507, "grad_norm": 0.18528980016708374, "learning_rate": 1.7417237925243768e-06, "loss": 0.0222, "step": 178430 }, { "epoch": 1.5067446327922145, "grad_norm": 0.42930349707603455, "learning_rate": 1.741164891751458e-06, "loss": 0.0085, "step": 178440 }, { "epoch": 1.5068290726393785, "grad_norm": 0.35635173320770264, "learning_rate": 1.740606061759771e-06, "loss": 0.0099, "step": 178450 }, { "epoch": 1.5069135124865425, "grad_norm": 0.01627339981496334, "learning_rate": 1.7400473025614506e-06, "loss": 0.0077, "step": 178460 }, { "epoch": 1.5069979523337063, "grad_norm": 0.48912280797958374, "learning_rate": 1.7394886141686369e-06, "loss": 0.0057, "step": 178470 }, { "epoch": 1.50708239218087, "grad_norm": 0.2428792119026184, "learning_rate": 1.7389299965934626e-06, "loss": 0.0054, "step": 178480 }, { "epoch": 1.507166832028034, "grad_norm": 0.21652352809906006, "learning_rate": 1.7383714498480598e-06, "loss": 0.0089, "step": 178490 }, { "epoch": 1.507251271875198, "grad_norm": 0.4276844263076782, "learning_rate": 1.7378129739445593e-06, "loss": 0.0107, "step": 178500 }, { "epoch": 1.5073357117223618, "grad_norm": 0.400876522064209, "learning_rate": 1.7372545688950943e-06, "loss": 0.0027, "step": 178510 }, { "epoch": 1.5074201515695256, "grad_norm": 0.13923749327659607, "learning_rate": 1.7366962347117916e-06, "loss": 0.006, "step": 178520 }, { "epoch": 1.5075045914166896, "grad_norm": 0.011137212626636028, "learning_rate": 1.7361379714067761e-06, "loss": 0.0042, "step": 178530 }, { "epoch": 1.5075890312638536, "grad_norm": 0.3359355628490448, "learning_rate": 1.735579778992177e-06, "loss": 0.0062, "step": 178540 }, { "epoch": 1.5076734711110173, "grad_norm": 0.042328789830207825, "learning_rate": 1.7350216574801154e-06, "loss": 0.0095, "step": 178550 }, { "epoch": 1.5077579109581811, "grad_norm": 0.26546427607536316, "learning_rate": 1.7344636068827153e-06, "loss": 0.01, "step": 178560 }, { "epoch": 1.507842350805345, "grad_norm": 0.2456781417131424, "learning_rate": 1.733905627212094e-06, "loss": 0.0062, "step": 178570 }, { "epoch": 1.507926790652509, "grad_norm": 0.24363918602466583, "learning_rate": 1.7333477184803753e-06, "loss": 0.0037, "step": 178580 }, { "epoch": 1.508011230499673, "grad_norm": 1.0241082906723022, "learning_rate": 1.7327898806996728e-06, "loss": 0.0118, "step": 178590 }, { "epoch": 1.5080956703468367, "grad_norm": 0.11663303524255753, "learning_rate": 1.732232113882107e-06, "loss": 0.0036, "step": 178600 }, { "epoch": 1.5081801101940004, "grad_norm": 0.36379849910736084, "learning_rate": 1.7316744180397877e-06, "loss": 0.0102, "step": 178610 }, { "epoch": 1.5082645500411644, "grad_norm": 0.026347393169999123, "learning_rate": 1.7311167931848322e-06, "loss": 0.0043, "step": 178620 }, { "epoch": 1.5083489898883284, "grad_norm": 0.007415571715682745, "learning_rate": 1.7305592393293498e-06, "loss": 0.0041, "step": 178630 }, { "epoch": 1.5084334297354922, "grad_norm": 0.10095656663179398, "learning_rate": 1.730001756485451e-06, "loss": 0.0045, "step": 178640 }, { "epoch": 1.508517869582656, "grad_norm": 0.20463934540748596, "learning_rate": 1.7294443446652436e-06, "loss": 0.0042, "step": 178650 }, { "epoch": 1.5086023094298198, "grad_norm": 0.48569557070732117, "learning_rate": 1.7288870038808336e-06, "loss": 0.0069, "step": 178660 }, { "epoch": 1.5086867492769838, "grad_norm": 0.17575916647911072, "learning_rate": 1.7283297341443294e-06, "loss": 0.006, "step": 178670 }, { "epoch": 1.5087711891241478, "grad_norm": 0.1903999149799347, "learning_rate": 1.727772535467831e-06, "loss": 0.0118, "step": 178680 }, { "epoch": 1.5088556289713115, "grad_norm": 0.04950203001499176, "learning_rate": 1.727215407863444e-06, "loss": 0.0039, "step": 178690 }, { "epoch": 1.5089400688184753, "grad_norm": 0.2017175406217575, "learning_rate": 1.7266583513432662e-06, "loss": 0.0073, "step": 178700 }, { "epoch": 1.5090245086656393, "grad_norm": 0.22587953507900238, "learning_rate": 1.7261013659194003e-06, "loss": 0.0066, "step": 178710 }, { "epoch": 1.5091089485128033, "grad_norm": 0.3539923429489136, "learning_rate": 1.7255444516039415e-06, "loss": 0.0057, "step": 178720 }, { "epoch": 1.509193388359967, "grad_norm": 0.14327681064605713, "learning_rate": 1.7249876084089862e-06, "loss": 0.0048, "step": 178730 }, { "epoch": 1.5092778282071309, "grad_norm": 0.1106756180524826, "learning_rate": 1.724430836346629e-06, "loss": 0.0081, "step": 178740 }, { "epoch": 1.5093622680542949, "grad_norm": 0.013245422393083572, "learning_rate": 1.7238741354289606e-06, "loss": 0.0067, "step": 178750 }, { "epoch": 1.5094467079014589, "grad_norm": 0.3365938365459442, "learning_rate": 1.7233175056680773e-06, "loss": 0.0076, "step": 178760 }, { "epoch": 1.5095311477486226, "grad_norm": 0.13584092259407043, "learning_rate": 1.7227609470760641e-06, "loss": 0.0044, "step": 178770 }, { "epoch": 1.5096155875957864, "grad_norm": 0.5771170854568481, "learning_rate": 1.7222044596650134e-06, "loss": 0.005, "step": 178780 }, { "epoch": 1.5097000274429502, "grad_norm": 0.1675954908132553, "learning_rate": 1.721648043447008e-06, "loss": 0.002, "step": 178790 }, { "epoch": 1.5097844672901142, "grad_norm": 0.4787731170654297, "learning_rate": 1.7210916984341392e-06, "loss": 0.0077, "step": 178800 }, { "epoch": 1.5098689071372782, "grad_norm": 0.5248401165008545, "learning_rate": 1.7205354246384831e-06, "loss": 0.0054, "step": 178810 }, { "epoch": 1.509953346984442, "grad_norm": 0.6271154880523682, "learning_rate": 1.7199792220721272e-06, "loss": 0.0081, "step": 178820 }, { "epoch": 1.5100377868316057, "grad_norm": 0.39074909687042236, "learning_rate": 1.7194230907471492e-06, "loss": 0.0025, "step": 178830 }, { "epoch": 1.5101222266787697, "grad_norm": 0.4354795217514038, "learning_rate": 1.7188670306756312e-06, "loss": 0.0067, "step": 178840 }, { "epoch": 1.5102066665259337, "grad_norm": 0.16728155314922333, "learning_rate": 1.7183110418696465e-06, "loss": 0.0049, "step": 178850 }, { "epoch": 1.5102911063730975, "grad_norm": 0.05353054031729698, "learning_rate": 1.7177551243412754e-06, "loss": 0.0039, "step": 178860 }, { "epoch": 1.5103755462202613, "grad_norm": 0.5618659257888794, "learning_rate": 1.717199278102591e-06, "loss": 0.0142, "step": 178870 }, { "epoch": 1.5104599860674253, "grad_norm": 0.07617322355508804, "learning_rate": 1.7166435031656648e-06, "loss": 0.0031, "step": 178880 }, { "epoch": 1.510544425914589, "grad_norm": 0.33604225516319275, "learning_rate": 1.7160877995425696e-06, "loss": 0.0049, "step": 178890 }, { "epoch": 1.510628865761753, "grad_norm": 0.28602543473243713, "learning_rate": 1.7155321672453724e-06, "loss": 0.0128, "step": 178900 }, { "epoch": 1.5107133056089168, "grad_norm": 0.33077412843704224, "learning_rate": 1.714976606286145e-06, "loss": 0.0061, "step": 178910 }, { "epoch": 1.5107977454560806, "grad_norm": 0.15020154416561127, "learning_rate": 1.7144211166769508e-06, "loss": 0.0052, "step": 178920 }, { "epoch": 1.5108821853032446, "grad_norm": 0.19328223168849945, "learning_rate": 1.7138656984298585e-06, "loss": 0.007, "step": 178930 }, { "epoch": 1.5109666251504086, "grad_norm": 0.19484075903892517, "learning_rate": 1.7133103515569283e-06, "loss": 0.0034, "step": 178940 }, { "epoch": 1.5110510649975724, "grad_norm": 0.09358223527669907, "learning_rate": 1.7127550760702254e-06, "loss": 0.003, "step": 178950 }, { "epoch": 1.5111355048447361, "grad_norm": 0.3044334352016449, "learning_rate": 1.712199871981809e-06, "loss": 0.0058, "step": 178960 }, { "epoch": 1.5112199446919001, "grad_norm": 0.16103920340538025, "learning_rate": 1.7116447393037372e-06, "loss": 0.0067, "step": 178970 }, { "epoch": 1.5113043845390641, "grad_norm": 0.3850385844707489, "learning_rate": 1.7110896780480673e-06, "loss": 0.0072, "step": 178980 }, { "epoch": 1.511388824386228, "grad_norm": 0.1241820752620697, "learning_rate": 1.7105346882268547e-06, "loss": 0.0061, "step": 178990 }, { "epoch": 1.5114732642333917, "grad_norm": 0.03724660724401474, "learning_rate": 1.7099797698521564e-06, "loss": 0.0105, "step": 179000 }, { "epoch": 1.5115577040805555, "grad_norm": 0.24938175082206726, "learning_rate": 1.709424922936021e-06, "loss": 0.0038, "step": 179010 }, { "epoch": 1.5116421439277195, "grad_norm": 0.41554954648017883, "learning_rate": 1.708870147490504e-06, "loss": 0.0051, "step": 179020 }, { "epoch": 1.5117265837748834, "grad_norm": 0.3847859501838684, "learning_rate": 1.7083154435276512e-06, "loss": 0.0039, "step": 179030 }, { "epoch": 1.5118110236220472, "grad_norm": 0.2342602014541626, "learning_rate": 1.7077608110595145e-06, "loss": 0.0057, "step": 179040 }, { "epoch": 1.511895463469211, "grad_norm": 0.28564849495887756, "learning_rate": 1.7072062500981378e-06, "loss": 0.0082, "step": 179050 }, { "epoch": 1.511979903316375, "grad_norm": 0.36025604605674744, "learning_rate": 1.7066517606555667e-06, "loss": 0.007, "step": 179060 }, { "epoch": 1.512064343163539, "grad_norm": 0.49092668294906616, "learning_rate": 1.7060973427438431e-06, "loss": 0.0103, "step": 179070 }, { "epoch": 1.5121487830107028, "grad_norm": 0.35714036226272583, "learning_rate": 1.7055429963750115e-06, "loss": 0.0068, "step": 179080 }, { "epoch": 1.5122332228578665, "grad_norm": 0.21519559621810913, "learning_rate": 1.7049887215611094e-06, "loss": 0.0092, "step": 179090 }, { "epoch": 1.5123176627050305, "grad_norm": 0.7916640043258667, "learning_rate": 1.7044345183141791e-06, "loss": 0.0055, "step": 179100 }, { "epoch": 1.5124021025521945, "grad_norm": 0.23572075366973877, "learning_rate": 1.7038803866462555e-06, "loss": 0.0026, "step": 179110 }, { "epoch": 1.5124865423993583, "grad_norm": 0.17052143812179565, "learning_rate": 1.7033263265693745e-06, "loss": 0.0067, "step": 179120 }, { "epoch": 1.512570982246522, "grad_norm": 0.341373473405838, "learning_rate": 1.7027723380955697e-06, "loss": 0.0041, "step": 179130 }, { "epoch": 1.5126554220936859, "grad_norm": 0.2435263693332672, "learning_rate": 1.7022184212368732e-06, "loss": 0.0052, "step": 179140 }, { "epoch": 1.5127398619408499, "grad_norm": 0.21489112079143524, "learning_rate": 1.7016645760053174e-06, "loss": 0.0041, "step": 179150 }, { "epoch": 1.5128243017880139, "grad_norm": 0.6554380059242249, "learning_rate": 1.7011108024129302e-06, "loss": 0.0066, "step": 179160 }, { "epoch": 1.5129087416351776, "grad_norm": 0.15543875098228455, "learning_rate": 1.700557100471742e-06, "loss": 0.009, "step": 179170 }, { "epoch": 1.5129931814823414, "grad_norm": 0.4524429440498352, "learning_rate": 1.7000034701937756e-06, "loss": 0.0072, "step": 179180 }, { "epoch": 1.5130776213295054, "grad_norm": 0.1319241225719452, "learning_rate": 1.6994499115910595e-06, "loss": 0.0032, "step": 179190 }, { "epoch": 1.5131620611766694, "grad_norm": 0.3339824378490448, "learning_rate": 1.6988964246756145e-06, "loss": 0.0074, "step": 179200 }, { "epoch": 1.5132465010238332, "grad_norm": 0.023392729461193085, "learning_rate": 1.6983430094594632e-06, "loss": 0.0135, "step": 179210 }, { "epoch": 1.513330940870997, "grad_norm": 0.476606160402298, "learning_rate": 1.697789665954625e-06, "loss": 0.0078, "step": 179220 }, { "epoch": 1.513415380718161, "grad_norm": 0.46539533138275146, "learning_rate": 1.6972363941731167e-06, "loss": 0.0068, "step": 179230 }, { "epoch": 1.5134998205653247, "grad_norm": 0.02996796742081642, "learning_rate": 1.696683194126959e-06, "loss": 0.0063, "step": 179240 }, { "epoch": 1.5135842604124887, "grad_norm": 0.3053198456764221, "learning_rate": 1.6961300658281637e-06, "loss": 0.005, "step": 179250 }, { "epoch": 1.5136687002596525, "grad_norm": 0.326327919960022, "learning_rate": 1.695577009288748e-06, "loss": 0.0054, "step": 179260 }, { "epoch": 1.5137531401068163, "grad_norm": 0.08598463982343674, "learning_rate": 1.6950240245207211e-06, "loss": 0.0047, "step": 179270 }, { "epoch": 1.5138375799539803, "grad_norm": 0.24848559498786926, "learning_rate": 1.6944711115360962e-06, "loss": 0.0087, "step": 179280 }, { "epoch": 1.5139220198011443, "grad_norm": 0.0360129289329052, "learning_rate": 1.693918270346882e-06, "loss": 0.0076, "step": 179290 }, { "epoch": 1.514006459648308, "grad_norm": 0.0021191479172557592, "learning_rate": 1.6933655009650852e-06, "loss": 0.0084, "step": 179300 }, { "epoch": 1.5140908994954718, "grad_norm": 0.33128103613853455, "learning_rate": 1.6928128034027103e-06, "loss": 0.0049, "step": 179310 }, { "epoch": 1.5141753393426358, "grad_norm": 0.17942658066749573, "learning_rate": 1.6922601776717657e-06, "loss": 0.0129, "step": 179320 }, { "epoch": 1.5142597791897998, "grad_norm": 0.10602273792028427, "learning_rate": 1.6917076237842518e-06, "loss": 0.0043, "step": 179330 }, { "epoch": 1.5143442190369636, "grad_norm": 0.5092529654502869, "learning_rate": 1.6911551417521689e-06, "loss": 0.0059, "step": 179340 }, { "epoch": 1.5144286588841274, "grad_norm": 0.5306909680366516, "learning_rate": 1.69060273158752e-06, "loss": 0.0098, "step": 179350 }, { "epoch": 1.5145130987312911, "grad_norm": 0.21360963582992554, "learning_rate": 1.6900503933023e-06, "loss": 0.012, "step": 179360 }, { "epoch": 1.5145975385784551, "grad_norm": 0.5526509284973145, "learning_rate": 1.6894981269085088e-06, "loss": 0.0045, "step": 179370 }, { "epoch": 1.5146819784256191, "grad_norm": 0.1878785938024521, "learning_rate": 1.6889459324181395e-06, "loss": 0.006, "step": 179380 }, { "epoch": 1.514766418272783, "grad_norm": 0.2116079181432724, "learning_rate": 1.6883938098431862e-06, "loss": 0.005, "step": 179390 }, { "epoch": 1.5148508581199467, "grad_norm": 0.19629140198230743, "learning_rate": 1.6878417591956391e-06, "loss": 0.0022, "step": 179400 }, { "epoch": 1.5149352979671107, "grad_norm": 0.0010784867918118834, "learning_rate": 1.6872897804874921e-06, "loss": 0.0061, "step": 179410 }, { "epoch": 1.5150197378142747, "grad_norm": 0.33144411444664, "learning_rate": 1.686737873730731e-06, "loss": 0.0063, "step": 179420 }, { "epoch": 1.5151041776614385, "grad_norm": 0.9840737581253052, "learning_rate": 1.6861860389373453e-06, "loss": 0.0077, "step": 179430 }, { "epoch": 1.5151886175086022, "grad_norm": 0.035200100392103195, "learning_rate": 1.68563427611932e-06, "loss": 0.0031, "step": 179440 }, { "epoch": 1.5152730573557662, "grad_norm": 0.5687510967254639, "learning_rate": 1.6850825852886394e-06, "loss": 0.0117, "step": 179450 }, { "epoch": 1.5153574972029302, "grad_norm": 0.3011528551578522, "learning_rate": 1.6845309664572852e-06, "loss": 0.0049, "step": 179460 }, { "epoch": 1.515441937050094, "grad_norm": 0.5991604924201965, "learning_rate": 1.6839794196372372e-06, "loss": 0.007, "step": 179470 }, { "epoch": 1.5155263768972578, "grad_norm": 0.028844639658927917, "learning_rate": 1.6834279448404788e-06, "loss": 0.0048, "step": 179480 }, { "epoch": 1.5156108167444216, "grad_norm": 0.048613689839839935, "learning_rate": 1.682876542078984e-06, "loss": 0.0032, "step": 179490 }, { "epoch": 1.5156952565915855, "grad_norm": 0.052958548069000244, "learning_rate": 1.6823252113647326e-06, "loss": 0.0075, "step": 179500 }, { "epoch": 1.5157796964387495, "grad_norm": 0.7330794930458069, "learning_rate": 1.6817739527096966e-06, "loss": 0.0081, "step": 179510 }, { "epoch": 1.5158641362859133, "grad_norm": 0.2479766607284546, "learning_rate": 1.6812227661258512e-06, "loss": 0.0067, "step": 179520 }, { "epoch": 1.515948576133077, "grad_norm": 0.19754888117313385, "learning_rate": 1.6806716516251675e-06, "loss": 0.0102, "step": 179530 }, { "epoch": 1.516033015980241, "grad_norm": 0.5003214478492737, "learning_rate": 1.6801206092196159e-06, "loss": 0.0082, "step": 179540 }, { "epoch": 1.516117455827405, "grad_norm": 0.16396678984165192, "learning_rate": 1.6795696389211618e-06, "loss": 0.005, "step": 179550 }, { "epoch": 1.5162018956745689, "grad_norm": 0.9580753445625305, "learning_rate": 1.6790187407417774e-06, "loss": 0.0183, "step": 179560 }, { "epoch": 1.5162863355217326, "grad_norm": 0.5669896006584167, "learning_rate": 1.6784679146934246e-06, "loss": 0.0075, "step": 179570 }, { "epoch": 1.5163707753688964, "grad_norm": 0.02116372436285019, "learning_rate": 1.6779171607880668e-06, "loss": 0.0049, "step": 179580 }, { "epoch": 1.5164552152160604, "grad_norm": 0.06342777609825134, "learning_rate": 1.6773664790376687e-06, "loss": 0.0054, "step": 179590 }, { "epoch": 1.5165396550632244, "grad_norm": 0.05658808350563049, "learning_rate": 1.6768158694541886e-06, "loss": 0.0066, "step": 179600 }, { "epoch": 1.5166240949103882, "grad_norm": 0.7522358894348145, "learning_rate": 1.6762653320495887e-06, "loss": 0.0072, "step": 179610 }, { "epoch": 1.516708534757552, "grad_norm": 0.2500704824924469, "learning_rate": 1.6757148668358242e-06, "loss": 0.0086, "step": 179620 }, { "epoch": 1.516792974604716, "grad_norm": 0.3600168824195862, "learning_rate": 1.6751644738248513e-06, "loss": 0.0061, "step": 179630 }, { "epoch": 1.51687741445188, "grad_norm": 0.008441085927188396, "learning_rate": 1.6746141530286236e-06, "loss": 0.0047, "step": 179640 }, { "epoch": 1.5169618542990437, "grad_norm": 0.3344208002090454, "learning_rate": 1.6740639044590966e-06, "loss": 0.0072, "step": 179650 }, { "epoch": 1.5170462941462075, "grad_norm": 0.16215580701828003, "learning_rate": 1.6735137281282183e-06, "loss": 0.0028, "step": 179660 }, { "epoch": 1.5171307339933715, "grad_norm": 0.33826136589050293, "learning_rate": 1.672963624047942e-06, "loss": 0.0105, "step": 179670 }, { "epoch": 1.5172151738405355, "grad_norm": 0.14488860964775085, "learning_rate": 1.672413592230212e-06, "loss": 0.0077, "step": 179680 }, { "epoch": 1.5172996136876993, "grad_norm": 0.12038686871528625, "learning_rate": 1.6718636326869791e-06, "loss": 0.0068, "step": 179690 }, { "epoch": 1.517384053534863, "grad_norm": 0.6749753355979919, "learning_rate": 1.6713137454301875e-06, "loss": 0.0065, "step": 179700 }, { "epoch": 1.5174684933820268, "grad_norm": 0.29417410492897034, "learning_rate": 1.670763930471776e-06, "loss": 0.0043, "step": 179710 }, { "epoch": 1.5175529332291908, "grad_norm": 0.22362622618675232, "learning_rate": 1.670214187823691e-06, "loss": 0.0051, "step": 179720 }, { "epoch": 1.5176373730763548, "grad_norm": 0.24391040205955505, "learning_rate": 1.6696645174978699e-06, "loss": 0.008, "step": 179730 }, { "epoch": 1.5177218129235186, "grad_norm": 0.3919472098350525, "learning_rate": 1.6691149195062551e-06, "loss": 0.0074, "step": 179740 }, { "epoch": 1.5178062527706824, "grad_norm": 0.14191178977489471, "learning_rate": 1.6685653938607798e-06, "loss": 0.0091, "step": 179750 }, { "epoch": 1.5178906926178464, "grad_norm": 0.05788086727261543, "learning_rate": 1.668015940573383e-06, "loss": 0.0029, "step": 179760 }, { "epoch": 1.5179751324650104, "grad_norm": 0.022678107023239136, "learning_rate": 1.6674665596559974e-06, "loss": 0.0053, "step": 179770 }, { "epoch": 1.5180595723121741, "grad_norm": 0.012475046329200268, "learning_rate": 1.6669172511205551e-06, "loss": 0.0086, "step": 179780 }, { "epoch": 1.518144012159338, "grad_norm": 0.15886741876602173, "learning_rate": 1.6663680149789857e-06, "loss": 0.0057, "step": 179790 }, { "epoch": 1.518228452006502, "grad_norm": 0.09898660331964493, "learning_rate": 1.6658188512432212e-06, "loss": 0.0059, "step": 179800 }, { "epoch": 1.5183128918536657, "grad_norm": 0.007997593842446804, "learning_rate": 1.6652697599251883e-06, "loss": 0.0075, "step": 179810 }, { "epoch": 1.5183973317008297, "grad_norm": 0.34582340717315674, "learning_rate": 1.6647207410368104e-06, "loss": 0.0079, "step": 179820 }, { "epoch": 1.5184817715479935, "grad_norm": 0.04509438946843147, "learning_rate": 1.6641717945900172e-06, "loss": 0.0034, "step": 179830 }, { "epoch": 1.5185662113951572, "grad_norm": 0.9971319437026978, "learning_rate": 1.6636229205967268e-06, "loss": 0.0088, "step": 179840 }, { "epoch": 1.5186506512423212, "grad_norm": 0.07826360315084457, "learning_rate": 1.6630741190688649e-06, "loss": 0.0063, "step": 179850 }, { "epoch": 1.5187350910894852, "grad_norm": 0.026205459609627724, "learning_rate": 1.662525390018349e-06, "loss": 0.005, "step": 179860 }, { "epoch": 1.518819530936649, "grad_norm": 0.33526816964149475, "learning_rate": 1.661976733457098e-06, "loss": 0.0068, "step": 179870 }, { "epoch": 1.5189039707838128, "grad_norm": 0.160208061337471, "learning_rate": 1.6614281493970264e-06, "loss": 0.0112, "step": 179880 }, { "epoch": 1.5189884106309768, "grad_norm": 0.24012009799480438, "learning_rate": 1.660879637850053e-06, "loss": 0.0089, "step": 179890 }, { "epoch": 1.5190728504781408, "grad_norm": 0.1807190626859665, "learning_rate": 1.6603311988280885e-06, "loss": 0.0058, "step": 179900 }, { "epoch": 1.5191572903253046, "grad_norm": 0.35763710737228394, "learning_rate": 1.659782832343047e-06, "loss": 0.0048, "step": 179910 }, { "epoch": 1.5192417301724683, "grad_norm": 0.17158964276313782, "learning_rate": 1.6592345384068366e-06, "loss": 0.0072, "step": 179920 }, { "epoch": 1.519326170019632, "grad_norm": 0.5175285935401917, "learning_rate": 1.658686317031369e-06, "loss": 0.0046, "step": 179930 }, { "epoch": 1.519410609866796, "grad_norm": 0.0856962725520134, "learning_rate": 1.6581381682285496e-06, "loss": 0.0077, "step": 179940 }, { "epoch": 1.51949504971396, "grad_norm": 0.45959702134132385, "learning_rate": 1.6575900920102843e-06, "loss": 0.0128, "step": 179950 }, { "epoch": 1.5195794895611239, "grad_norm": 0.3491264283657074, "learning_rate": 1.657042088388478e-06, "loss": 0.0093, "step": 179960 }, { "epoch": 1.5196639294082877, "grad_norm": 0.2671937644481659, "learning_rate": 1.6564941573750304e-06, "loss": 0.0061, "step": 179970 }, { "epoch": 1.5197483692554516, "grad_norm": 0.5105799436569214, "learning_rate": 1.655946298981846e-06, "loss": 0.0097, "step": 179980 }, { "epoch": 1.5198328091026156, "grad_norm": 0.1810491383075714, "learning_rate": 1.6553985132208218e-06, "loss": 0.0039, "step": 179990 }, { "epoch": 1.5199172489497794, "grad_norm": 0.39841940999031067, "learning_rate": 1.6548508001038571e-06, "loss": 0.0116, "step": 180000 }, { "epoch": 1.5200016887969432, "grad_norm": 0.4545797109603882, "learning_rate": 1.6543031596428467e-06, "loss": 0.003, "step": 180010 }, { "epoch": 1.5200861286441072, "grad_norm": 0.04692938178777695, "learning_rate": 1.653755591849689e-06, "loss": 0.0048, "step": 180020 }, { "epoch": 1.5201705684912712, "grad_norm": 0.14972032606601715, "learning_rate": 1.6532080967362712e-06, "loss": 0.0062, "step": 180030 }, { "epoch": 1.520255008338435, "grad_norm": 0.3690081834793091, "learning_rate": 1.652660674314489e-06, "loss": 0.0086, "step": 180040 }, { "epoch": 1.5203394481855987, "grad_norm": 0.5201815962791443, "learning_rate": 1.6521133245962312e-06, "loss": 0.0071, "step": 180050 }, { "epoch": 1.5204238880327625, "grad_norm": 0.6726642847061157, "learning_rate": 1.6515660475933842e-06, "loss": 0.0083, "step": 180060 }, { "epoch": 1.5205083278799265, "grad_norm": 0.10679678618907928, "learning_rate": 1.6510188433178381e-06, "loss": 0.0109, "step": 180070 }, { "epoch": 1.5205927677270905, "grad_norm": 0.022644367069005966, "learning_rate": 1.650471711781475e-06, "loss": 0.0049, "step": 180080 }, { "epoch": 1.5206772075742543, "grad_norm": 0.006402365863323212, "learning_rate": 1.6499246529961816e-06, "loss": 0.0062, "step": 180090 }, { "epoch": 1.520761647421418, "grad_norm": 0.40099799633026123, "learning_rate": 1.6493776669738381e-06, "loss": 0.003, "step": 180100 }, { "epoch": 1.520846087268582, "grad_norm": 0.14470359683036804, "learning_rate": 1.6488307537263249e-06, "loss": 0.0046, "step": 180110 }, { "epoch": 1.520930527115746, "grad_norm": 0.11694858968257904, "learning_rate": 1.6482839132655194e-06, "loss": 0.0047, "step": 180120 }, { "epoch": 1.5210149669629098, "grad_norm": 0.16050344705581665, "learning_rate": 1.6477371456033025e-06, "loss": 0.0093, "step": 180130 }, { "epoch": 1.5210994068100736, "grad_norm": 0.5986270904541016, "learning_rate": 1.6471904507515456e-06, "loss": 0.0092, "step": 180140 }, { "epoch": 1.5211838466572376, "grad_norm": 0.1818699836730957, "learning_rate": 1.646643828722127e-06, "loss": 0.0061, "step": 180150 }, { "epoch": 1.5212682865044014, "grad_norm": 0.0560908280313015, "learning_rate": 1.6460972795269176e-06, "loss": 0.0059, "step": 180160 }, { "epoch": 1.5213527263515654, "grad_norm": 0.25896111130714417, "learning_rate": 1.6455508031777862e-06, "loss": 0.0043, "step": 180170 }, { "epoch": 1.5214371661987292, "grad_norm": 0.34996864199638367, "learning_rate": 1.6450043996866054e-06, "loss": 0.0127, "step": 180180 }, { "epoch": 1.521521606045893, "grad_norm": 0.4394262731075287, "learning_rate": 1.6444580690652418e-06, "loss": 0.0082, "step": 180190 }, { "epoch": 1.521606045893057, "grad_norm": 0.3263855278491974, "learning_rate": 1.643911811325561e-06, "loss": 0.0038, "step": 180200 }, { "epoch": 1.521690485740221, "grad_norm": 0.14708806574344635, "learning_rate": 1.6433656264794262e-06, "loss": 0.0047, "step": 180210 }, { "epoch": 1.5217749255873847, "grad_norm": 0.2622779905796051, "learning_rate": 1.6428195145387043e-06, "loss": 0.0054, "step": 180220 }, { "epoch": 1.5218593654345485, "grad_norm": 0.3329395651817322, "learning_rate": 1.6422734755152524e-06, "loss": 0.0069, "step": 180230 }, { "epoch": 1.5219438052817125, "grad_norm": 0.019547374919056892, "learning_rate": 1.641727509420935e-06, "loss": 0.0072, "step": 180240 }, { "epoch": 1.5220282451288765, "grad_norm": 0.020750749856233597, "learning_rate": 1.6411816162676053e-06, "loss": 0.0066, "step": 180250 }, { "epoch": 1.5221126849760402, "grad_norm": 0.1498749852180481, "learning_rate": 1.6406357960671249e-06, "loss": 0.0037, "step": 180260 }, { "epoch": 1.522197124823204, "grad_norm": 0.1615142524242401, "learning_rate": 1.6400900488313464e-06, "loss": 0.0037, "step": 180270 }, { "epoch": 1.5222815646703678, "grad_norm": 0.27161502838134766, "learning_rate": 1.6395443745721235e-06, "loss": 0.0045, "step": 180280 }, { "epoch": 1.5223660045175318, "grad_norm": 0.25651779770851135, "learning_rate": 1.6389987733013085e-06, "loss": 0.0063, "step": 180290 }, { "epoch": 1.5224504443646958, "grad_norm": 0.36613908410072327, "learning_rate": 1.6384532450307494e-06, "loss": 0.0108, "step": 180300 }, { "epoch": 1.5225348842118596, "grad_norm": 0.684516429901123, "learning_rate": 1.6379077897722984e-06, "loss": 0.0119, "step": 180310 }, { "epoch": 1.5226193240590233, "grad_norm": 0.21726621687412262, "learning_rate": 1.6373624075378004e-06, "loss": 0.0062, "step": 180320 }, { "epoch": 1.5227037639061873, "grad_norm": 0.5247267484664917, "learning_rate": 1.636817098339103e-06, "loss": 0.0049, "step": 180330 }, { "epoch": 1.5227882037533513, "grad_norm": 0.30675962567329407, "learning_rate": 1.6362718621880486e-06, "loss": 0.0037, "step": 180340 }, { "epoch": 1.522872643600515, "grad_norm": 0.09532471746206284, "learning_rate": 1.6357266990964805e-06, "loss": 0.0088, "step": 180350 }, { "epoch": 1.5229570834476789, "grad_norm": 0.28831803798675537, "learning_rate": 1.6351816090762373e-06, "loss": 0.0026, "step": 180360 }, { "epoch": 1.5230415232948429, "grad_norm": 0.2082306146621704, "learning_rate": 1.6346365921391622e-06, "loss": 0.0039, "step": 180370 }, { "epoch": 1.5231259631420069, "grad_norm": 0.4562481939792633, "learning_rate": 1.6340916482970887e-06, "loss": 0.0082, "step": 180380 }, { "epoch": 1.5232104029891707, "grad_norm": 0.3807069957256317, "learning_rate": 1.6335467775618563e-06, "loss": 0.0065, "step": 180390 }, { "epoch": 1.5232948428363344, "grad_norm": 0.44970476627349854, "learning_rate": 1.6330019799452985e-06, "loss": 0.0095, "step": 180400 }, { "epoch": 1.5233792826834982, "grad_norm": 0.1470201015472412, "learning_rate": 1.6324572554592455e-06, "loss": 0.0095, "step": 180410 }, { "epoch": 1.5234637225306622, "grad_norm": 0.5007562041282654, "learning_rate": 1.6319126041155325e-06, "loss": 0.0106, "step": 180420 }, { "epoch": 1.5235481623778262, "grad_norm": 0.2754678428173065, "learning_rate": 1.6313680259259878e-06, "loss": 0.0075, "step": 180430 }, { "epoch": 1.52363260222499, "grad_norm": 0.9701370596885681, "learning_rate": 1.6308235209024398e-06, "loss": 0.0084, "step": 180440 }, { "epoch": 1.5237170420721537, "grad_norm": 0.32346656918525696, "learning_rate": 1.6302790890567122e-06, "loss": 0.0049, "step": 180450 }, { "epoch": 1.5238014819193177, "grad_norm": 0.21680013835430145, "learning_rate": 1.629734730400634e-06, "loss": 0.0079, "step": 180460 }, { "epoch": 1.5238859217664817, "grad_norm": 0.3867475390434265, "learning_rate": 1.6291904449460255e-06, "loss": 0.0061, "step": 180470 }, { "epoch": 1.5239703616136455, "grad_norm": 0.19052232801914215, "learning_rate": 1.6286462327047108e-06, "loss": 0.0048, "step": 180480 }, { "epoch": 1.5240548014608093, "grad_norm": 0.5283961296081543, "learning_rate": 1.6281020936885078e-06, "loss": 0.0091, "step": 180490 }, { "epoch": 1.524139241307973, "grad_norm": 0.12101590633392334, "learning_rate": 1.6275580279092384e-06, "loss": 0.0038, "step": 180500 }, { "epoch": 1.524223681155137, "grad_norm": 0.03816083446145058, "learning_rate": 1.6270140353787167e-06, "loss": 0.0053, "step": 180510 }, { "epoch": 1.524308121002301, "grad_norm": 0.2972598969936371, "learning_rate": 1.62647011610876e-06, "loss": 0.0063, "step": 180520 }, { "epoch": 1.5243925608494648, "grad_norm": 0.9454037547111511, "learning_rate": 1.62592627011118e-06, "loss": 0.0124, "step": 180530 }, { "epoch": 1.5244770006966286, "grad_norm": 0.11874444782733917, "learning_rate": 1.6253824973977888e-06, "loss": 0.0046, "step": 180540 }, { "epoch": 1.5245614405437926, "grad_norm": 0.2831798493862152, "learning_rate": 1.6248387979804003e-06, "loss": 0.0041, "step": 180550 }, { "epoch": 1.5246458803909566, "grad_norm": 0.6722997426986694, "learning_rate": 1.6242951718708193e-06, "loss": 0.0065, "step": 180560 }, { "epoch": 1.5247303202381204, "grad_norm": 0.02051200345158577, "learning_rate": 1.6237516190808573e-06, "loss": 0.0062, "step": 180570 }, { "epoch": 1.5248147600852842, "grad_norm": 0.09986267983913422, "learning_rate": 1.6232081396223165e-06, "loss": 0.0038, "step": 180580 }, { "epoch": 1.5248991999324482, "grad_norm": 0.11719875782728195, "learning_rate": 1.6226647335070045e-06, "loss": 0.0041, "step": 180590 }, { "epoch": 1.5249836397796122, "grad_norm": 0.0348307304084301, "learning_rate": 1.6221214007467229e-06, "loss": 0.0066, "step": 180600 }, { "epoch": 1.525068079626776, "grad_norm": 0.33829501271247864, "learning_rate": 1.6215781413532717e-06, "loss": 0.0167, "step": 180610 }, { "epoch": 1.5251525194739397, "grad_norm": 0.3004664480686188, "learning_rate": 1.6210349553384497e-06, "loss": 0.0074, "step": 180620 }, { "epoch": 1.5252369593211035, "grad_norm": 0.748430609703064, "learning_rate": 1.6204918427140575e-06, "loss": 0.0084, "step": 180630 }, { "epoch": 1.5253213991682675, "grad_norm": 0.2826945185661316, "learning_rate": 1.6199488034918899e-06, "loss": 0.0038, "step": 180640 }, { "epoch": 1.5254058390154315, "grad_norm": 0.08425314724445343, "learning_rate": 1.6194058376837402e-06, "loss": 0.0037, "step": 180650 }, { "epoch": 1.5254902788625953, "grad_norm": 0.3960583209991455, "learning_rate": 1.6188629453014044e-06, "loss": 0.005, "step": 180660 }, { "epoch": 1.525574718709759, "grad_norm": 0.31874004006385803, "learning_rate": 1.6183201263566729e-06, "loss": 0.0074, "step": 180670 }, { "epoch": 1.525659158556923, "grad_norm": 0.21077895164489746, "learning_rate": 1.617777380861335e-06, "loss": 0.0047, "step": 180680 }, { "epoch": 1.525743598404087, "grad_norm": 0.30820074677467346, "learning_rate": 1.6172347088271777e-06, "loss": 0.0059, "step": 180690 }, { "epoch": 1.5258280382512508, "grad_norm": 0.04147082567214966, "learning_rate": 1.616692110265991e-06, "loss": 0.0053, "step": 180700 }, { "epoch": 1.5259124780984146, "grad_norm": 0.1519881933927536, "learning_rate": 1.6161495851895565e-06, "loss": 0.0061, "step": 180710 }, { "epoch": 1.5259969179455786, "grad_norm": 0.13885752856731415, "learning_rate": 1.6156071336096619e-06, "loss": 0.0101, "step": 180720 }, { "epoch": 1.5260813577927423, "grad_norm": 0.25564321875572205, "learning_rate": 1.6150647555380844e-06, "loss": 0.0076, "step": 180730 }, { "epoch": 1.5261657976399063, "grad_norm": 0.10442036390304565, "learning_rate": 1.6145224509866086e-06, "loss": 0.0072, "step": 180740 }, { "epoch": 1.5262502374870701, "grad_norm": 0.44799429178237915, "learning_rate": 1.6139802199670118e-06, "loss": 0.0111, "step": 180750 }, { "epoch": 1.526334677334234, "grad_norm": 0.2592011094093323, "learning_rate": 1.6134380624910706e-06, "loss": 0.0036, "step": 180760 }, { "epoch": 1.5264191171813979, "grad_norm": 0.24479974806308746, "learning_rate": 1.6128959785705606e-06, "loss": 0.0091, "step": 180770 }, { "epoch": 1.5265035570285619, "grad_norm": 0.026348968967795372, "learning_rate": 1.612353968217254e-06, "loss": 0.0049, "step": 180780 }, { "epoch": 1.5265879968757257, "grad_norm": 0.017397506162524223, "learning_rate": 1.611812031442927e-06, "loss": 0.005, "step": 180790 }, { "epoch": 1.5266724367228894, "grad_norm": 0.1326274573802948, "learning_rate": 1.6112701682593468e-06, "loss": 0.0158, "step": 180800 }, { "epoch": 1.5267568765700534, "grad_norm": 0.36447155475616455, "learning_rate": 1.6107283786782857e-06, "loss": 0.0115, "step": 180810 }, { "epoch": 1.5268413164172174, "grad_norm": 0.1700226068496704, "learning_rate": 1.6101866627115076e-06, "loss": 0.0068, "step": 180820 }, { "epoch": 1.5269257562643812, "grad_norm": 0.01627267524600029, "learning_rate": 1.6096450203707826e-06, "loss": 0.0045, "step": 180830 }, { "epoch": 1.527010196111545, "grad_norm": 0.2569153904914856, "learning_rate": 1.6091034516678733e-06, "loss": 0.0046, "step": 180840 }, { "epoch": 1.5270946359587088, "grad_norm": 0.17743755877017975, "learning_rate": 1.6085619566145417e-06, "loss": 0.0048, "step": 180850 }, { "epoch": 1.5271790758058728, "grad_norm": 0.0887247696518898, "learning_rate": 1.6080205352225481e-06, "loss": 0.016, "step": 180860 }, { "epoch": 1.5272635156530368, "grad_norm": 0.24022969603538513, "learning_rate": 1.6074791875036543e-06, "loss": 0.0066, "step": 180870 }, { "epoch": 1.5273479555002005, "grad_norm": 0.17710088193416595, "learning_rate": 1.6069379134696184e-06, "loss": 0.0054, "step": 180880 }, { "epoch": 1.5274323953473643, "grad_norm": 0.2300417423248291, "learning_rate": 1.6063967131321933e-06, "loss": 0.0052, "step": 180890 }, { "epoch": 1.5275168351945283, "grad_norm": 0.0296848826110363, "learning_rate": 1.6058555865031384e-06, "loss": 0.0051, "step": 180900 }, { "epoch": 1.5276012750416923, "grad_norm": 0.05720755085349083, "learning_rate": 1.6053145335942023e-06, "loss": 0.0086, "step": 180910 }, { "epoch": 1.527685714888856, "grad_norm": 0.10886809229850769, "learning_rate": 1.6047735544171428e-06, "loss": 0.0082, "step": 180920 }, { "epoch": 1.5277701547360198, "grad_norm": 0.6308034062385559, "learning_rate": 1.6042326489837024e-06, "loss": 0.0068, "step": 180930 }, { "epoch": 1.5278545945831838, "grad_norm": 0.33450278639793396, "learning_rate": 1.6036918173056343e-06, "loss": 0.0039, "step": 180940 }, { "epoch": 1.5279390344303478, "grad_norm": 0.2798093557357788, "learning_rate": 1.603151059394683e-06, "loss": 0.0049, "step": 180950 }, { "epoch": 1.5280234742775116, "grad_norm": 0.2752441167831421, "learning_rate": 1.602610375262596e-06, "loss": 0.0042, "step": 180960 }, { "epoch": 1.5281079141246754, "grad_norm": 0.337896466255188, "learning_rate": 1.6020697649211137e-06, "loss": 0.0055, "step": 180970 }, { "epoch": 1.5281923539718392, "grad_norm": 0.2811625301837921, "learning_rate": 1.6015292283819818e-06, "loss": 0.0039, "step": 180980 }, { "epoch": 1.5282767938190032, "grad_norm": 0.3734920620918274, "learning_rate": 1.6009887656569378e-06, "loss": 0.0119, "step": 180990 }, { "epoch": 1.5283612336661672, "grad_norm": 0.35011744499206543, "learning_rate": 1.6004483767577223e-06, "loss": 0.0034, "step": 181000 }, { "epoch": 1.528445673513331, "grad_norm": 0.17423240840435028, "learning_rate": 1.5999080616960705e-06, "loss": 0.0047, "step": 181010 }, { "epoch": 1.5285301133604947, "grad_norm": 0.5947525501251221, "learning_rate": 1.5993678204837177e-06, "loss": 0.0067, "step": 181020 }, { "epoch": 1.5286145532076587, "grad_norm": 0.5091376900672913, "learning_rate": 1.5988276531324009e-06, "loss": 0.005, "step": 181030 }, { "epoch": 1.5286989930548227, "grad_norm": 0.28608205914497375, "learning_rate": 1.5982875596538483e-06, "loss": 0.0129, "step": 181040 }, { "epoch": 1.5287834329019865, "grad_norm": 0.3191129267215729, "learning_rate": 1.5977475400597946e-06, "loss": 0.0078, "step": 181050 }, { "epoch": 1.5288678727491503, "grad_norm": 0.22697527706623077, "learning_rate": 1.5972075943619657e-06, "loss": 0.0059, "step": 181060 }, { "epoch": 1.5289523125963143, "grad_norm": 0.5001623034477234, "learning_rate": 1.5966677225720923e-06, "loss": 0.006, "step": 181070 }, { "epoch": 1.529036752443478, "grad_norm": 0.007713539991527796, "learning_rate": 1.5961279247018984e-06, "loss": 0.0064, "step": 181080 }, { "epoch": 1.529121192290642, "grad_norm": 0.9896690845489502, "learning_rate": 1.5955882007631085e-06, "loss": 0.0142, "step": 181090 }, { "epoch": 1.5292056321378058, "grad_norm": 0.1255379319190979, "learning_rate": 1.5950485507674435e-06, "loss": 0.0043, "step": 181100 }, { "epoch": 1.5292900719849696, "grad_norm": 0.32436603307724, "learning_rate": 1.594508974726628e-06, "loss": 0.0054, "step": 181110 }, { "epoch": 1.5293745118321336, "grad_norm": 0.5632831454277039, "learning_rate": 1.5939694726523796e-06, "loss": 0.0051, "step": 181120 }, { "epoch": 1.5294589516792976, "grad_norm": 0.07239369302988052, "learning_rate": 1.5934300445564145e-06, "loss": 0.005, "step": 181130 }, { "epoch": 1.5295433915264613, "grad_norm": 0.32274192571640015, "learning_rate": 1.5928906904504527e-06, "loss": 0.0043, "step": 181140 }, { "epoch": 1.5296278313736251, "grad_norm": 0.28417184948921204, "learning_rate": 1.5923514103462052e-06, "loss": 0.0061, "step": 181150 }, { "epoch": 1.5297122712207891, "grad_norm": 0.04883464053273201, "learning_rate": 1.5918122042553884e-06, "loss": 0.0073, "step": 181160 }, { "epoch": 1.5297967110679531, "grad_norm": 0.3798840045928955, "learning_rate": 1.591273072189712e-06, "loss": 0.0065, "step": 181170 }, { "epoch": 1.529881150915117, "grad_norm": 0.197473406791687, "learning_rate": 1.5907340141608863e-06, "loss": 0.0045, "step": 181180 }, { "epoch": 1.5299655907622807, "grad_norm": 0.07799477875232697, "learning_rate": 1.5901950301806173e-06, "loss": 0.0043, "step": 181190 }, { "epoch": 1.5300500306094444, "grad_norm": 0.05406039208173752, "learning_rate": 1.5896561202606147e-06, "loss": 0.0035, "step": 181200 }, { "epoch": 1.5301344704566084, "grad_norm": 0.42562925815582275, "learning_rate": 1.5891172844125813e-06, "loss": 0.006, "step": 181210 }, { "epoch": 1.5302189103037724, "grad_norm": 0.34089821577072144, "learning_rate": 1.588578522648223e-06, "loss": 0.0071, "step": 181220 }, { "epoch": 1.5303033501509362, "grad_norm": 0.2014852911233902, "learning_rate": 1.5880398349792403e-06, "loss": 0.0074, "step": 181230 }, { "epoch": 1.5303877899981, "grad_norm": 0.131217822432518, "learning_rate": 1.587501221417333e-06, "loss": 0.0066, "step": 181240 }, { "epoch": 1.530472229845264, "grad_norm": 0.04688268154859543, "learning_rate": 1.5869626819742002e-06, "loss": 0.0048, "step": 181250 }, { "epoch": 1.530556669692428, "grad_norm": 0.05344127118587494, "learning_rate": 1.5864242166615363e-06, "loss": 0.0045, "step": 181260 }, { "epoch": 1.5306411095395918, "grad_norm": 0.2110603302717209, "learning_rate": 1.585885825491041e-06, "loss": 0.0082, "step": 181270 }, { "epoch": 1.5307255493867555, "grad_norm": 0.25027111172676086, "learning_rate": 1.5853475084744042e-06, "loss": 0.0065, "step": 181280 }, { "epoch": 1.5308099892339195, "grad_norm": 0.544302761554718, "learning_rate": 1.584809265623321e-06, "loss": 0.0086, "step": 181290 }, { "epoch": 1.5308944290810833, "grad_norm": 0.11388113349676132, "learning_rate": 1.5842710969494795e-06, "loss": 0.0123, "step": 181300 }, { "epoch": 1.5309788689282473, "grad_norm": 0.1634352058172226, "learning_rate": 1.583733002464572e-06, "loss": 0.0073, "step": 181310 }, { "epoch": 1.531063308775411, "grad_norm": 0.14619354903697968, "learning_rate": 1.5831949821802829e-06, "loss": 0.007, "step": 181320 }, { "epoch": 1.5311477486225749, "grad_norm": 0.08780988305807114, "learning_rate": 1.5826570361082988e-06, "loss": 0.0081, "step": 181330 }, { "epoch": 1.5312321884697389, "grad_norm": 0.266803115606308, "learning_rate": 1.5821191642603035e-06, "loss": 0.006, "step": 181340 }, { "epoch": 1.5313166283169029, "grad_norm": 0.31926003098487854, "learning_rate": 1.5815813666479779e-06, "loss": 0.0098, "step": 181350 }, { "epoch": 1.5314010681640666, "grad_norm": 0.7437564730644226, "learning_rate": 1.5810436432830062e-06, "loss": 0.0098, "step": 181360 }, { "epoch": 1.5314855080112304, "grad_norm": 0.11628583818674088, "learning_rate": 1.5805059941770645e-06, "loss": 0.0065, "step": 181370 }, { "epoch": 1.5315699478583944, "grad_norm": 0.19015589356422424, "learning_rate": 1.5799684193418335e-06, "loss": 0.0067, "step": 181380 }, { "epoch": 1.5316543877055584, "grad_norm": 0.5066962242126465, "learning_rate": 1.5794309187889856e-06, "loss": 0.012, "step": 181390 }, { "epoch": 1.5317388275527222, "grad_norm": 0.13251622021198273, "learning_rate": 1.5788934925301991e-06, "loss": 0.0097, "step": 181400 }, { "epoch": 1.531823267399886, "grad_norm": 0.18732190132141113, "learning_rate": 1.5783561405771453e-06, "loss": 0.0053, "step": 181410 }, { "epoch": 1.5319077072470497, "grad_norm": 0.3282115161418915, "learning_rate": 1.5778188629414943e-06, "loss": 0.0069, "step": 181420 }, { "epoch": 1.5319921470942137, "grad_norm": 0.3357868492603302, "learning_rate": 1.577281659634915e-06, "loss": 0.0054, "step": 181430 }, { "epoch": 1.5320765869413777, "grad_norm": 0.28468772768974304, "learning_rate": 1.5767445306690781e-06, "loss": 0.0061, "step": 181440 }, { "epoch": 1.5321610267885415, "grad_norm": 0.12481248378753662, "learning_rate": 1.5762074760556474e-06, "loss": 0.0067, "step": 181450 }, { "epoch": 1.5322454666357053, "grad_norm": 0.11901991069316864, "learning_rate": 1.5756704958062896e-06, "loss": 0.0105, "step": 181460 }, { "epoch": 1.5323299064828693, "grad_norm": 0.35857921838760376, "learning_rate": 1.5751335899326675e-06, "loss": 0.0051, "step": 181470 }, { "epoch": 1.5324143463300333, "grad_norm": 1.0624727010726929, "learning_rate": 1.57459675844644e-06, "loss": 0.0144, "step": 181480 }, { "epoch": 1.532498786177197, "grad_norm": 0.11194229125976562, "learning_rate": 1.574060001359271e-06, "loss": 0.0075, "step": 181490 }, { "epoch": 1.5325832260243608, "grad_norm": 0.23667843639850616, "learning_rate": 1.5735233186828163e-06, "loss": 0.0076, "step": 181500 }, { "epoch": 1.5326676658715248, "grad_norm": 0.17762938141822815, "learning_rate": 1.5729867104287332e-06, "loss": 0.0046, "step": 181510 }, { "epoch": 1.5327521057186888, "grad_norm": 0.23187080025672913, "learning_rate": 1.5724501766086752e-06, "loss": 0.004, "step": 181520 }, { "epoch": 1.5328365455658526, "grad_norm": 0.34487470984458923, "learning_rate": 1.571913717234298e-06, "loss": 0.0067, "step": 181530 }, { "epoch": 1.5329209854130164, "grad_norm": 0.2746559679508209, "learning_rate": 1.571377332317251e-06, "loss": 0.0027, "step": 181540 }, { "epoch": 1.5330054252601801, "grad_norm": 0.03374538570642471, "learning_rate": 1.5708410218691878e-06, "loss": 0.0034, "step": 181550 }, { "epoch": 1.5330898651073441, "grad_norm": 0.48191431164741516, "learning_rate": 1.570304785901754e-06, "loss": 0.0086, "step": 181560 }, { "epoch": 1.5331743049545081, "grad_norm": 0.1998276263475418, "learning_rate": 1.569768624426598e-06, "loss": 0.0046, "step": 181570 }, { "epoch": 1.533258744801672, "grad_norm": 0.4578544795513153, "learning_rate": 1.5692325374553646e-06, "loss": 0.0046, "step": 181580 }, { "epoch": 1.5333431846488357, "grad_norm": 0.21654023230075836, "learning_rate": 1.5686965249996954e-06, "loss": 0.0075, "step": 181590 }, { "epoch": 1.5334276244959997, "grad_norm": 0.2594819664955139, "learning_rate": 1.5681605870712369e-06, "loss": 0.0057, "step": 181600 }, { "epoch": 1.5335120643431637, "grad_norm": 0.24205970764160156, "learning_rate": 1.567624723681625e-06, "loss": 0.005, "step": 181610 }, { "epoch": 1.5335965041903274, "grad_norm": 0.12149693071842194, "learning_rate": 1.5670889348425029e-06, "loss": 0.0078, "step": 181620 }, { "epoch": 1.5336809440374912, "grad_norm": 1.734309434890747, "learning_rate": 1.5665532205655032e-06, "loss": 0.0157, "step": 181630 }, { "epoch": 1.5337653838846552, "grad_norm": 0.30916693806648254, "learning_rate": 1.5660175808622664e-06, "loss": 0.0049, "step": 181640 }, { "epoch": 1.533849823731819, "grad_norm": 0.8423190712928772, "learning_rate": 1.5654820157444233e-06, "loss": 0.0032, "step": 181650 }, { "epoch": 1.533934263578983, "grad_norm": 0.41615352034568787, "learning_rate": 1.5649465252236073e-06, "loss": 0.006, "step": 181660 }, { "epoch": 1.5340187034261468, "grad_norm": 0.20530255138874054, "learning_rate": 1.5644111093114478e-06, "loss": 0.0077, "step": 181670 }, { "epoch": 1.5341031432733105, "grad_norm": 0.9097073674201965, "learning_rate": 1.563875768019576e-06, "loss": 0.0138, "step": 181680 }, { "epoch": 1.5341875831204745, "grad_norm": 0.09272824227809906, "learning_rate": 1.563340501359617e-06, "loss": 0.0056, "step": 181690 }, { "epoch": 1.5342720229676385, "grad_norm": 0.13292020559310913, "learning_rate": 1.5628053093431993e-06, "loss": 0.0031, "step": 181700 }, { "epoch": 1.5343564628148023, "grad_norm": 0.13098233938217163, "learning_rate": 1.5622701919819467e-06, "loss": 0.0086, "step": 181710 }, { "epoch": 1.534440902661966, "grad_norm": 0.21693986654281616, "learning_rate": 1.5617351492874789e-06, "loss": 0.0043, "step": 181720 }, { "epoch": 1.53452534250913, "grad_norm": 0.12921862304210663, "learning_rate": 1.5612001812714206e-06, "loss": 0.0107, "step": 181730 }, { "epoch": 1.534609782356294, "grad_norm": 0.05129246786236763, "learning_rate": 1.5606652879453903e-06, "loss": 0.0069, "step": 181740 }, { "epoch": 1.5346942222034579, "grad_norm": 0.8393809199333191, "learning_rate": 1.5601304693210045e-06, "loss": 0.0076, "step": 181750 }, { "epoch": 1.5347786620506216, "grad_norm": 0.43301257491111755, "learning_rate": 1.5595957254098787e-06, "loss": 0.0041, "step": 181760 }, { "epoch": 1.5348631018977854, "grad_norm": 0.4527101516723633, "learning_rate": 1.5590610562236303e-06, "loss": 0.0044, "step": 181770 }, { "epoch": 1.5349475417449494, "grad_norm": 0.30091729760169983, "learning_rate": 1.5585264617738694e-06, "loss": 0.0059, "step": 181780 }, { "epoch": 1.5350319815921134, "grad_norm": 0.0028909663669764996, "learning_rate": 1.5579919420722095e-06, "loss": 0.0063, "step": 181790 }, { "epoch": 1.5351164214392772, "grad_norm": 0.2050795555114746, "learning_rate": 1.557457497130258e-06, "loss": 0.0091, "step": 181800 }, { "epoch": 1.535200861286441, "grad_norm": 0.07866008579730988, "learning_rate": 1.5569231269596258e-06, "loss": 0.0064, "step": 181810 }, { "epoch": 1.535285301133605, "grad_norm": 0.1741754114627838, "learning_rate": 1.5563888315719195e-06, "loss": 0.0055, "step": 181820 }, { "epoch": 1.535369740980769, "grad_norm": 0.17533858120441437, "learning_rate": 1.5558546109787386e-06, "loss": 0.0065, "step": 181830 }, { "epoch": 1.5354541808279327, "grad_norm": 0.22850769758224487, "learning_rate": 1.5553204651916914e-06, "loss": 0.0069, "step": 181840 }, { "epoch": 1.5355386206750965, "grad_norm": 0.3690071403980255, "learning_rate": 1.5547863942223761e-06, "loss": 0.006, "step": 181850 }, { "epoch": 1.5356230605222605, "grad_norm": 0.5272737741470337, "learning_rate": 1.5542523980823964e-06, "loss": 0.0053, "step": 181860 }, { "epoch": 1.5357075003694245, "grad_norm": 0.14672307670116425, "learning_rate": 1.5537184767833468e-06, "loss": 0.0037, "step": 181870 }, { "epoch": 1.5357919402165883, "grad_norm": 0.34312736988067627, "learning_rate": 1.553184630336827e-06, "loss": 0.0048, "step": 181880 }, { "epoch": 1.535876380063752, "grad_norm": 0.0956125557422638, "learning_rate": 1.5526508587544309e-06, "loss": 0.0085, "step": 181890 }, { "epoch": 1.5359608199109158, "grad_norm": 0.13410580158233643, "learning_rate": 1.5521171620477516e-06, "loss": 0.0038, "step": 181900 }, { "epoch": 1.5360452597580798, "grad_norm": 0.3745381832122803, "learning_rate": 1.5515835402283797e-06, "loss": 0.0032, "step": 181910 }, { "epoch": 1.5361296996052438, "grad_norm": 0.2770187556743622, "learning_rate": 1.5510499933079075e-06, "loss": 0.0063, "step": 181920 }, { "epoch": 1.5362141394524076, "grad_norm": 0.2417888045310974, "learning_rate": 1.5505165212979217e-06, "loss": 0.0079, "step": 181930 }, { "epoch": 1.5362985792995714, "grad_norm": 0.2266893833875656, "learning_rate": 1.5499831242100123e-06, "loss": 0.0056, "step": 181940 }, { "epoch": 1.5363830191467354, "grad_norm": 0.030243368819355965, "learning_rate": 1.5494498020557614e-06, "loss": 0.006, "step": 181950 }, { "epoch": 1.5364674589938994, "grad_norm": 0.18121473491191864, "learning_rate": 1.5489165548467528e-06, "loss": 0.004, "step": 181960 }, { "epoch": 1.5365518988410631, "grad_norm": 0.5775722861289978, "learning_rate": 1.5483833825945705e-06, "loss": 0.0067, "step": 181970 }, { "epoch": 1.536636338688227, "grad_norm": 0.0733988955616951, "learning_rate": 1.547850285310794e-06, "loss": 0.0088, "step": 181980 }, { "epoch": 1.5367207785353907, "grad_norm": 1.5763813257217407, "learning_rate": 1.547317263007002e-06, "loss": 0.0151, "step": 181990 }, { "epoch": 1.5368052183825547, "grad_norm": 0.10907049477100372, "learning_rate": 1.5467843156947704e-06, "loss": 0.0047, "step": 182000 }, { "epoch": 1.5368896582297187, "grad_norm": 0.2721031606197357, "learning_rate": 1.5462514433856768e-06, "loss": 0.0043, "step": 182010 }, { "epoch": 1.5369740980768825, "grad_norm": 0.30177098512649536, "learning_rate": 1.5457186460912925e-06, "loss": 0.0072, "step": 182020 }, { "epoch": 1.5370585379240462, "grad_norm": 0.12036392837762833, "learning_rate": 1.5451859238231937e-06, "loss": 0.0046, "step": 182030 }, { "epoch": 1.5371429777712102, "grad_norm": 0.22166572511196136, "learning_rate": 1.544653276592946e-06, "loss": 0.0102, "step": 182040 }, { "epoch": 1.5372274176183742, "grad_norm": 0.2805483043193817, "learning_rate": 1.5441207044121238e-06, "loss": 0.0075, "step": 182050 }, { "epoch": 1.537311857465538, "grad_norm": 0.17162984609603882, "learning_rate": 1.5435882072922908e-06, "loss": 0.0062, "step": 182060 }, { "epoch": 1.5373962973127018, "grad_norm": 0.5620067715644836, "learning_rate": 1.543055785245014e-06, "loss": 0.0156, "step": 182070 }, { "epoch": 1.5374807371598658, "grad_norm": 0.17872437834739685, "learning_rate": 1.5425234382818572e-06, "loss": 0.0109, "step": 182080 }, { "epoch": 1.5375651770070298, "grad_norm": 0.25885075330734253, "learning_rate": 1.5419911664143812e-06, "loss": 0.0116, "step": 182090 }, { "epoch": 1.5376496168541935, "grad_norm": 0.010009351186454296, "learning_rate": 1.5414589696541499e-06, "loss": 0.0072, "step": 182100 }, { "epoch": 1.5377340567013573, "grad_norm": 0.3563211262226105, "learning_rate": 1.5409268480127194e-06, "loss": 0.004, "step": 182110 }, { "epoch": 1.537818496548521, "grad_norm": 0.12378837168216705, "learning_rate": 1.5403948015016506e-06, "loss": 0.0043, "step": 182120 }, { "epoch": 1.537902936395685, "grad_norm": 0.3612392842769623, "learning_rate": 1.539862830132498e-06, "loss": 0.0099, "step": 182130 }, { "epoch": 1.537987376242849, "grad_norm": 0.23559007048606873, "learning_rate": 1.5393309339168154e-06, "loss": 0.003, "step": 182140 }, { "epoch": 1.5380718160900129, "grad_norm": 0.6154828071594238, "learning_rate": 1.5387991128661544e-06, "loss": 0.0085, "step": 182150 }, { "epoch": 1.5381562559371766, "grad_norm": 0.31050002574920654, "learning_rate": 1.5382673669920689e-06, "loss": 0.0071, "step": 182160 }, { "epoch": 1.5382406957843406, "grad_norm": 0.08870343118906021, "learning_rate": 1.537735696306107e-06, "loss": 0.0097, "step": 182170 }, { "epoch": 1.5383251356315046, "grad_norm": 0.49270913004875183, "learning_rate": 1.537204100819814e-06, "loss": 0.0048, "step": 182180 }, { "epoch": 1.5384095754786684, "grad_norm": 0.13187582790851593, "learning_rate": 1.5366725805447408e-06, "loss": 0.0113, "step": 182190 }, { "epoch": 1.5384940153258322, "grad_norm": 0.7108381390571594, "learning_rate": 1.5361411354924277e-06, "loss": 0.0077, "step": 182200 }, { "epoch": 1.5385784551729962, "grad_norm": 0.10785526037216187, "learning_rate": 1.5356097656744207e-06, "loss": 0.012, "step": 182210 }, { "epoch": 1.53866289502016, "grad_norm": 0.17275820672512054, "learning_rate": 1.5350784711022592e-06, "loss": 0.0051, "step": 182220 }, { "epoch": 1.538747334867324, "grad_norm": 0.12700217962265015, "learning_rate": 1.5345472517874839e-06, "loss": 0.005, "step": 182230 }, { "epoch": 1.5388317747144877, "grad_norm": 0.009587367996573448, "learning_rate": 1.5340161077416304e-06, "loss": 0.0078, "step": 182240 }, { "epoch": 1.5389162145616515, "grad_norm": 0.001183592714369297, "learning_rate": 1.5334850389762385e-06, "loss": 0.0047, "step": 182250 }, { "epoch": 1.5390006544088155, "grad_norm": 0.2017734944820404, "learning_rate": 1.5329540455028397e-06, "loss": 0.0098, "step": 182260 }, { "epoch": 1.5390850942559795, "grad_norm": 0.12072140723466873, "learning_rate": 1.5324231273329698e-06, "loss": 0.0045, "step": 182270 }, { "epoch": 1.5391695341031433, "grad_norm": 0.28708505630493164, "learning_rate": 1.5318922844781576e-06, "loss": 0.0088, "step": 182280 }, { "epoch": 1.539253973950307, "grad_norm": 0.49442043900489807, "learning_rate": 1.5313615169499358e-06, "loss": 0.0077, "step": 182290 }, { "epoch": 1.539338413797471, "grad_norm": 0.1997571438550949, "learning_rate": 1.5308308247598307e-06, "loss": 0.0112, "step": 182300 }, { "epoch": 1.539422853644635, "grad_norm": 0.2113378942012787, "learning_rate": 1.5303002079193702e-06, "loss": 0.0052, "step": 182310 }, { "epoch": 1.5395072934917988, "grad_norm": 0.3885255753993988, "learning_rate": 1.5297696664400775e-06, "loss": 0.0067, "step": 182320 }, { "epoch": 1.5395917333389626, "grad_norm": 0.3034818172454834, "learning_rate": 1.529239200333475e-06, "loss": 0.006, "step": 182330 }, { "epoch": 1.5396761731861264, "grad_norm": 0.6324769854545593, "learning_rate": 1.5287088096110874e-06, "loss": 0.0112, "step": 182340 }, { "epoch": 1.5397606130332904, "grad_norm": 0.06230868771672249, "learning_rate": 1.5281784942844313e-06, "loss": 0.0066, "step": 182350 }, { "epoch": 1.5398450528804544, "grad_norm": 0.1496696174144745, "learning_rate": 1.527648254365029e-06, "loss": 0.0045, "step": 182360 }, { "epoch": 1.5399294927276181, "grad_norm": 0.3337460458278656, "learning_rate": 1.5271180898643927e-06, "loss": 0.0068, "step": 182370 }, { "epoch": 1.540013932574782, "grad_norm": 0.04086686298251152, "learning_rate": 1.526588000794042e-06, "loss": 0.0049, "step": 182380 }, { "epoch": 1.540098372421946, "grad_norm": 0.4911125898361206, "learning_rate": 1.5260579871654885e-06, "loss": 0.0078, "step": 182390 }, { "epoch": 1.54018281226911, "grad_norm": 0.16413439810276031, "learning_rate": 1.525528048990243e-06, "loss": 0.002, "step": 182400 }, { "epoch": 1.5402672521162737, "grad_norm": 0.12931528687477112, "learning_rate": 1.5249981862798163e-06, "loss": 0.007, "step": 182410 }, { "epoch": 1.5403516919634375, "grad_norm": 0.03193406015634537, "learning_rate": 1.5244683990457154e-06, "loss": 0.007, "step": 182420 }, { "epoch": 1.5404361318106015, "grad_norm": 0.025694677606225014, "learning_rate": 1.5239386872994505e-06, "loss": 0.0085, "step": 182430 }, { "epoch": 1.5405205716577655, "grad_norm": 0.09980913251638412, "learning_rate": 1.5234090510525235e-06, "loss": 0.0113, "step": 182440 }, { "epoch": 1.5406050115049292, "grad_norm": 0.1516667604446411, "learning_rate": 1.5228794903164407e-06, "loss": 0.0166, "step": 182450 }, { "epoch": 1.540689451352093, "grad_norm": 0.1616479456424713, "learning_rate": 1.522350005102703e-06, "loss": 0.0088, "step": 182460 }, { "epoch": 1.5407738911992568, "grad_norm": 0.17630545794963837, "learning_rate": 1.5218205954228105e-06, "loss": 0.0076, "step": 182470 }, { "epoch": 1.5408583310464208, "grad_norm": 0.15363992750644684, "learning_rate": 1.5212912612882602e-06, "loss": 0.0051, "step": 182480 }, { "epoch": 1.5409427708935848, "grad_norm": 0.27118831872940063, "learning_rate": 1.520762002710553e-06, "loss": 0.0059, "step": 182490 }, { "epoch": 1.5410272107407486, "grad_norm": 0.05323941260576248, "learning_rate": 1.52023281970118e-06, "loss": 0.0132, "step": 182500 }, { "epoch": 1.5411116505879123, "grad_norm": 0.13651438057422638, "learning_rate": 1.519703712271639e-06, "loss": 0.0042, "step": 182510 }, { "epoch": 1.5411960904350763, "grad_norm": 0.061123400926589966, "learning_rate": 1.5191746804334183e-06, "loss": 0.0102, "step": 182520 }, { "epoch": 1.5412805302822403, "grad_norm": 0.10618561506271362, "learning_rate": 1.5186457241980117e-06, "loss": 0.0055, "step": 182530 }, { "epoch": 1.541364970129404, "grad_norm": 0.4148130416870117, "learning_rate": 1.5181168435769073e-06, "loss": 0.0035, "step": 182540 }, { "epoch": 1.5414494099765679, "grad_norm": 0.3450680673122406, "learning_rate": 1.5175880385815911e-06, "loss": 0.0095, "step": 182550 }, { "epoch": 1.5415338498237319, "grad_norm": 0.11931399255990982, "learning_rate": 1.517059309223549e-06, "loss": 0.0052, "step": 182560 }, { "epoch": 1.5416182896708956, "grad_norm": 0.3084392249584198, "learning_rate": 1.5165306555142633e-06, "loss": 0.0058, "step": 182570 }, { "epoch": 1.5417027295180596, "grad_norm": 0.0738208070397377, "learning_rate": 1.5160020774652195e-06, "loss": 0.0091, "step": 182580 }, { "epoch": 1.5417871693652234, "grad_norm": 0.14298757910728455, "learning_rate": 1.5154735750878957e-06, "loss": 0.0056, "step": 182590 }, { "epoch": 1.5418716092123872, "grad_norm": 0.1462310552597046, "learning_rate": 1.5149451483937727e-06, "loss": 0.0071, "step": 182600 }, { "epoch": 1.5419560490595512, "grad_norm": 0.692438006401062, "learning_rate": 1.5144167973943257e-06, "loss": 0.0117, "step": 182610 }, { "epoch": 1.5420404889067152, "grad_norm": 0.2554200291633606, "learning_rate": 1.513888522101033e-06, "loss": 0.005, "step": 182620 }, { "epoch": 1.542124928753879, "grad_norm": 0.1587078869342804, "learning_rate": 1.5133603225253674e-06, "loss": 0.0072, "step": 182630 }, { "epoch": 1.5422093686010427, "grad_norm": 0.17654122412204742, "learning_rate": 1.5128321986788008e-06, "loss": 0.0038, "step": 182640 }, { "epoch": 1.5422938084482067, "grad_norm": 0.3566851019859314, "learning_rate": 1.5123041505728047e-06, "loss": 0.0053, "step": 182650 }, { "epoch": 1.5423782482953707, "grad_norm": 0.30707859992980957, "learning_rate": 1.5117761782188456e-06, "loss": 0.0077, "step": 182660 }, { "epoch": 1.5424626881425345, "grad_norm": 0.19108153879642487, "learning_rate": 1.5112482816283953e-06, "loss": 0.0031, "step": 182670 }, { "epoch": 1.5425471279896983, "grad_norm": 0.06350579112768173, "learning_rate": 1.5107204608129156e-06, "loss": 0.0112, "step": 182680 }, { "epoch": 1.542631567836862, "grad_norm": 0.1564265936613083, "learning_rate": 1.5101927157838736e-06, "loss": 0.0073, "step": 182690 }, { "epoch": 1.542716007684026, "grad_norm": 0.06985651701688766, "learning_rate": 1.5096650465527296e-06, "loss": 0.0057, "step": 182700 }, { "epoch": 1.54280044753119, "grad_norm": 0.29341450333595276, "learning_rate": 1.5091374531309489e-06, "loss": 0.0034, "step": 182710 }, { "epoch": 1.5428848873783538, "grad_norm": 0.5167660117149353, "learning_rate": 1.5086099355299838e-06, "loss": 0.004, "step": 182720 }, { "epoch": 1.5429693272255176, "grad_norm": 0.13019497692584991, "learning_rate": 1.508082493761297e-06, "loss": 0.0016, "step": 182730 }, { "epoch": 1.5430537670726816, "grad_norm": 0.19669800996780396, "learning_rate": 1.5075551278363414e-06, "loss": 0.0022, "step": 182740 }, { "epoch": 1.5431382069198456, "grad_norm": 0.4352327287197113, "learning_rate": 1.5070278377665741e-06, "loss": 0.0134, "step": 182750 }, { "epoch": 1.5432226467670094, "grad_norm": 0.2607961595058441, "learning_rate": 1.506500623563447e-06, "loss": 0.0028, "step": 182760 }, { "epoch": 1.5433070866141732, "grad_norm": 0.3654850721359253, "learning_rate": 1.5059734852384083e-06, "loss": 0.007, "step": 182770 }, { "epoch": 1.5433915264613371, "grad_norm": 0.5982361435890198, "learning_rate": 1.5054464228029115e-06, "loss": 0.0096, "step": 182780 }, { "epoch": 1.5434759663085011, "grad_norm": 0.0749545767903328, "learning_rate": 1.5049194362684023e-06, "loss": 0.0092, "step": 182790 }, { "epoch": 1.543560406155665, "grad_norm": 0.484649658203125, "learning_rate": 1.5043925256463266e-06, "loss": 0.0052, "step": 182800 }, { "epoch": 1.5436448460028287, "grad_norm": 0.08391869813203812, "learning_rate": 1.503865690948127e-06, "loss": 0.0082, "step": 182810 }, { "epoch": 1.5437292858499925, "grad_norm": 0.0958244651556015, "learning_rate": 1.5033389321852504e-06, "loss": 0.0055, "step": 182820 }, { "epoch": 1.5438137256971565, "grad_norm": 0.11706115305423737, "learning_rate": 1.5028122493691339e-06, "loss": 0.0032, "step": 182830 }, { "epoch": 1.5438981655443205, "grad_norm": 0.09436862915754318, "learning_rate": 1.5022856425112198e-06, "loss": 0.0109, "step": 182840 }, { "epoch": 1.5439826053914842, "grad_norm": 0.05942439287900925, "learning_rate": 1.5017591116229436e-06, "loss": 0.0062, "step": 182850 }, { "epoch": 1.544067045238648, "grad_norm": 0.49460965394973755, "learning_rate": 1.5012326567157443e-06, "loss": 0.0143, "step": 182860 }, { "epoch": 1.544151485085812, "grad_norm": 0.38388344645500183, "learning_rate": 1.5007062778010544e-06, "loss": 0.0064, "step": 182870 }, { "epoch": 1.544235924932976, "grad_norm": 0.4182949364185333, "learning_rate": 1.5001799748903073e-06, "loss": 0.0137, "step": 182880 }, { "epoch": 1.5443203647801398, "grad_norm": 0.17095120251178741, "learning_rate": 1.4996537479949336e-06, "loss": 0.0047, "step": 182890 }, { "epoch": 1.5444048046273036, "grad_norm": 0.05932672321796417, "learning_rate": 1.4991275971263619e-06, "loss": 0.0052, "step": 182900 }, { "epoch": 1.5444892444744673, "grad_norm": 0.0014870514860376716, "learning_rate": 1.4986015222960231e-06, "loss": 0.0068, "step": 182910 }, { "epoch": 1.5445736843216313, "grad_norm": 0.210834801197052, "learning_rate": 1.4980755235153405e-06, "loss": 0.0091, "step": 182920 }, { "epoch": 1.5446581241687953, "grad_norm": 0.16757263243198395, "learning_rate": 1.4975496007957406e-06, "loss": 0.0038, "step": 182930 }, { "epoch": 1.544742564015959, "grad_norm": 0.3033149540424347, "learning_rate": 1.497023754148645e-06, "loss": 0.0057, "step": 182940 }, { "epoch": 1.5448270038631229, "grad_norm": 0.18100646138191223, "learning_rate": 1.4964979835854766e-06, "loss": 0.0041, "step": 182950 }, { "epoch": 1.5449114437102869, "grad_norm": 0.09044187515974045, "learning_rate": 1.4959722891176543e-06, "loss": 0.0047, "step": 182960 }, { "epoch": 1.5449958835574509, "grad_norm": 0.3510759174823761, "learning_rate": 1.4954466707565957e-06, "loss": 0.0082, "step": 182970 }, { "epoch": 1.5450803234046147, "grad_norm": 0.3364013731479645, "learning_rate": 1.4949211285137156e-06, "loss": 0.0036, "step": 182980 }, { "epoch": 1.5451647632517784, "grad_norm": 0.023599233478307724, "learning_rate": 1.4943956624004318e-06, "loss": 0.0048, "step": 182990 }, { "epoch": 1.5452492030989424, "grad_norm": 0.13338787853717804, "learning_rate": 1.4938702724281563e-06, "loss": 0.0071, "step": 183000 }, { "epoch": 1.5452492030989424, "eval_loss": 0.0064361728727817535, "eval_runtime": 2.9243, "eval_samples_per_second": 68.393, "eval_steps_per_second": 34.196, "step": 183000 }, { "epoch": 1.5453336429461064, "grad_norm": 0.009834099560976028, "learning_rate": 1.4933449586082982e-06, "loss": 0.0113, "step": 183010 }, { "epoch": 1.5454180827932702, "grad_norm": 0.37218335270881653, "learning_rate": 1.4928197209522705e-06, "loss": 0.0081, "step": 183020 }, { "epoch": 1.545502522640434, "grad_norm": 0.45669376850128174, "learning_rate": 1.4922945594714799e-06, "loss": 0.009, "step": 183030 }, { "epoch": 1.5455869624875977, "grad_norm": 0.23101258277893066, "learning_rate": 1.4917694741773326e-06, "loss": 0.0092, "step": 183040 }, { "epoch": 1.5456714023347617, "grad_norm": 0.18849056959152222, "learning_rate": 1.4912444650812318e-06, "loss": 0.004, "step": 183050 }, { "epoch": 1.5457558421819257, "grad_norm": 0.13034558296203613, "learning_rate": 1.490719532194584e-06, "loss": 0.0097, "step": 183060 }, { "epoch": 1.5458402820290895, "grad_norm": 0.3416181206703186, "learning_rate": 1.4901946755287872e-06, "loss": 0.0069, "step": 183070 }, { "epoch": 1.5459247218762533, "grad_norm": 0.7118712067604065, "learning_rate": 1.4896698950952443e-06, "loss": 0.0079, "step": 183080 }, { "epoch": 1.5460091617234173, "grad_norm": 0.14049649238586426, "learning_rate": 1.4891451909053506e-06, "loss": 0.0055, "step": 183090 }, { "epoch": 1.5460936015705813, "grad_norm": 0.2440481334924698, "learning_rate": 1.4886205629705052e-06, "loss": 0.0106, "step": 183100 }, { "epoch": 1.546178041417745, "grad_norm": 0.6740014553070068, "learning_rate": 1.4880960113021015e-06, "loss": 0.0076, "step": 183110 }, { "epoch": 1.5462624812649088, "grad_norm": 0.20434759557247162, "learning_rate": 1.4875715359115327e-06, "loss": 0.0038, "step": 183120 }, { "epoch": 1.5463469211120728, "grad_norm": 0.3243677318096161, "learning_rate": 1.4870471368101908e-06, "loss": 0.0053, "step": 183130 }, { "epoch": 1.5464313609592366, "grad_norm": 0.2535132169723511, "learning_rate": 1.4865228140094633e-06, "loss": 0.0118, "step": 183140 }, { "epoch": 1.5465158008064006, "grad_norm": 0.010929102078080177, "learning_rate": 1.4859985675207421e-06, "loss": 0.0098, "step": 183150 }, { "epoch": 1.5466002406535644, "grad_norm": 0.2328515499830246, "learning_rate": 1.4854743973554108e-06, "loss": 0.0051, "step": 183160 }, { "epoch": 1.5466846805007282, "grad_norm": 0.047529540956020355, "learning_rate": 1.4849503035248563e-06, "loss": 0.0088, "step": 183170 }, { "epoch": 1.5467691203478922, "grad_norm": 0.27670493721961975, "learning_rate": 1.4844262860404596e-06, "loss": 0.0055, "step": 183180 }, { "epoch": 1.5468535601950562, "grad_norm": 0.4534301459789276, "learning_rate": 1.4839023449136052e-06, "loss": 0.0057, "step": 183190 }, { "epoch": 1.54693800004222, "grad_norm": 0.3363713324069977, "learning_rate": 1.483378480155671e-06, "loss": 0.0051, "step": 183200 }, { "epoch": 1.5470224398893837, "grad_norm": 0.06924743950366974, "learning_rate": 1.4828546917780356e-06, "loss": 0.0034, "step": 183210 }, { "epoch": 1.5471068797365477, "grad_norm": 0.11274223774671555, "learning_rate": 1.4823309797920743e-06, "loss": 0.0037, "step": 183220 }, { "epoch": 1.5471913195837117, "grad_norm": 0.4013146460056305, "learning_rate": 1.4818073442091645e-06, "loss": 0.0057, "step": 183230 }, { "epoch": 1.5472757594308755, "grad_norm": 0.04521733894944191, "learning_rate": 1.4812837850406786e-06, "loss": 0.0065, "step": 183240 }, { "epoch": 1.5473601992780393, "grad_norm": 0.1843959391117096, "learning_rate": 1.4807603022979867e-06, "loss": 0.0062, "step": 183250 }, { "epoch": 1.547444639125203, "grad_norm": 0.06363624334335327, "learning_rate": 1.4802368959924606e-06, "loss": 0.0142, "step": 183260 }, { "epoch": 1.547529078972367, "grad_norm": 0.2153386026620865, "learning_rate": 1.4797135661354667e-06, "loss": 0.009, "step": 183270 }, { "epoch": 1.547613518819531, "grad_norm": 0.2189788669347763, "learning_rate": 1.4791903127383738e-06, "loss": 0.0049, "step": 183280 }, { "epoch": 1.5476979586666948, "grad_norm": 0.015937432646751404, "learning_rate": 1.4786671358125465e-06, "loss": 0.0057, "step": 183290 }, { "epoch": 1.5477823985138586, "grad_norm": 0.025913473218679428, "learning_rate": 1.478144035369347e-06, "loss": 0.0046, "step": 183300 }, { "epoch": 1.5478668383610226, "grad_norm": 0.21658611297607422, "learning_rate": 1.477621011420136e-06, "loss": 0.0027, "step": 183310 }, { "epoch": 1.5479512782081866, "grad_norm": 0.6794124245643616, "learning_rate": 1.477098063976276e-06, "loss": 0.0067, "step": 183320 }, { "epoch": 1.5480357180553503, "grad_norm": 0.2779865860939026, "learning_rate": 1.4765751930491233e-06, "loss": 0.0049, "step": 183330 }, { "epoch": 1.5481201579025141, "grad_norm": 0.12787047028541565, "learning_rate": 1.4760523986500364e-06, "loss": 0.0048, "step": 183340 }, { "epoch": 1.5482045977496781, "grad_norm": 0.13399994373321533, "learning_rate": 1.4755296807903696e-06, "loss": 0.0034, "step": 183350 }, { "epoch": 1.548289037596842, "grad_norm": 0.20008307695388794, "learning_rate": 1.4750070394814758e-06, "loss": 0.0035, "step": 183360 }, { "epoch": 1.5483734774440059, "grad_norm": 0.6166975498199463, "learning_rate": 1.4744844747347065e-06, "loss": 0.0073, "step": 183370 }, { "epoch": 1.5484579172911697, "grad_norm": 0.27974602580070496, "learning_rate": 1.4739619865614108e-06, "loss": 0.0103, "step": 183380 }, { "epoch": 1.5485423571383334, "grad_norm": 0.4088631570339203, "learning_rate": 1.4734395749729397e-06, "loss": 0.0109, "step": 183390 }, { "epoch": 1.5486267969854974, "grad_norm": 0.17042435705661774, "learning_rate": 1.4729172399806364e-06, "loss": 0.007, "step": 183400 }, { "epoch": 1.5487112368326614, "grad_norm": 0.32824549078941345, "learning_rate": 1.47239498159585e-06, "loss": 0.0066, "step": 183410 }, { "epoch": 1.5487956766798252, "grad_norm": 0.3886638283729553, "learning_rate": 1.4718727998299199e-06, "loss": 0.0101, "step": 183420 }, { "epoch": 1.548880116526989, "grad_norm": 0.3844357430934906, "learning_rate": 1.4713506946941908e-06, "loss": 0.0043, "step": 183430 }, { "epoch": 1.548964556374153, "grad_norm": 0.2011071890592575, "learning_rate": 1.470828666200002e-06, "loss": 0.0048, "step": 183440 }, { "epoch": 1.549048996221317, "grad_norm": 0.1647409051656723, "learning_rate": 1.4703067143586913e-06, "loss": 0.0025, "step": 183450 }, { "epoch": 1.5491334360684808, "grad_norm": 0.11129502952098846, "learning_rate": 1.469784839181594e-06, "loss": 0.0051, "step": 183460 }, { "epoch": 1.5492178759156445, "grad_norm": 0.35567760467529297, "learning_rate": 1.4692630406800474e-06, "loss": 0.0085, "step": 183470 }, { "epoch": 1.5493023157628085, "grad_norm": 0.07500595599412918, "learning_rate": 1.468741318865385e-06, "loss": 0.0102, "step": 183480 }, { "epoch": 1.5493867556099723, "grad_norm": 0.28306224942207336, "learning_rate": 1.4682196737489352e-06, "loss": 0.007, "step": 183490 }, { "epoch": 1.5494711954571363, "grad_norm": 0.037637531757354736, "learning_rate": 1.4676981053420325e-06, "loss": 0.0056, "step": 183500 }, { "epoch": 1.5495556353043, "grad_norm": 0.2634180784225464, "learning_rate": 1.4671766136560013e-06, "loss": 0.0041, "step": 183510 }, { "epoch": 1.5496400751514638, "grad_norm": 0.4491437077522278, "learning_rate": 1.4666551987021714e-06, "loss": 0.0078, "step": 183520 }, { "epoch": 1.5497245149986278, "grad_norm": 0.14662984013557434, "learning_rate": 1.4661338604918674e-06, "loss": 0.0053, "step": 183530 }, { "epoch": 1.5498089548457918, "grad_norm": 0.3975732624530792, "learning_rate": 1.4656125990364113e-06, "loss": 0.0063, "step": 183540 }, { "epoch": 1.5498933946929556, "grad_norm": 0.5288156867027283, "learning_rate": 1.4650914143471234e-06, "loss": 0.0052, "step": 183550 }, { "epoch": 1.5499778345401194, "grad_norm": 0.05074087157845497, "learning_rate": 1.4645703064353273e-06, "loss": 0.0065, "step": 183560 }, { "epoch": 1.5500622743872834, "grad_norm": 0.13390493392944336, "learning_rate": 1.4640492753123387e-06, "loss": 0.0054, "step": 183570 }, { "epoch": 1.5501467142344474, "grad_norm": 0.5009125471115112, "learning_rate": 1.4635283209894762e-06, "loss": 0.0101, "step": 183580 }, { "epoch": 1.5502311540816112, "grad_norm": 0.07945496588945389, "learning_rate": 1.463007443478054e-06, "loss": 0.008, "step": 183590 }, { "epoch": 1.550315593928775, "grad_norm": 0.2545928657054901, "learning_rate": 1.4624866427893835e-06, "loss": 0.0081, "step": 183600 }, { "epoch": 1.5504000337759387, "grad_norm": 0.15649232268333435, "learning_rate": 1.4619659189347817e-06, "loss": 0.008, "step": 183610 }, { "epoch": 1.5504844736231027, "grad_norm": 0.25756290555000305, "learning_rate": 1.4614452719255524e-06, "loss": 0.0067, "step": 183620 }, { "epoch": 1.5505689134702667, "grad_norm": 0.07432626187801361, "learning_rate": 1.4609247017730078e-06, "loss": 0.0127, "step": 183630 }, { "epoch": 1.5506533533174305, "grad_norm": 0.1359795480966568, "learning_rate": 1.4604042084884518e-06, "loss": 0.004, "step": 183640 }, { "epoch": 1.5507377931645943, "grad_norm": 0.24171452224254608, "learning_rate": 1.4598837920831927e-06, "loss": 0.0112, "step": 183650 }, { "epoch": 1.5508222330117583, "grad_norm": 0.10655530542135239, "learning_rate": 1.4593634525685301e-06, "loss": 0.0046, "step": 183660 }, { "epoch": 1.5509066728589223, "grad_norm": 0.1088305339217186, "learning_rate": 1.4588431899557704e-06, "loss": 0.0043, "step": 183670 }, { "epoch": 1.550991112706086, "grad_norm": 0.7892287969589233, "learning_rate": 1.4583230042562102e-06, "loss": 0.0098, "step": 183680 }, { "epoch": 1.5510755525532498, "grad_norm": 0.09141211956739426, "learning_rate": 1.457802895481149e-06, "loss": 0.0092, "step": 183690 }, { "epoch": 1.5511599924004138, "grad_norm": 0.11831171810626984, "learning_rate": 1.4572828636418811e-06, "loss": 0.009, "step": 183700 }, { "epoch": 1.5512444322475778, "grad_norm": 0.17702464759349823, "learning_rate": 1.4567629087497054e-06, "loss": 0.011, "step": 183710 }, { "epoch": 1.5513288720947416, "grad_norm": 0.025202546268701553, "learning_rate": 1.4562430308159125e-06, "loss": 0.0064, "step": 183720 }, { "epoch": 1.5514133119419053, "grad_norm": 0.3317025601863861, "learning_rate": 1.4557232298517943e-06, "loss": 0.0034, "step": 183730 }, { "epoch": 1.5514977517890691, "grad_norm": 0.7833624482154846, "learning_rate": 1.455203505868642e-06, "loss": 0.0066, "step": 183740 }, { "epoch": 1.5515821916362331, "grad_norm": 0.13969336450099945, "learning_rate": 1.4546838588777418e-06, "loss": 0.006, "step": 183750 }, { "epoch": 1.5516666314833971, "grad_norm": 0.49142277240753174, "learning_rate": 1.4541642888903835e-06, "loss": 0.0069, "step": 183760 }, { "epoch": 1.551751071330561, "grad_norm": 0.049009211361408234, "learning_rate": 1.45364479591785e-06, "loss": 0.0094, "step": 183770 }, { "epoch": 1.5518355111777247, "grad_norm": 0.3039277195930481, "learning_rate": 1.4531253799714246e-06, "loss": 0.0076, "step": 183780 }, { "epoch": 1.5519199510248887, "grad_norm": 0.22455798089504242, "learning_rate": 1.4526060410623883e-06, "loss": 0.0048, "step": 183790 }, { "epoch": 1.5520043908720527, "grad_norm": 0.195778027176857, "learning_rate": 1.4520867792020227e-06, "loss": 0.0053, "step": 183800 }, { "epoch": 1.5520888307192164, "grad_norm": 0.5908423662185669, "learning_rate": 1.451567594401604e-06, "loss": 0.0072, "step": 183810 }, { "epoch": 1.5521732705663802, "grad_norm": 0.2873724102973938, "learning_rate": 1.4510484866724113e-06, "loss": 0.0068, "step": 183820 }, { "epoch": 1.552257710413544, "grad_norm": 0.35568782687187195, "learning_rate": 1.4505294560257183e-06, "loss": 0.0109, "step": 183830 }, { "epoch": 1.552342150260708, "grad_norm": 0.13511601090431213, "learning_rate": 1.4500105024727967e-06, "loss": 0.0074, "step": 183840 }, { "epoch": 1.552426590107872, "grad_norm": 0.19988910853862762, "learning_rate": 1.449491626024921e-06, "loss": 0.0053, "step": 183850 }, { "epoch": 1.5525110299550358, "grad_norm": 0.18751251697540283, "learning_rate": 1.4489728266933601e-06, "loss": 0.0072, "step": 183860 }, { "epoch": 1.5525954698021995, "grad_norm": 0.1284078061580658, "learning_rate": 1.4484541044893813e-06, "loss": 0.0065, "step": 183870 }, { "epoch": 1.5526799096493635, "grad_norm": 0.3398110866546631, "learning_rate": 1.4479354594242495e-06, "loss": 0.0073, "step": 183880 }, { "epoch": 1.5527643494965275, "grad_norm": 0.06126928701996803, "learning_rate": 1.447416891509234e-06, "loss": 0.0041, "step": 183890 }, { "epoch": 1.5528487893436913, "grad_norm": 0.6130473613739014, "learning_rate": 1.4468984007555936e-06, "loss": 0.0115, "step": 183900 }, { "epoch": 1.552933229190855, "grad_norm": 0.5753805041313171, "learning_rate": 1.4463799871745938e-06, "loss": 0.0115, "step": 183910 }, { "epoch": 1.553017669038019, "grad_norm": 0.4819694757461548, "learning_rate": 1.445861650777491e-06, "loss": 0.0087, "step": 183920 }, { "epoch": 1.553102108885183, "grad_norm": 0.6231630444526672, "learning_rate": 1.4453433915755472e-06, "loss": 0.0067, "step": 183930 }, { "epoch": 1.5531865487323468, "grad_norm": 0.27747485041618347, "learning_rate": 1.4448252095800136e-06, "loss": 0.0067, "step": 183940 }, { "epoch": 1.5532709885795106, "grad_norm": 0.22741685807704926, "learning_rate": 1.4443071048021502e-06, "loss": 0.0067, "step": 183950 }, { "epoch": 1.5533554284266744, "grad_norm": 0.16148357093334198, "learning_rate": 1.443789077253207e-06, "loss": 0.0064, "step": 183960 }, { "epoch": 1.5534398682738384, "grad_norm": 0.23823700845241547, "learning_rate": 1.4432711269444355e-06, "loss": 0.0054, "step": 183970 }, { "epoch": 1.5535243081210024, "grad_norm": 0.36853229999542236, "learning_rate": 1.442753253887087e-06, "loss": 0.0075, "step": 183980 }, { "epoch": 1.5536087479681662, "grad_norm": 0.26721394062042236, "learning_rate": 1.4422354580924081e-06, "loss": 0.0062, "step": 183990 }, { "epoch": 1.55369318781533, "grad_norm": 0.19747482240200043, "learning_rate": 1.441717739571647e-06, "loss": 0.0114, "step": 184000 }, { "epoch": 1.553777627662494, "grad_norm": 0.2406824827194214, "learning_rate": 1.441200098336048e-06, "loss": 0.0047, "step": 184010 }, { "epoch": 1.553862067509658, "grad_norm": 0.34797728061676025, "learning_rate": 1.4406825343968527e-06, "loss": 0.0072, "step": 184020 }, { "epoch": 1.5539465073568217, "grad_norm": 0.08488035202026367, "learning_rate": 1.4401650477653029e-06, "loss": 0.0065, "step": 184030 }, { "epoch": 1.5540309472039855, "grad_norm": 0.2524714171886444, "learning_rate": 1.4396476384526398e-06, "loss": 0.0054, "step": 184040 }, { "epoch": 1.5541153870511495, "grad_norm": 0.547472357749939, "learning_rate": 1.439130306470099e-06, "loss": 0.0083, "step": 184050 }, { "epoch": 1.5541998268983133, "grad_norm": 0.11816929280757904, "learning_rate": 1.4386130518289192e-06, "loss": 0.0045, "step": 184060 }, { "epoch": 1.5542842667454773, "grad_norm": 0.17086246609687805, "learning_rate": 1.4380958745403351e-06, "loss": 0.0071, "step": 184070 }, { "epoch": 1.554368706592641, "grad_norm": 0.4665669798851013, "learning_rate": 1.4375787746155762e-06, "loss": 0.0082, "step": 184080 }, { "epoch": 1.5544531464398048, "grad_norm": 0.25945812463760376, "learning_rate": 1.4370617520658786e-06, "loss": 0.0097, "step": 184090 }, { "epoch": 1.5545375862869688, "grad_norm": 0.25833871960639954, "learning_rate": 1.436544806902469e-06, "loss": 0.006, "step": 184100 }, { "epoch": 1.5546220261341328, "grad_norm": 0.08479230105876923, "learning_rate": 1.4360279391365768e-06, "loss": 0.0088, "step": 184110 }, { "epoch": 1.5547064659812966, "grad_norm": 0.05236386880278587, "learning_rate": 1.435511148779425e-06, "loss": 0.0054, "step": 184120 }, { "epoch": 1.5547909058284604, "grad_norm": 0.24416428804397583, "learning_rate": 1.4349944358422424e-06, "loss": 0.0066, "step": 184130 }, { "epoch": 1.5548753456756244, "grad_norm": 0.47172003984451294, "learning_rate": 1.4344778003362491e-06, "loss": 0.0064, "step": 184140 }, { "epoch": 1.5549597855227884, "grad_norm": 0.39355430006980896, "learning_rate": 1.4339612422726683e-06, "loss": 0.006, "step": 184150 }, { "epoch": 1.5550442253699521, "grad_norm": 0.329139769077301, "learning_rate": 1.4334447616627173e-06, "loss": 0.0078, "step": 184160 }, { "epoch": 1.555128665217116, "grad_norm": 0.28894999623298645, "learning_rate": 1.4329283585176168e-06, "loss": 0.01, "step": 184170 }, { "epoch": 1.5552131050642797, "grad_norm": 0.7263752818107605, "learning_rate": 1.4324120328485814e-06, "loss": 0.0119, "step": 184180 }, { "epoch": 1.5552975449114437, "grad_norm": 0.24286195635795593, "learning_rate": 1.4318957846668252e-06, "loss": 0.0049, "step": 184190 }, { "epoch": 1.5553819847586077, "grad_norm": 0.46114838123321533, "learning_rate": 1.4313796139835618e-06, "loss": 0.0114, "step": 184200 }, { "epoch": 1.5554664246057714, "grad_norm": 0.6278848052024841, "learning_rate": 1.4308635208100002e-06, "loss": 0.0106, "step": 184210 }, { "epoch": 1.5555508644529352, "grad_norm": 0.28295427560806274, "learning_rate": 1.4303475051573528e-06, "loss": 0.0036, "step": 184220 }, { "epoch": 1.5556353043000992, "grad_norm": 0.49964842200279236, "learning_rate": 1.429831567036825e-06, "loss": 0.0064, "step": 184230 }, { "epoch": 1.5557197441472632, "grad_norm": 0.17989645898342133, "learning_rate": 1.4293157064596253e-06, "loss": 0.0072, "step": 184240 }, { "epoch": 1.555804183994427, "grad_norm": 0.4433917999267578, "learning_rate": 1.428799923436957e-06, "loss": 0.0063, "step": 184250 }, { "epoch": 1.5558886238415908, "grad_norm": 0.3155333697795868, "learning_rate": 1.4282842179800222e-06, "loss": 0.0058, "step": 184260 }, { "epoch": 1.5559730636887548, "grad_norm": 0.006862396840006113, "learning_rate": 1.427768590100021e-06, "loss": 0.009, "step": 184270 }, { "epoch": 1.5560575035359188, "grad_norm": 0.2865884602069855, "learning_rate": 1.4272530398081553e-06, "loss": 0.0063, "step": 184280 }, { "epoch": 1.5561419433830825, "grad_norm": 0.6683279275894165, "learning_rate": 1.4267375671156203e-06, "loss": 0.0063, "step": 184290 }, { "epoch": 1.5562263832302463, "grad_norm": 0.3040822744369507, "learning_rate": 1.4262221720336138e-06, "loss": 0.0114, "step": 184300 }, { "epoch": 1.55631082307741, "grad_norm": 0.42329373955726624, "learning_rate": 1.4257068545733304e-06, "loss": 0.0085, "step": 184310 }, { "epoch": 1.556395262924574, "grad_norm": 0.16171887516975403, "learning_rate": 1.4251916147459593e-06, "loss": 0.0057, "step": 184320 }, { "epoch": 1.556479702771738, "grad_norm": 0.012658600695431232, "learning_rate": 1.424676452562696e-06, "loss": 0.0061, "step": 184330 }, { "epoch": 1.5565641426189019, "grad_norm": 0.23903252184391022, "learning_rate": 1.4241613680347272e-06, "loss": 0.0036, "step": 184340 }, { "epoch": 1.5566485824660656, "grad_norm": 0.1676783263683319, "learning_rate": 1.423646361173241e-06, "loss": 0.0082, "step": 184350 }, { "epoch": 1.5567330223132296, "grad_norm": 0.19205820560455322, "learning_rate": 1.4231314319894213e-06, "loss": 0.0059, "step": 184360 }, { "epoch": 1.5568174621603936, "grad_norm": 0.20813573896884918, "learning_rate": 1.4226165804944552e-06, "loss": 0.008, "step": 184370 }, { "epoch": 1.5569019020075574, "grad_norm": 0.41240838170051575, "learning_rate": 1.4221018066995223e-06, "loss": 0.0058, "step": 184380 }, { "epoch": 1.5569863418547212, "grad_norm": 0.03523958846926689, "learning_rate": 1.4215871106158068e-06, "loss": 0.0041, "step": 184390 }, { "epoch": 1.557070781701885, "grad_norm": 0.1831567883491516, "learning_rate": 1.4210724922544845e-06, "loss": 0.0098, "step": 184400 }, { "epoch": 1.557155221549049, "grad_norm": 0.2877165973186493, "learning_rate": 1.4205579516267354e-06, "loss": 0.0043, "step": 184410 }, { "epoch": 1.557239661396213, "grad_norm": 1.1992555856704712, "learning_rate": 1.4200434887437347e-06, "loss": 0.0108, "step": 184420 }, { "epoch": 1.5573241012433767, "grad_norm": 0.41242286562919617, "learning_rate": 1.4195291036166554e-06, "loss": 0.0103, "step": 184430 }, { "epoch": 1.5574085410905405, "grad_norm": 0.23042242228984833, "learning_rate": 1.4190147962566702e-06, "loss": 0.0047, "step": 184440 }, { "epoch": 1.5574929809377045, "grad_norm": 0.3746476173400879, "learning_rate": 1.4185005666749485e-06, "loss": 0.0145, "step": 184450 }, { "epoch": 1.5575774207848685, "grad_norm": 0.19810059666633606, "learning_rate": 1.4179864148826622e-06, "loss": 0.0035, "step": 184460 }, { "epoch": 1.5576618606320323, "grad_norm": 0.25611814856529236, "learning_rate": 1.4174723408909751e-06, "loss": 0.0062, "step": 184470 }, { "epoch": 1.557746300479196, "grad_norm": 0.2597793638706207, "learning_rate": 1.4169583447110562e-06, "loss": 0.0048, "step": 184480 }, { "epoch": 1.55783074032636, "grad_norm": 0.28332579135894775, "learning_rate": 1.4164444263540667e-06, "loss": 0.0053, "step": 184490 }, { "epoch": 1.557915180173524, "grad_norm": 0.32842570543289185, "learning_rate": 1.4159305858311711e-06, "loss": 0.0045, "step": 184500 }, { "epoch": 1.5579996200206878, "grad_norm": 0.12701819837093353, "learning_rate": 1.4154168231535287e-06, "loss": 0.0078, "step": 184510 }, { "epoch": 1.5580840598678516, "grad_norm": 0.2694878876209259, "learning_rate": 1.4149031383322982e-06, "loss": 0.0033, "step": 184520 }, { "epoch": 1.5581684997150154, "grad_norm": 0.09654927998781204, "learning_rate": 1.4143895313786354e-06, "loss": 0.0019, "step": 184530 }, { "epoch": 1.5582529395621794, "grad_norm": 0.0913149043917656, "learning_rate": 1.4138760023036991e-06, "loss": 0.0075, "step": 184540 }, { "epoch": 1.5583373794093434, "grad_norm": 0.2230314314365387, "learning_rate": 1.413362551118641e-06, "loss": 0.0065, "step": 184550 }, { "epoch": 1.5584218192565071, "grad_norm": 0.22033488750457764, "learning_rate": 1.412849177834612e-06, "loss": 0.0071, "step": 184560 }, { "epoch": 1.558506259103671, "grad_norm": 0.24937431514263153, "learning_rate": 1.4123358824627647e-06, "loss": 0.005, "step": 184570 }, { "epoch": 1.558590698950835, "grad_norm": 0.07623331993818283, "learning_rate": 1.4118226650142474e-06, "loss": 0.0054, "step": 184580 }, { "epoch": 1.558675138797999, "grad_norm": 0.2416665405035019, "learning_rate": 1.4113095255002063e-06, "loss": 0.0103, "step": 184590 }, { "epoch": 1.5587595786451627, "grad_norm": 0.003622888820245862, "learning_rate": 1.4107964639317857e-06, "loss": 0.0065, "step": 184600 }, { "epoch": 1.5588440184923265, "grad_norm": 0.1695244312286377, "learning_rate": 1.4102834803201315e-06, "loss": 0.0033, "step": 184610 }, { "epoch": 1.5589284583394905, "grad_norm": 0.49607229232788086, "learning_rate": 1.4097705746763834e-06, "loss": 0.0097, "step": 184620 }, { "epoch": 1.5590128981866542, "grad_norm": 0.26464909315109253, "learning_rate": 1.4092577470116837e-06, "loss": 0.0054, "step": 184630 }, { "epoch": 1.5590973380338182, "grad_norm": 0.04436232149600983, "learning_rate": 1.4087449973371687e-06, "loss": 0.0066, "step": 184640 }, { "epoch": 1.559181777880982, "grad_norm": 0.37000998854637146, "learning_rate": 1.4082323256639779e-06, "loss": 0.01, "step": 184650 }, { "epoch": 1.5592662177281458, "grad_norm": 0.07118766009807587, "learning_rate": 1.4077197320032455e-06, "loss": 0.0054, "step": 184660 }, { "epoch": 1.5593506575753098, "grad_norm": 0.147616907954216, "learning_rate": 1.4072072163661037e-06, "loss": 0.0039, "step": 184670 }, { "epoch": 1.5594350974224738, "grad_norm": 0.38757267594337463, "learning_rate": 1.4066947787636852e-06, "loss": 0.0044, "step": 184680 }, { "epoch": 1.5595195372696375, "grad_norm": 0.20854748785495758, "learning_rate": 1.4061824192071182e-06, "loss": 0.0069, "step": 184690 }, { "epoch": 1.5596039771168013, "grad_norm": 0.16378439962863922, "learning_rate": 1.4056701377075337e-06, "loss": 0.0064, "step": 184700 }, { "epoch": 1.5596884169639653, "grad_norm": 0.10373769700527191, "learning_rate": 1.4051579342760563e-06, "loss": 0.007, "step": 184710 }, { "epoch": 1.5597728568111293, "grad_norm": 0.26578837633132935, "learning_rate": 1.4046458089238135e-06, "loss": 0.005, "step": 184720 }, { "epoch": 1.559857296658293, "grad_norm": 0.23845909535884857, "learning_rate": 1.404133761661925e-06, "loss": 0.0029, "step": 184730 }, { "epoch": 1.5599417365054569, "grad_norm": 0.27057379484176636, "learning_rate": 1.403621792501516e-06, "loss": 0.0064, "step": 184740 }, { "epoch": 1.5600261763526206, "grad_norm": 0.39187881350517273, "learning_rate": 1.4031099014537053e-06, "loss": 0.0082, "step": 184750 }, { "epoch": 1.5601106161997846, "grad_norm": 0.31274551153182983, "learning_rate": 1.4025980885296099e-06, "loss": 0.0066, "step": 184760 }, { "epoch": 1.5601950560469486, "grad_norm": 0.5025918483734131, "learning_rate": 1.4020863537403466e-06, "loss": 0.0102, "step": 184770 }, { "epoch": 1.5602794958941124, "grad_norm": 0.39025476574897766, "learning_rate": 1.4015746970970296e-06, "loss": 0.0047, "step": 184780 }, { "epoch": 1.5603639357412762, "grad_norm": 0.18104472756385803, "learning_rate": 1.4010631186107737e-06, "loss": 0.0054, "step": 184790 }, { "epoch": 1.5604483755884402, "grad_norm": 0.28254711627960205, "learning_rate": 1.4005516182926882e-06, "loss": 0.0071, "step": 184800 }, { "epoch": 1.5605328154356042, "grad_norm": 0.15244343876838684, "learning_rate": 1.4000401961538852e-06, "loss": 0.0037, "step": 184810 }, { "epoch": 1.560617255282768, "grad_norm": 0.6135812997817993, "learning_rate": 1.39952885220547e-06, "loss": 0.0052, "step": 184820 }, { "epoch": 1.5607016951299317, "grad_norm": 1.0612537860870361, "learning_rate": 1.399017586458553e-06, "loss": 0.0092, "step": 184830 }, { "epoch": 1.5607861349770957, "grad_norm": 0.03018118627369404, "learning_rate": 1.3985063989242336e-06, "loss": 0.0087, "step": 184840 }, { "epoch": 1.5608705748242597, "grad_norm": 0.2627928555011749, "learning_rate": 1.397995289613619e-06, "loss": 0.0076, "step": 184850 }, { "epoch": 1.5609550146714235, "grad_norm": 0.21451492607593536, "learning_rate": 1.3974842585378069e-06, "loss": 0.0093, "step": 184860 }, { "epoch": 1.5610394545185873, "grad_norm": 0.37973180413246155, "learning_rate": 1.3969733057078993e-06, "loss": 0.0082, "step": 184870 }, { "epoch": 1.561123894365751, "grad_norm": 0.12351249903440475, "learning_rate": 1.3964624311349921e-06, "loss": 0.0076, "step": 184880 }, { "epoch": 1.561208334212915, "grad_norm": 0.5045075416564941, "learning_rate": 1.3959516348301844e-06, "loss": 0.0052, "step": 184890 }, { "epoch": 1.561292774060079, "grad_norm": 0.14505037665367126, "learning_rate": 1.395440916804568e-06, "loss": 0.006, "step": 184900 }, { "epoch": 1.5613772139072428, "grad_norm": 0.29319390654563904, "learning_rate": 1.3949302770692363e-06, "loss": 0.0106, "step": 184910 }, { "epoch": 1.5614616537544066, "grad_norm": 0.17983731627464294, "learning_rate": 1.3944197156352806e-06, "loss": 0.0052, "step": 184920 }, { "epoch": 1.5615460936015706, "grad_norm": 0.16903385519981384, "learning_rate": 1.393909232513788e-06, "loss": 0.0043, "step": 184930 }, { "epoch": 1.5616305334487346, "grad_norm": 0.23444341123104095, "learning_rate": 1.3933988277158495e-06, "loss": 0.0064, "step": 184940 }, { "epoch": 1.5617149732958984, "grad_norm": 0.09350397437810898, "learning_rate": 1.3928885012525483e-06, "loss": 0.0072, "step": 184950 }, { "epoch": 1.5617994131430621, "grad_norm": 0.3908798396587372, "learning_rate": 1.3923782531349705e-06, "loss": 0.0075, "step": 184960 }, { "epoch": 1.5618838529902261, "grad_norm": 0.08561635762453079, "learning_rate": 1.3918680833741966e-06, "loss": 0.006, "step": 184970 }, { "epoch": 1.56196829283739, "grad_norm": 0.523074209690094, "learning_rate": 1.3913579919813102e-06, "loss": 0.0099, "step": 184980 }, { "epoch": 1.562052732684554, "grad_norm": 0.24363268911838531, "learning_rate": 1.3908479789673883e-06, "loss": 0.0037, "step": 184990 }, { "epoch": 1.5621371725317177, "grad_norm": 0.34708964824676514, "learning_rate": 1.3903380443435093e-06, "loss": 0.0045, "step": 185000 }, { "epoch": 1.5622216123788815, "grad_norm": 0.31449225544929504, "learning_rate": 1.3898281881207481e-06, "loss": 0.0055, "step": 185010 }, { "epoch": 1.5623060522260455, "grad_norm": 0.159745454788208, "learning_rate": 1.389318410310177e-06, "loss": 0.0046, "step": 185020 }, { "epoch": 1.5623904920732095, "grad_norm": 0.2497866451740265, "learning_rate": 1.388808710922872e-06, "loss": 0.0054, "step": 185030 }, { "epoch": 1.5624749319203732, "grad_norm": 0.4274110198020935, "learning_rate": 1.3882990899699e-06, "loss": 0.0076, "step": 185040 }, { "epoch": 1.562559371767537, "grad_norm": 0.1911843866109848, "learning_rate": 1.3877895474623338e-06, "loss": 0.0091, "step": 185050 }, { "epoch": 1.562643811614701, "grad_norm": 0.21577580273151398, "learning_rate": 1.3872800834112365e-06, "loss": 0.0079, "step": 185060 }, { "epoch": 1.562728251461865, "grad_norm": 0.3336068391799927, "learning_rate": 1.3867706978276769e-06, "loss": 0.0074, "step": 185070 }, { "epoch": 1.5628126913090288, "grad_norm": 0.39637577533721924, "learning_rate": 1.3862613907227174e-06, "loss": 0.0036, "step": 185080 }, { "epoch": 1.5628971311561926, "grad_norm": 0.21867962181568146, "learning_rate": 1.38575216210742e-06, "loss": 0.0055, "step": 185090 }, { "epoch": 1.5629815710033563, "grad_norm": 0.45004957914352417, "learning_rate": 1.3852430119928428e-06, "loss": 0.0095, "step": 185100 }, { "epoch": 1.5630660108505203, "grad_norm": 0.11649489402770996, "learning_rate": 1.3847339403900489e-06, "loss": 0.0059, "step": 185110 }, { "epoch": 1.5631504506976843, "grad_norm": 0.6975632905960083, "learning_rate": 1.3842249473100916e-06, "loss": 0.0137, "step": 185120 }, { "epoch": 1.563234890544848, "grad_norm": 0.15513555705547333, "learning_rate": 1.3837160327640287e-06, "loss": 0.0082, "step": 185130 }, { "epoch": 1.5633193303920119, "grad_norm": 0.2618056535720825, "learning_rate": 1.3832071967629124e-06, "loss": 0.0043, "step": 185140 }, { "epoch": 1.5634037702391759, "grad_norm": 0.5416372418403625, "learning_rate": 1.382698439317795e-06, "loss": 0.0057, "step": 185150 }, { "epoch": 1.5634882100863399, "grad_norm": 0.39947837591171265, "learning_rate": 1.3821897604397261e-06, "loss": 0.0048, "step": 185160 }, { "epoch": 1.5635726499335036, "grad_norm": 0.15542641282081604, "learning_rate": 1.381681160139753e-06, "loss": 0.0075, "step": 185170 }, { "epoch": 1.5636570897806674, "grad_norm": 0.21866194903850555, "learning_rate": 1.3811726384289242e-06, "loss": 0.0042, "step": 185180 }, { "epoch": 1.5637415296278314, "grad_norm": 0.29698848724365234, "learning_rate": 1.3806641953182836e-06, "loss": 0.0046, "step": 185190 }, { "epoch": 1.5638259694749954, "grad_norm": 0.2614239752292633, "learning_rate": 1.380155830818876e-06, "loss": 0.0093, "step": 185200 }, { "epoch": 1.5639104093221592, "grad_norm": 0.34667786955833435, "learning_rate": 1.3796475449417407e-06, "loss": 0.0109, "step": 185210 }, { "epoch": 1.563994849169323, "grad_norm": 0.24153585731983185, "learning_rate": 1.3791393376979201e-06, "loss": 0.004, "step": 185220 }, { "epoch": 1.5640792890164867, "grad_norm": 0.08494175970554352, "learning_rate": 1.3786312090984515e-06, "loss": 0.0037, "step": 185230 }, { "epoch": 1.5641637288636507, "grad_norm": 0.2659756541252136, "learning_rate": 1.378123159154371e-06, "loss": 0.0104, "step": 185240 }, { "epoch": 1.5642481687108147, "grad_norm": 0.3227795362472534, "learning_rate": 1.377615187876713e-06, "loss": 0.0035, "step": 185250 }, { "epoch": 1.5643326085579785, "grad_norm": 0.19712555408477783, "learning_rate": 1.377107295276509e-06, "loss": 0.0047, "step": 185260 }, { "epoch": 1.5644170484051423, "grad_norm": 0.25156158208847046, "learning_rate": 1.3765994813647942e-06, "loss": 0.0113, "step": 185270 }, { "epoch": 1.5645014882523063, "grad_norm": 0.37134456634521484, "learning_rate": 1.376091746152594e-06, "loss": 0.0057, "step": 185280 }, { "epoch": 1.5645859280994703, "grad_norm": 0.24687886238098145, "learning_rate": 1.375584089650941e-06, "loss": 0.0054, "step": 185290 }, { "epoch": 1.564670367946634, "grad_norm": 0.17464286088943481, "learning_rate": 1.3750765118708564e-06, "loss": 0.0088, "step": 185300 }, { "epoch": 1.5647548077937978, "grad_norm": 0.18764197826385498, "learning_rate": 1.3745690128233691e-06, "loss": 0.0059, "step": 185310 }, { "epoch": 1.5648392476409616, "grad_norm": 0.41858580708503723, "learning_rate": 1.3740615925195e-06, "loss": 0.0074, "step": 185320 }, { "epoch": 1.5649236874881256, "grad_norm": 0.1823788285255432, "learning_rate": 1.3735542509702698e-06, "loss": 0.0073, "step": 185330 }, { "epoch": 1.5650081273352896, "grad_norm": 0.11589180678129196, "learning_rate": 1.3730469881866964e-06, "loss": 0.0036, "step": 185340 }, { "epoch": 1.5650925671824534, "grad_norm": 0.09331632405519485, "learning_rate": 1.3725398041798015e-06, "loss": 0.0032, "step": 185350 }, { "epoch": 1.5651770070296172, "grad_norm": 0.05650199204683304, "learning_rate": 1.372032698960598e-06, "loss": 0.0075, "step": 185360 }, { "epoch": 1.5652614468767811, "grad_norm": 0.38641124963760376, "learning_rate": 1.3715256725400998e-06, "loss": 0.004, "step": 185370 }, { "epoch": 1.5653458867239451, "grad_norm": 0.1760374903678894, "learning_rate": 1.3710187249293216e-06, "loss": 0.0038, "step": 185380 }, { "epoch": 1.565430326571109, "grad_norm": 0.3875682055950165, "learning_rate": 1.3705118561392716e-06, "loss": 0.006, "step": 185390 }, { "epoch": 1.5655147664182727, "grad_norm": 0.23346009850502014, "learning_rate": 1.3700050661809622e-06, "loss": 0.0043, "step": 185400 }, { "epoch": 1.5655992062654367, "grad_norm": 0.016938136890530586, "learning_rate": 1.3694983550653984e-06, "loss": 0.0092, "step": 185410 }, { "epoch": 1.5656836461126007, "grad_norm": 0.26183193922042847, "learning_rate": 1.3689917228035865e-06, "loss": 0.0042, "step": 185420 }, { "epoch": 1.5657680859597645, "grad_norm": 0.018517641350626945, "learning_rate": 1.3684851694065292e-06, "loss": 0.0096, "step": 185430 }, { "epoch": 1.5658525258069282, "grad_norm": 0.045841433107852936, "learning_rate": 1.3679786948852308e-06, "loss": 0.0071, "step": 185440 }, { "epoch": 1.565936965654092, "grad_norm": 0.10344940423965454, "learning_rate": 1.36747229925069e-06, "loss": 0.0049, "step": 185450 }, { "epoch": 1.566021405501256, "grad_norm": 0.09001466631889343, "learning_rate": 1.3669659825139076e-06, "loss": 0.0077, "step": 185460 }, { "epoch": 1.56610584534842, "grad_norm": 0.18313239514827728, "learning_rate": 1.3664597446858797e-06, "loss": 0.0048, "step": 185470 }, { "epoch": 1.5661902851955838, "grad_norm": 0.3365430533885956, "learning_rate": 1.3659535857776012e-06, "loss": 0.0044, "step": 185480 }, { "epoch": 1.5662747250427476, "grad_norm": 0.09885025024414062, "learning_rate": 1.3654475058000666e-06, "loss": 0.0032, "step": 185490 }, { "epoch": 1.5663591648899116, "grad_norm": 0.2863408923149109, "learning_rate": 1.3649415047642656e-06, "loss": 0.0072, "step": 185500 }, { "epoch": 1.5664436047370756, "grad_norm": 0.6975957751274109, "learning_rate": 1.3644355826811916e-06, "loss": 0.0091, "step": 185510 }, { "epoch": 1.5665280445842393, "grad_norm": 0.027088159695267677, "learning_rate": 1.3639297395618306e-06, "loss": 0.0065, "step": 185520 }, { "epoch": 1.566612484431403, "grad_norm": 0.38387376070022583, "learning_rate": 1.363423975417172e-06, "loss": 0.006, "step": 185530 }, { "epoch": 1.566696924278567, "grad_norm": 0.06239985302090645, "learning_rate": 1.3629182902581978e-06, "loss": 0.0063, "step": 185540 }, { "epoch": 1.5667813641257309, "grad_norm": 0.20794156193733215, "learning_rate": 1.3624126840958946e-06, "loss": 0.0069, "step": 185550 }, { "epoch": 1.5668658039728949, "grad_norm": 0.2444993257522583, "learning_rate": 1.3619071569412422e-06, "loss": 0.0055, "step": 185560 }, { "epoch": 1.5669502438200587, "grad_norm": 0.034786492586135864, "learning_rate": 1.3614017088052211e-06, "loss": 0.0079, "step": 185570 }, { "epoch": 1.5670346836672224, "grad_norm": 0.6268543601036072, "learning_rate": 1.3608963396988078e-06, "loss": 0.0131, "step": 185580 }, { "epoch": 1.5671191235143864, "grad_norm": 0.3297923803329468, "learning_rate": 1.360391049632982e-06, "loss": 0.0032, "step": 185590 }, { "epoch": 1.5672035633615504, "grad_norm": 0.21469122171401978, "learning_rate": 1.3598858386187165e-06, "loss": 0.0079, "step": 185600 }, { "epoch": 1.5672880032087142, "grad_norm": 0.2529645562171936, "learning_rate": 1.3593807066669833e-06, "loss": 0.0047, "step": 185610 }, { "epoch": 1.567372443055878, "grad_norm": 0.4500932991504669, "learning_rate": 1.3588756537887566e-06, "loss": 0.0041, "step": 185620 }, { "epoch": 1.567456882903042, "grad_norm": 0.40937432646751404, "learning_rate": 1.3583706799950024e-06, "loss": 0.0044, "step": 185630 }, { "epoch": 1.567541322750206, "grad_norm": 0.41184136271476746, "learning_rate": 1.357865785296693e-06, "loss": 0.0038, "step": 185640 }, { "epoch": 1.5676257625973697, "grad_norm": 0.7328463792800903, "learning_rate": 1.3573609697047923e-06, "loss": 0.0127, "step": 185650 }, { "epoch": 1.5677102024445335, "grad_norm": 0.11984160542488098, "learning_rate": 1.3568562332302649e-06, "loss": 0.0043, "step": 185660 }, { "epoch": 1.5677946422916973, "grad_norm": 0.17374573647975922, "learning_rate": 1.3563515758840723e-06, "loss": 0.0045, "step": 185670 }, { "epoch": 1.5678790821388613, "grad_norm": 0.0795363262295723, "learning_rate": 1.3558469976771776e-06, "loss": 0.0064, "step": 185680 }, { "epoch": 1.5679635219860253, "grad_norm": 0.4479925334453583, "learning_rate": 1.3553424986205382e-06, "loss": 0.0081, "step": 185690 }, { "epoch": 1.568047961833189, "grad_norm": 0.11220218986272812, "learning_rate": 1.3548380787251148e-06, "loss": 0.0059, "step": 185700 }, { "epoch": 1.5681324016803528, "grad_norm": 0.29899361729621887, "learning_rate": 1.3543337380018595e-06, "loss": 0.0065, "step": 185710 }, { "epoch": 1.5682168415275168, "grad_norm": 0.1460227221250534, "learning_rate": 1.35382947646173e-06, "loss": 0.0063, "step": 185720 }, { "epoch": 1.5683012813746808, "grad_norm": 0.6426126956939697, "learning_rate": 1.3533252941156783e-06, "loss": 0.0133, "step": 185730 }, { "epoch": 1.5683857212218446, "grad_norm": 0.24092672765254974, "learning_rate": 1.3528211909746507e-06, "loss": 0.0076, "step": 185740 }, { "epoch": 1.5684701610690084, "grad_norm": 0.2551863193511963, "learning_rate": 1.3523171670496017e-06, "loss": 0.006, "step": 185750 }, { "epoch": 1.5685546009161724, "grad_norm": 0.6750951409339905, "learning_rate": 1.3518132223514746e-06, "loss": 0.0081, "step": 185760 }, { "epoch": 1.5686390407633364, "grad_norm": 0.37365907430648804, "learning_rate": 1.3513093568912177e-06, "loss": 0.0046, "step": 185770 }, { "epoch": 1.5687234806105002, "grad_norm": 0.24288856983184814, "learning_rate": 1.3508055706797729e-06, "loss": 0.0042, "step": 185780 }, { "epoch": 1.568807920457664, "grad_norm": 0.1440054178237915, "learning_rate": 1.3503018637280846e-06, "loss": 0.0071, "step": 185790 }, { "epoch": 1.5688923603048277, "grad_norm": 0.21784937381744385, "learning_rate": 1.3497982360470924e-06, "loss": 0.0094, "step": 185800 }, { "epoch": 1.5689768001519917, "grad_norm": 0.22214412689208984, "learning_rate": 1.3492946876477342e-06, "loss": 0.0061, "step": 185810 }, { "epoch": 1.5690612399991557, "grad_norm": 0.6734461784362793, "learning_rate": 1.3487912185409458e-06, "loss": 0.005, "step": 185820 }, { "epoch": 1.5691456798463195, "grad_norm": 0.12832613289356232, "learning_rate": 1.3482878287376649e-06, "loss": 0.0148, "step": 185830 }, { "epoch": 1.5692301196934832, "grad_norm": 0.24489113688468933, "learning_rate": 1.3477845182488248e-06, "loss": 0.0085, "step": 185840 }, { "epoch": 1.5693145595406472, "grad_norm": 0.15191325545310974, "learning_rate": 1.3472812870853547e-06, "loss": 0.0053, "step": 185850 }, { "epoch": 1.5693989993878112, "grad_norm": 0.5353206396102905, "learning_rate": 1.3467781352581883e-06, "loss": 0.007, "step": 185860 }, { "epoch": 1.569483439234975, "grad_norm": 0.10664541274309158, "learning_rate": 1.34627506277825e-06, "loss": 0.0134, "step": 185870 }, { "epoch": 1.5695678790821388, "grad_norm": 0.04077569395303726, "learning_rate": 1.3457720696564708e-06, "loss": 0.0036, "step": 185880 }, { "epoch": 1.5696523189293028, "grad_norm": 0.45471683144569397, "learning_rate": 1.3452691559037734e-06, "loss": 0.0071, "step": 185890 }, { "epoch": 1.5697367587764666, "grad_norm": 0.1622418761253357, "learning_rate": 1.3447663215310807e-06, "loss": 0.0107, "step": 185900 }, { "epoch": 1.5698211986236306, "grad_norm": 0.2822846472263336, "learning_rate": 1.3442635665493131e-06, "loss": 0.0042, "step": 185910 }, { "epoch": 1.5699056384707943, "grad_norm": 0.15617841482162476, "learning_rate": 1.3437608909693934e-06, "loss": 0.0049, "step": 185920 }, { "epoch": 1.5699900783179581, "grad_norm": 0.6289536952972412, "learning_rate": 1.343258294802236e-06, "loss": 0.0083, "step": 185930 }, { "epoch": 1.5700745181651221, "grad_norm": 0.2077622413635254, "learning_rate": 1.3427557780587608e-06, "loss": 0.0052, "step": 185940 }, { "epoch": 1.570158958012286, "grad_norm": 0.2009899616241455, "learning_rate": 1.3422533407498806e-06, "loss": 0.0099, "step": 185950 }, { "epoch": 1.5702433978594499, "grad_norm": 0.05548954755067825, "learning_rate": 1.341750982886507e-06, "loss": 0.0082, "step": 185960 }, { "epoch": 1.5703278377066137, "grad_norm": 0.21732491254806519, "learning_rate": 1.341248704479554e-06, "loss": 0.0036, "step": 185970 }, { "epoch": 1.5704122775537777, "grad_norm": 0.003549210261553526, "learning_rate": 1.3407465055399288e-06, "loss": 0.0093, "step": 185980 }, { "epoch": 1.5704967174009417, "grad_norm": 0.7795860767364502, "learning_rate": 1.3402443860785402e-06, "loss": 0.0066, "step": 185990 }, { "epoch": 1.5705811572481054, "grad_norm": 0.06384690850973129, "learning_rate": 1.339742346106292e-06, "loss": 0.0065, "step": 186000 }, { "epoch": 1.5706655970952692, "grad_norm": 0.2875235080718994, "learning_rate": 1.3392403856340913e-06, "loss": 0.0066, "step": 186010 }, { "epoch": 1.570750036942433, "grad_norm": 0.24258597195148468, "learning_rate": 1.338738504672838e-06, "loss": 0.0027, "step": 186020 }, { "epoch": 1.570834476789597, "grad_norm": 0.5344891548156738, "learning_rate": 1.3382367032334353e-06, "loss": 0.0053, "step": 186030 }, { "epoch": 1.570918916636761, "grad_norm": 0.18514904379844666, "learning_rate": 1.3377349813267799e-06, "loss": 0.0115, "step": 186040 }, { "epoch": 1.5710033564839248, "grad_norm": 0.09230940043926239, "learning_rate": 1.3372333389637733e-06, "loss": 0.0064, "step": 186050 }, { "epoch": 1.5710877963310885, "grad_norm": 0.2905524671077728, "learning_rate": 1.3367317761553045e-06, "loss": 0.0033, "step": 186060 }, { "epoch": 1.5711722361782525, "grad_norm": 0.20026053488254547, "learning_rate": 1.3362302929122723e-06, "loss": 0.0037, "step": 186070 }, { "epoch": 1.5712566760254165, "grad_norm": 0.2015465497970581, "learning_rate": 1.3357288892455678e-06, "loss": 0.0075, "step": 186080 }, { "epoch": 1.5713411158725803, "grad_norm": 0.28787606954574585, "learning_rate": 1.3352275651660795e-06, "loss": 0.0068, "step": 186090 }, { "epoch": 1.571425555719744, "grad_norm": 0.5397564768791199, "learning_rate": 1.3347263206846983e-06, "loss": 0.0053, "step": 186100 }, { "epoch": 1.571509995566908, "grad_norm": 0.23359699547290802, "learning_rate": 1.3342251558123098e-06, "loss": 0.006, "step": 186110 }, { "epoch": 1.571594435414072, "grad_norm": 0.4865884482860565, "learning_rate": 1.3337240705598004e-06, "loss": 0.0086, "step": 186120 }, { "epoch": 1.5716788752612358, "grad_norm": 0.1860121786594391, "learning_rate": 1.3332230649380534e-06, "loss": 0.0047, "step": 186130 }, { "epoch": 1.5717633151083996, "grad_norm": 0.6963940262794495, "learning_rate": 1.3327221389579498e-06, "loss": 0.0077, "step": 186140 }, { "epoch": 1.5718477549555634, "grad_norm": 0.14373357594013214, "learning_rate": 1.3322212926303686e-06, "loss": 0.004, "step": 186150 }, { "epoch": 1.5719321948027274, "grad_norm": 0.3341602683067322, "learning_rate": 1.331720525966191e-06, "loss": 0.005, "step": 186160 }, { "epoch": 1.5720166346498914, "grad_norm": 0.17785270512104034, "learning_rate": 1.3312198389762898e-06, "loss": 0.0059, "step": 186170 }, { "epoch": 1.5721010744970552, "grad_norm": 0.1302887350320816, "learning_rate": 1.3307192316715434e-06, "loss": 0.0061, "step": 186180 }, { "epoch": 1.572185514344219, "grad_norm": 0.1627287119626999, "learning_rate": 1.3302187040628234e-06, "loss": 0.0036, "step": 186190 }, { "epoch": 1.572269954191383, "grad_norm": 0.6375956535339355, "learning_rate": 1.329718256161e-06, "loss": 0.0084, "step": 186200 }, { "epoch": 1.572354394038547, "grad_norm": 0.0990666002035141, "learning_rate": 1.3292178879769452e-06, "loss": 0.0083, "step": 186210 }, { "epoch": 1.5724388338857107, "grad_norm": 0.2196425199508667, "learning_rate": 1.3287175995215257e-06, "loss": 0.0094, "step": 186220 }, { "epoch": 1.5725232737328745, "grad_norm": 0.27984365820884705, "learning_rate": 1.3282173908056078e-06, "loss": 0.0071, "step": 186230 }, { "epoch": 1.5726077135800383, "grad_norm": 0.14517073333263397, "learning_rate": 1.327717261840054e-06, "loss": 0.0054, "step": 186240 }, { "epoch": 1.5726921534272023, "grad_norm": 0.22025975584983826, "learning_rate": 1.3272172126357308e-06, "loss": 0.0051, "step": 186250 }, { "epoch": 1.5727765932743663, "grad_norm": 0.24547643959522247, "learning_rate": 1.3267172432034953e-06, "loss": 0.0082, "step": 186260 }, { "epoch": 1.57286103312153, "grad_norm": 0.22447550296783447, "learning_rate": 1.3262173535542095e-06, "loss": 0.0062, "step": 186270 }, { "epoch": 1.5729454729686938, "grad_norm": 0.5847866535186768, "learning_rate": 1.325717543698729e-06, "loss": 0.0077, "step": 186280 }, { "epoch": 1.5730299128158578, "grad_norm": 0.1373290866613388, "learning_rate": 1.3252178136479122e-06, "loss": 0.0089, "step": 186290 }, { "epoch": 1.5731143526630218, "grad_norm": 0.15677939355373383, "learning_rate": 1.324718163412611e-06, "loss": 0.0091, "step": 186300 }, { "epoch": 1.5731987925101856, "grad_norm": 0.16636788845062256, "learning_rate": 1.3242185930036782e-06, "loss": 0.0079, "step": 186310 }, { "epoch": 1.5732832323573493, "grad_norm": 0.07547640055418015, "learning_rate": 1.3237191024319645e-06, "loss": 0.0059, "step": 186320 }, { "epoch": 1.5733676722045133, "grad_norm": 0.05674534663558006, "learning_rate": 1.3232196917083174e-06, "loss": 0.0072, "step": 186330 }, { "epoch": 1.5734521120516773, "grad_norm": 0.3666347563266754, "learning_rate": 1.3227203608435857e-06, "loss": 0.0088, "step": 186340 }, { "epoch": 1.5735365518988411, "grad_norm": 0.1833236962556839, "learning_rate": 1.3222211098486138e-06, "loss": 0.0042, "step": 186350 }, { "epoch": 1.573620991746005, "grad_norm": 0.3182522654533386, "learning_rate": 1.3217219387342466e-06, "loss": 0.0055, "step": 186360 }, { "epoch": 1.5737054315931687, "grad_norm": 0.18954196572303772, "learning_rate": 1.3212228475113253e-06, "loss": 0.009, "step": 186370 }, { "epoch": 1.5737898714403327, "grad_norm": 0.08950723707675934, "learning_rate": 1.32072383619069e-06, "loss": 0.0075, "step": 186380 }, { "epoch": 1.5738743112874967, "grad_norm": 0.3586135804653168, "learning_rate": 1.3202249047831773e-06, "loss": 0.0087, "step": 186390 }, { "epoch": 1.5739587511346604, "grad_norm": 0.17087168991565704, "learning_rate": 1.319726053299627e-06, "loss": 0.006, "step": 186400 }, { "epoch": 1.5740431909818242, "grad_norm": 0.2565563917160034, "learning_rate": 1.3192272817508712e-06, "loss": 0.0073, "step": 186410 }, { "epoch": 1.5741276308289882, "grad_norm": 0.17709310352802277, "learning_rate": 1.3187285901477464e-06, "loss": 0.0067, "step": 186420 }, { "epoch": 1.5742120706761522, "grad_norm": 0.30641260743141174, "learning_rate": 1.318229978501082e-06, "loss": 0.0046, "step": 186430 }, { "epoch": 1.574296510523316, "grad_norm": 0.07049520313739777, "learning_rate": 1.3177314468217067e-06, "loss": 0.0069, "step": 186440 }, { "epoch": 1.5743809503704798, "grad_norm": 0.2652377188205719, "learning_rate": 1.317232995120451e-06, "loss": 0.0037, "step": 186450 }, { "epoch": 1.5744653902176438, "grad_norm": 0.0066289640963077545, "learning_rate": 1.3167346234081402e-06, "loss": 0.0093, "step": 186460 }, { "epoch": 1.5745498300648075, "grad_norm": 0.16464263200759888, "learning_rate": 1.3162363316955985e-06, "loss": 0.0057, "step": 186470 }, { "epoch": 1.5746342699119715, "grad_norm": 0.253132164478302, "learning_rate": 1.3157381199936476e-06, "loss": 0.0112, "step": 186480 }, { "epoch": 1.5747187097591353, "grad_norm": 0.1020815446972847, "learning_rate": 1.3152399883131112e-06, "loss": 0.0019, "step": 186490 }, { "epoch": 1.574803149606299, "grad_norm": 0.34005746245384216, "learning_rate": 1.314741936664805e-06, "loss": 0.005, "step": 186500 }, { "epoch": 1.574887589453463, "grad_norm": 0.10946615040302277, "learning_rate": 1.3142439650595513e-06, "loss": 0.0068, "step": 186510 }, { "epoch": 1.574972029300627, "grad_norm": 0.08690086007118225, "learning_rate": 1.3137460735081609e-06, "loss": 0.0036, "step": 186520 }, { "epoch": 1.5750564691477908, "grad_norm": 0.4391995966434479, "learning_rate": 1.3132482620214525e-06, "loss": 0.0107, "step": 186530 }, { "epoch": 1.5751409089949546, "grad_norm": 0.7998988628387451, "learning_rate": 1.312750530610236e-06, "loss": 0.0091, "step": 186540 }, { "epoch": 1.5752253488421186, "grad_norm": 0.6406570672988892, "learning_rate": 1.3122528792853228e-06, "loss": 0.0091, "step": 186550 }, { "epoch": 1.5753097886892826, "grad_norm": 0.4487874209880829, "learning_rate": 1.3117553080575213e-06, "loss": 0.0075, "step": 186560 }, { "epoch": 1.5753942285364464, "grad_norm": 0.13391287624835968, "learning_rate": 1.3112578169376372e-06, "loss": 0.0052, "step": 186570 }, { "epoch": 1.5754786683836102, "grad_norm": 0.2126012146472931, "learning_rate": 1.3107604059364786e-06, "loss": 0.0052, "step": 186580 }, { "epoch": 1.575563108230774, "grad_norm": 0.37893909215927124, "learning_rate": 1.3102630750648471e-06, "loss": 0.0093, "step": 186590 }, { "epoch": 1.575647548077938, "grad_norm": 0.06251852214336395, "learning_rate": 1.3097658243335464e-06, "loss": 0.0081, "step": 186600 }, { "epoch": 1.575731987925102, "grad_norm": 0.12986452877521515, "learning_rate": 1.3092686537533744e-06, "loss": 0.0113, "step": 186610 }, { "epoch": 1.5758164277722657, "grad_norm": 0.30979663133621216, "learning_rate": 1.3087715633351328e-06, "loss": 0.0069, "step": 186620 }, { "epoch": 1.5759008676194295, "grad_norm": 0.6324231028556824, "learning_rate": 1.3082745530896156e-06, "loss": 0.0087, "step": 186630 }, { "epoch": 1.5759853074665935, "grad_norm": 0.37838608026504517, "learning_rate": 1.3077776230276197e-06, "loss": 0.0093, "step": 186640 }, { "epoch": 1.5760697473137575, "grad_norm": 0.030362563207745552, "learning_rate": 1.3072807731599353e-06, "loss": 0.0056, "step": 186650 }, { "epoch": 1.5761541871609213, "grad_norm": 0.11454886943101883, "learning_rate": 1.3067840034973566e-06, "loss": 0.0072, "step": 186660 }, { "epoch": 1.576238627008085, "grad_norm": 0.33452528715133667, "learning_rate": 1.3062873140506731e-06, "loss": 0.0047, "step": 186670 }, { "epoch": 1.576323066855249, "grad_norm": 0.06906527280807495, "learning_rate": 1.3057907048306712e-06, "loss": 0.0037, "step": 186680 }, { "epoch": 1.576407506702413, "grad_norm": 0.002287896815687418, "learning_rate": 1.3052941758481391e-06, "loss": 0.0074, "step": 186690 }, { "epoch": 1.5764919465495768, "grad_norm": 0.3928384780883789, "learning_rate": 1.3047977271138606e-06, "loss": 0.0066, "step": 186700 }, { "epoch": 1.5765763863967406, "grad_norm": 0.04183889180421829, "learning_rate": 1.3043013586386184e-06, "loss": 0.0056, "step": 186710 }, { "epoch": 1.5766608262439044, "grad_norm": 0.05401771143078804, "learning_rate": 1.3038050704331912e-06, "loss": 0.0029, "step": 186720 }, { "epoch": 1.5767452660910684, "grad_norm": 0.20832791924476624, "learning_rate": 1.3033088625083623e-06, "loss": 0.0082, "step": 186730 }, { "epoch": 1.5768297059382324, "grad_norm": 0.16967129707336426, "learning_rate": 1.3028127348749065e-06, "loss": 0.0058, "step": 186740 }, { "epoch": 1.5769141457853961, "grad_norm": 0.28810012340545654, "learning_rate": 1.302316687543601e-06, "loss": 0.0039, "step": 186750 }, { "epoch": 1.57699858563256, "grad_norm": 0.12521842122077942, "learning_rate": 1.3018207205252186e-06, "loss": 0.0045, "step": 186760 }, { "epoch": 1.577083025479724, "grad_norm": 0.1730624884366989, "learning_rate": 1.3013248338305335e-06, "loss": 0.0074, "step": 186770 }, { "epoch": 1.577167465326888, "grad_norm": 0.38885390758514404, "learning_rate": 1.300829027470315e-06, "loss": 0.0076, "step": 186780 }, { "epoch": 1.5772519051740517, "grad_norm": 0.01157465111464262, "learning_rate": 1.3003333014553322e-06, "loss": 0.0049, "step": 186790 }, { "epoch": 1.5773363450212154, "grad_norm": 0.7266567349433899, "learning_rate": 1.2998376557963515e-06, "loss": 0.0069, "step": 186800 }, { "epoch": 1.5774207848683792, "grad_norm": 0.12357636541128159, "learning_rate": 1.2993420905041377e-06, "loss": 0.0052, "step": 186810 }, { "epoch": 1.5775052247155432, "grad_norm": 0.03298914059996605, "learning_rate": 1.2988466055894567e-06, "loss": 0.0052, "step": 186820 }, { "epoch": 1.5775896645627072, "grad_norm": 0.2961250841617584, "learning_rate": 1.2983512010630672e-06, "loss": 0.0072, "step": 186830 }, { "epoch": 1.577674104409871, "grad_norm": 0.1668156385421753, "learning_rate": 1.297855876935733e-06, "loss": 0.0085, "step": 186840 }, { "epoch": 1.5777585442570348, "grad_norm": 0.24874646961688995, "learning_rate": 1.2973606332182087e-06, "loss": 0.0084, "step": 186850 }, { "epoch": 1.5778429841041988, "grad_norm": 0.7725480794906616, "learning_rate": 1.296865469921254e-06, "loss": 0.0088, "step": 186860 }, { "epoch": 1.5779274239513628, "grad_norm": 0.198736771941185, "learning_rate": 1.2963703870556232e-06, "loss": 0.0097, "step": 186870 }, { "epoch": 1.5780118637985265, "grad_norm": 0.2467157542705536, "learning_rate": 1.2958753846320683e-06, "loss": 0.0069, "step": 186880 }, { "epoch": 1.5780963036456903, "grad_norm": 0.2857772409915924, "learning_rate": 1.2953804626613392e-06, "loss": 0.0068, "step": 186890 }, { "epoch": 1.5781807434928543, "grad_norm": 0.12497653812170029, "learning_rate": 1.2948856211541887e-06, "loss": 0.0035, "step": 186900 }, { "epoch": 1.5782651833400183, "grad_norm": 0.4157832860946655, "learning_rate": 1.2943908601213639e-06, "loss": 0.006, "step": 186910 }, { "epoch": 1.578349623187182, "grad_norm": 0.10395199805498123, "learning_rate": 1.2938961795736082e-06, "loss": 0.005, "step": 186920 }, { "epoch": 1.5784340630343459, "grad_norm": 0.011262795887887478, "learning_rate": 1.2934015795216697e-06, "loss": 0.0109, "step": 186930 }, { "epoch": 1.5785185028815096, "grad_norm": 0.20772618055343628, "learning_rate": 1.2929070599762883e-06, "loss": 0.0071, "step": 186940 }, { "epoch": 1.5786029427286736, "grad_norm": 0.16695153713226318, "learning_rate": 1.2924126209482084e-06, "loss": 0.0059, "step": 186950 }, { "epoch": 1.5786873825758376, "grad_norm": 0.05809725075960159, "learning_rate": 1.2919182624481635e-06, "loss": 0.0054, "step": 186960 }, { "epoch": 1.5787718224230014, "grad_norm": 0.38850516080856323, "learning_rate": 1.291423984486896e-06, "loss": 0.006, "step": 186970 }, { "epoch": 1.5788562622701652, "grad_norm": 0.1339678019285202, "learning_rate": 1.2909297870751381e-06, "loss": 0.0058, "step": 186980 }, { "epoch": 1.5789407021173292, "grad_norm": 0.05900273099541664, "learning_rate": 1.2904356702236264e-06, "loss": 0.0071, "step": 186990 }, { "epoch": 1.5790251419644932, "grad_norm": 0.2678919732570648, "learning_rate": 1.2899416339430904e-06, "loss": 0.0103, "step": 187000 }, { "epoch": 1.579109581811657, "grad_norm": 0.8750244379043579, "learning_rate": 1.2894476782442634e-06, "loss": 0.0103, "step": 187010 }, { "epoch": 1.5791940216588207, "grad_norm": 0.14270047843456268, "learning_rate": 1.2889538031378723e-06, "loss": 0.0075, "step": 187020 }, { "epoch": 1.5792784615059847, "grad_norm": 0.09954653680324554, "learning_rate": 1.288460008634644e-06, "loss": 0.0053, "step": 187030 }, { "epoch": 1.5793629013531485, "grad_norm": 0.050020523369312286, "learning_rate": 1.2879662947453042e-06, "loss": 0.0032, "step": 187040 }, { "epoch": 1.5794473412003125, "grad_norm": 0.16261973977088928, "learning_rate": 1.2874726614805739e-06, "loss": 0.0056, "step": 187050 }, { "epoch": 1.5795317810474763, "grad_norm": 0.07530725002288818, "learning_rate": 1.2869791088511778e-06, "loss": 0.0057, "step": 187060 }, { "epoch": 1.57961622089464, "grad_norm": 0.053982045501470566, "learning_rate": 1.286485636867834e-06, "loss": 0.0057, "step": 187070 }, { "epoch": 1.579700660741804, "grad_norm": 0.2797185480594635, "learning_rate": 1.285992245541262e-06, "loss": 0.0102, "step": 187080 }, { "epoch": 1.579785100588968, "grad_norm": 0.2311176359653473, "learning_rate": 1.285498934882176e-06, "loss": 0.0088, "step": 187090 }, { "epoch": 1.5798695404361318, "grad_norm": 0.11060242354869843, "learning_rate": 1.2850057049012938e-06, "loss": 0.0056, "step": 187100 }, { "epoch": 1.5799539802832956, "grad_norm": 0.3554435968399048, "learning_rate": 1.2845125556093263e-06, "loss": 0.0061, "step": 187110 }, { "epoch": 1.5800384201304596, "grad_norm": 0.24847529828548431, "learning_rate": 1.2840194870169847e-06, "loss": 0.0061, "step": 187120 }, { "epoch": 1.5801228599776236, "grad_norm": 0.22280670702457428, "learning_rate": 1.2835264991349766e-06, "loss": 0.0079, "step": 187130 }, { "epoch": 1.5802072998247874, "grad_norm": 0.47818857431411743, "learning_rate": 1.2830335919740134e-06, "loss": 0.004, "step": 187140 }, { "epoch": 1.5802917396719511, "grad_norm": 0.24911946058273315, "learning_rate": 1.282540765544798e-06, "loss": 0.0065, "step": 187150 }, { "epoch": 1.580376179519115, "grad_norm": 0.03001774288713932, "learning_rate": 1.2820480198580348e-06, "loss": 0.004, "step": 187160 }, { "epoch": 1.580460619366279, "grad_norm": 0.2081325799226761, "learning_rate": 1.2815553549244276e-06, "loss": 0.0075, "step": 187170 }, { "epoch": 1.580545059213443, "grad_norm": 0.35998812317848206, "learning_rate": 1.2810627707546752e-06, "loss": 0.0086, "step": 187180 }, { "epoch": 1.5806294990606067, "grad_norm": 0.1594245284795761, "learning_rate": 1.2805702673594783e-06, "loss": 0.0052, "step": 187190 }, { "epoch": 1.5807139389077705, "grad_norm": 0.14830535650253296, "learning_rate": 1.2800778447495332e-06, "loss": 0.0059, "step": 187200 }, { "epoch": 1.5807983787549345, "grad_norm": 0.19767031073570251, "learning_rate": 1.2795855029355347e-06, "loss": 0.0051, "step": 187210 }, { "epoch": 1.5808828186020984, "grad_norm": 0.1804858148097992, "learning_rate": 1.279093241928176e-06, "loss": 0.0065, "step": 187220 }, { "epoch": 1.5809672584492622, "grad_norm": 0.0018633502768352628, "learning_rate": 1.2786010617381505e-06, "loss": 0.0043, "step": 187230 }, { "epoch": 1.581051698296426, "grad_norm": 0.349364310503006, "learning_rate": 1.2781089623761462e-06, "loss": 0.0042, "step": 187240 }, { "epoch": 1.58113613814359, "grad_norm": 0.2837972342967987, "learning_rate": 1.2776169438528535e-06, "loss": 0.0056, "step": 187250 }, { "epoch": 1.581220577990754, "grad_norm": 0.05456344410777092, "learning_rate": 1.2771250061789587e-06, "loss": 0.007, "step": 187260 }, { "epoch": 1.5813050178379178, "grad_norm": 0.3867177367210388, "learning_rate": 1.2766331493651452e-06, "loss": 0.0035, "step": 187270 }, { "epoch": 1.5813894576850815, "grad_norm": 0.060008395463228226, "learning_rate": 1.2761413734220968e-06, "loss": 0.0072, "step": 187280 }, { "epoch": 1.5814738975322453, "grad_norm": 0.08298010379076004, "learning_rate": 1.2756496783604937e-06, "loss": 0.0081, "step": 187290 }, { "epoch": 1.5815583373794093, "grad_norm": 0.4397217929363251, "learning_rate": 1.2751580641910172e-06, "loss": 0.0044, "step": 187300 }, { "epoch": 1.5816427772265733, "grad_norm": 0.16679184138774872, "learning_rate": 1.2746665309243433e-06, "loss": 0.004, "step": 187310 }, { "epoch": 1.581727217073737, "grad_norm": 0.14829976856708527, "learning_rate": 1.2741750785711503e-06, "loss": 0.0053, "step": 187320 }, { "epoch": 1.5818116569209009, "grad_norm": 0.12155410647392273, "learning_rate": 1.27368370714211e-06, "loss": 0.0078, "step": 187330 }, { "epoch": 1.5818960967680649, "grad_norm": 0.6390583515167236, "learning_rate": 1.2731924166478965e-06, "loss": 0.008, "step": 187340 }, { "epoch": 1.5819805366152289, "grad_norm": 0.11776866018772125, "learning_rate": 1.2727012070991807e-06, "loss": 0.015, "step": 187350 }, { "epoch": 1.5820649764623926, "grad_norm": 0.30544063448905945, "learning_rate": 1.272210078506631e-06, "loss": 0.0055, "step": 187360 }, { "epoch": 1.5821494163095564, "grad_norm": 0.18082261085510254, "learning_rate": 1.271719030880914e-06, "loss": 0.0062, "step": 187370 }, { "epoch": 1.5822338561567204, "grad_norm": 0.15305805206298828, "learning_rate": 1.2712280642326947e-06, "loss": 0.0043, "step": 187380 }, { "epoch": 1.5823182960038842, "grad_norm": 0.20534589886665344, "learning_rate": 1.2707371785726385e-06, "loss": 0.0068, "step": 187390 }, { "epoch": 1.5824027358510482, "grad_norm": 0.1605774462223053, "learning_rate": 1.2702463739114057e-06, "loss": 0.0057, "step": 187400 }, { "epoch": 1.582487175698212, "grad_norm": 0.15850841999053955, "learning_rate": 1.2697556502596585e-06, "loss": 0.0068, "step": 187410 }, { "epoch": 1.5825716155453757, "grad_norm": 0.29019710421562195, "learning_rate": 1.2692650076280532e-06, "loss": 0.0055, "step": 187420 }, { "epoch": 1.5826560553925397, "grad_norm": 0.0527711845934391, "learning_rate": 1.268774446027249e-06, "loss": 0.0049, "step": 187430 }, { "epoch": 1.5827404952397037, "grad_norm": 0.043741848319768906, "learning_rate": 1.2682839654678986e-06, "loss": 0.0062, "step": 187440 }, { "epoch": 1.5828249350868675, "grad_norm": 0.5403138995170593, "learning_rate": 1.267793565960656e-06, "loss": 0.0078, "step": 187450 }, { "epoch": 1.5829093749340313, "grad_norm": 0.18475542962551117, "learning_rate": 1.2673032475161706e-06, "loss": 0.0101, "step": 187460 }, { "epoch": 1.5829938147811953, "grad_norm": 0.4552242159843445, "learning_rate": 1.2668130101450953e-06, "loss": 0.0066, "step": 187470 }, { "epoch": 1.5830782546283593, "grad_norm": 0.20413129031658173, "learning_rate": 1.2663228538580758e-06, "loss": 0.0066, "step": 187480 }, { "epoch": 1.583162694475523, "grad_norm": 0.2812277376651764, "learning_rate": 1.2658327786657593e-06, "loss": 0.0051, "step": 187490 }, { "epoch": 1.5832471343226868, "grad_norm": 1.0783450603485107, "learning_rate": 1.2653427845787903e-06, "loss": 0.0054, "step": 187500 }, { "epoch": 1.5833315741698506, "grad_norm": 0.2648763954639435, "learning_rate": 1.2648528716078084e-06, "loss": 0.0064, "step": 187510 }, { "epoch": 1.5834160140170146, "grad_norm": 0.11602132022380829, "learning_rate": 1.2643630397634587e-06, "loss": 0.0044, "step": 187520 }, { "epoch": 1.5835004538641786, "grad_norm": 0.10715077817440033, "learning_rate": 1.2638732890563788e-06, "loss": 0.0068, "step": 187530 }, { "epoch": 1.5835848937113424, "grad_norm": 0.29470860958099365, "learning_rate": 1.263383619497205e-06, "loss": 0.0108, "step": 187540 }, { "epoch": 1.5836693335585061, "grad_norm": 0.06982704997062683, "learning_rate": 1.2628940310965716e-06, "loss": 0.0058, "step": 187550 }, { "epoch": 1.5837537734056701, "grad_norm": 0.3100002706050873, "learning_rate": 1.2624045238651156e-06, "loss": 0.0104, "step": 187560 }, { "epoch": 1.5838382132528341, "grad_norm": 0.03204004094004631, "learning_rate": 1.2619150978134658e-06, "loss": 0.0039, "step": 187570 }, { "epoch": 1.583922653099998, "grad_norm": 0.19948601722717285, "learning_rate": 1.2614257529522562e-06, "loss": 0.0046, "step": 187580 }, { "epoch": 1.5840070929471617, "grad_norm": 0.35913529992103577, "learning_rate": 1.2609364892921122e-06, "loss": 0.0039, "step": 187590 }, { "epoch": 1.5840915327943257, "grad_norm": 0.2963826656341553, "learning_rate": 1.2604473068436623e-06, "loss": 0.0075, "step": 187600 }, { "epoch": 1.5841759726414897, "grad_norm": 0.15602226555347443, "learning_rate": 1.2599582056175303e-06, "loss": 0.0047, "step": 187610 }, { "epoch": 1.5842604124886535, "grad_norm": 0.37650537490844727, "learning_rate": 1.259469185624338e-06, "loss": 0.0052, "step": 187620 }, { "epoch": 1.5843448523358172, "grad_norm": 0.25749078392982483, "learning_rate": 1.2589802468747102e-06, "loss": 0.006, "step": 187630 }, { "epoch": 1.584429292182981, "grad_norm": 0.23402076959609985, "learning_rate": 1.2584913893792633e-06, "loss": 0.0066, "step": 187640 }, { "epoch": 1.584513732030145, "grad_norm": 0.19542042911052704, "learning_rate": 1.2580026131486178e-06, "loss": 0.0056, "step": 187650 }, { "epoch": 1.584598171877309, "grad_norm": 0.15593713521957397, "learning_rate": 1.2575139181933877e-06, "loss": 0.0054, "step": 187660 }, { "epoch": 1.5846826117244728, "grad_norm": 0.43466833233833313, "learning_rate": 1.2570253045241898e-06, "loss": 0.0089, "step": 187670 }, { "epoch": 1.5847670515716366, "grad_norm": 0.14627447724342346, "learning_rate": 1.2565367721516354e-06, "loss": 0.0088, "step": 187680 }, { "epoch": 1.5848514914188006, "grad_norm": 0.21105247735977173, "learning_rate": 1.256048321086335e-06, "loss": 0.004, "step": 187690 }, { "epoch": 1.5849359312659645, "grad_norm": 0.5465078949928284, "learning_rate": 1.2555599513388966e-06, "loss": 0.0083, "step": 187700 }, { "epoch": 1.5850203711131283, "grad_norm": 0.29408982396125793, "learning_rate": 1.25507166291993e-06, "loss": 0.0047, "step": 187710 }, { "epoch": 1.585104810960292, "grad_norm": 0.2989874482154846, "learning_rate": 1.2545834558400378e-06, "loss": 0.0078, "step": 187720 }, { "epoch": 1.5851892508074559, "grad_norm": 0.14793622493743896, "learning_rate": 1.2540953301098273e-06, "loss": 0.0047, "step": 187730 }, { "epoch": 1.5852736906546199, "grad_norm": 0.16470938920974731, "learning_rate": 1.2536072857398983e-06, "loss": 0.0036, "step": 187740 }, { "epoch": 1.5853581305017839, "grad_norm": 0.31473129987716675, "learning_rate": 1.25311932274085e-06, "loss": 0.006, "step": 187750 }, { "epoch": 1.5854425703489476, "grad_norm": 0.16488108038902283, "learning_rate": 1.2526314411232836e-06, "loss": 0.0055, "step": 187760 }, { "epoch": 1.5855270101961114, "grad_norm": 0.9617807865142822, "learning_rate": 1.252143640897795e-06, "loss": 0.0067, "step": 187770 }, { "epoch": 1.5856114500432754, "grad_norm": 0.052193474024534225, "learning_rate": 1.251655922074978e-06, "loss": 0.0031, "step": 187780 }, { "epoch": 1.5856958898904394, "grad_norm": 0.13622167706489563, "learning_rate": 1.2511682846654243e-06, "loss": 0.006, "step": 187790 }, { "epoch": 1.5857803297376032, "grad_norm": 0.3043440580368042, "learning_rate": 1.2506807286797297e-06, "loss": 0.0053, "step": 187800 }, { "epoch": 1.585864769584767, "grad_norm": 0.36570411920547485, "learning_rate": 1.2501932541284794e-06, "loss": 0.0046, "step": 187810 }, { "epoch": 1.585949209431931, "grad_norm": 0.02416428178548813, "learning_rate": 1.2497058610222645e-06, "loss": 0.0041, "step": 187820 }, { "epoch": 1.586033649279095, "grad_norm": 0.14281414449214935, "learning_rate": 1.2492185493716686e-06, "loss": 0.0068, "step": 187830 }, { "epoch": 1.5861180891262587, "grad_norm": 0.4618658423423767, "learning_rate": 1.2487313191872786e-06, "loss": 0.0066, "step": 187840 }, { "epoch": 1.5862025289734225, "grad_norm": 0.12917616963386536, "learning_rate": 1.2482441704796772e-06, "loss": 0.0049, "step": 187850 }, { "epoch": 1.5862869688205863, "grad_norm": 0.7253285646438599, "learning_rate": 1.2477571032594403e-06, "loss": 0.0075, "step": 187860 }, { "epoch": 1.5863714086677503, "grad_norm": 0.1261327713727951, "learning_rate": 1.2472701175371521e-06, "loss": 0.007, "step": 187870 }, { "epoch": 1.5864558485149143, "grad_norm": 0.2495221644639969, "learning_rate": 1.2467832133233864e-06, "loss": 0.0059, "step": 187880 }, { "epoch": 1.586540288362078, "grad_norm": 0.2575201690196991, "learning_rate": 1.2462963906287217e-06, "loss": 0.0059, "step": 187890 }, { "epoch": 1.5866247282092418, "grad_norm": 0.16372057795524597, "learning_rate": 1.2458096494637285e-06, "loss": 0.0045, "step": 187900 }, { "epoch": 1.5867091680564058, "grad_norm": 0.1327354609966278, "learning_rate": 1.2453229898389817e-06, "loss": 0.0023, "step": 187910 }, { "epoch": 1.5867936079035698, "grad_norm": 0.24345596134662628, "learning_rate": 1.2448364117650507e-06, "loss": 0.0058, "step": 187920 }, { "epoch": 1.5868780477507336, "grad_norm": 0.0752210021018982, "learning_rate": 1.2443499152525029e-06, "loss": 0.0098, "step": 187930 }, { "epoch": 1.5869624875978974, "grad_norm": 0.2918602228164673, "learning_rate": 1.2438635003119042e-06, "loss": 0.0096, "step": 187940 }, { "epoch": 1.5870469274450614, "grad_norm": 0.04986199364066124, "learning_rate": 1.243377166953822e-06, "loss": 0.0061, "step": 187950 }, { "epoch": 1.5871313672922251, "grad_norm": 0.29894980788230896, "learning_rate": 1.2428909151888174e-06, "loss": 0.0055, "step": 187960 }, { "epoch": 1.5872158071393891, "grad_norm": 0.3820672929286957, "learning_rate": 1.2424047450274512e-06, "loss": 0.0065, "step": 187970 }, { "epoch": 1.587300246986553, "grad_norm": 0.2032294124364853, "learning_rate": 1.2419186564802855e-06, "loss": 0.0037, "step": 187980 }, { "epoch": 1.5873846868337167, "grad_norm": 0.11636905372142792, "learning_rate": 1.241432649557875e-06, "loss": 0.0054, "step": 187990 }, { "epoch": 1.5874691266808807, "grad_norm": 0.30163460969924927, "learning_rate": 1.2409467242707784e-06, "loss": 0.0084, "step": 188000 }, { "epoch": 1.5875535665280447, "grad_norm": 0.1983863264322281, "learning_rate": 1.2404608806295488e-06, "loss": 0.0058, "step": 188010 }, { "epoch": 1.5876380063752085, "grad_norm": 0.658454954624176, "learning_rate": 1.2399751186447383e-06, "loss": 0.0075, "step": 188020 }, { "epoch": 1.5877224462223722, "grad_norm": 0.005615381523966789, "learning_rate": 1.2394894383268962e-06, "loss": 0.0046, "step": 188030 }, { "epoch": 1.5878068860695362, "grad_norm": 0.000919176556635648, "learning_rate": 1.239003839686575e-06, "loss": 0.0072, "step": 188040 }, { "epoch": 1.5878913259167002, "grad_norm": 0.26220664381980896, "learning_rate": 1.2385183227343173e-06, "loss": 0.0063, "step": 188050 }, { "epoch": 1.587975765763864, "grad_norm": 0.04705018177628517, "learning_rate": 1.238032887480673e-06, "loss": 0.0058, "step": 188060 }, { "epoch": 1.5880602056110278, "grad_norm": 0.3231513202190399, "learning_rate": 1.237547533936182e-06, "loss": 0.0047, "step": 188070 }, { "epoch": 1.5881446454581916, "grad_norm": 0.7362917065620422, "learning_rate": 1.2370622621113887e-06, "loss": 0.0075, "step": 188080 }, { "epoch": 1.5882290853053556, "grad_norm": 0.24788501858711243, "learning_rate": 1.2365770720168318e-06, "loss": 0.0079, "step": 188090 }, { "epoch": 1.5883135251525196, "grad_norm": 0.14819471538066864, "learning_rate": 1.2360919636630503e-06, "loss": 0.0044, "step": 188100 }, { "epoch": 1.5883979649996833, "grad_norm": 0.01110231876373291, "learning_rate": 1.2356069370605795e-06, "loss": 0.0091, "step": 188110 }, { "epoch": 1.588482404846847, "grad_norm": 0.4502399265766144, "learning_rate": 1.235121992219953e-06, "loss": 0.0079, "step": 188120 }, { "epoch": 1.588566844694011, "grad_norm": 0.5626105070114136, "learning_rate": 1.2346371291517073e-06, "loss": 0.0096, "step": 188130 }, { "epoch": 1.588651284541175, "grad_norm": 0.6063921451568604, "learning_rate": 1.2341523478663703e-06, "loss": 0.0049, "step": 188140 }, { "epoch": 1.5887357243883389, "grad_norm": 0.21167686581611633, "learning_rate": 1.2336676483744736e-06, "loss": 0.0063, "step": 188150 }, { "epoch": 1.5888201642355027, "grad_norm": 0.09287308156490326, "learning_rate": 1.2331830306865439e-06, "loss": 0.0058, "step": 188160 }, { "epoch": 1.5889046040826666, "grad_norm": 0.17859698832035065, "learning_rate": 1.232698494813107e-06, "loss": 0.0051, "step": 188170 }, { "epoch": 1.5889890439298306, "grad_norm": 0.028406821191310883, "learning_rate": 1.232214040764685e-06, "loss": 0.007, "step": 188180 }, { "epoch": 1.5890734837769944, "grad_norm": 0.07677444815635681, "learning_rate": 1.2317296685518037e-06, "loss": 0.0027, "step": 188190 }, { "epoch": 1.5891579236241582, "grad_norm": 0.36633315682411194, "learning_rate": 1.231245378184982e-06, "loss": 0.0119, "step": 188200 }, { "epoch": 1.589242363471322, "grad_norm": 0.3365771770477295, "learning_rate": 1.2307611696747363e-06, "loss": 0.0066, "step": 188210 }, { "epoch": 1.589326803318486, "grad_norm": 0.1987428218126297, "learning_rate": 1.2302770430315875e-06, "loss": 0.0071, "step": 188220 }, { "epoch": 1.58941124316565, "grad_norm": 0.0009982604533433914, "learning_rate": 1.2297929982660472e-06, "loss": 0.0043, "step": 188230 }, { "epoch": 1.5894956830128137, "grad_norm": 0.12273483723402023, "learning_rate": 1.2293090353886321e-06, "loss": 0.0053, "step": 188240 }, { "epoch": 1.5895801228599775, "grad_norm": 0.3347181975841522, "learning_rate": 1.2288251544098523e-06, "loss": 0.0063, "step": 188250 }, { "epoch": 1.5896645627071415, "grad_norm": 0.22862088680267334, "learning_rate": 1.2283413553402169e-06, "loss": 0.0095, "step": 188260 }, { "epoch": 1.5897490025543055, "grad_norm": 0.30452072620391846, "learning_rate": 1.227857638190233e-06, "loss": 0.0054, "step": 188270 }, { "epoch": 1.5898334424014693, "grad_norm": 0.2996760606765747, "learning_rate": 1.2273740029704095e-06, "loss": 0.0072, "step": 188280 }, { "epoch": 1.589917882248633, "grad_norm": 0.15027548372745514, "learning_rate": 1.2268904496912482e-06, "loss": 0.0155, "step": 188290 }, { "epoch": 1.590002322095797, "grad_norm": 0.42587292194366455, "learning_rate": 1.2264069783632549e-06, "loss": 0.0091, "step": 188300 }, { "epoch": 1.5900867619429608, "grad_norm": 0.19715233147144318, "learning_rate": 1.2259235889969269e-06, "loss": 0.0053, "step": 188310 }, { "epoch": 1.5901712017901248, "grad_norm": 0.22225210070610046, "learning_rate": 1.2254402816027671e-06, "loss": 0.0085, "step": 188320 }, { "epoch": 1.5902556416372886, "grad_norm": 0.2931343913078308, "learning_rate": 1.2249570561912704e-06, "loss": 0.0116, "step": 188330 }, { "epoch": 1.5903400814844524, "grad_norm": 0.2579439580440521, "learning_rate": 1.2244739127729333e-06, "loss": 0.0081, "step": 188340 }, { "epoch": 1.5904245213316164, "grad_norm": 0.26957830786705017, "learning_rate": 1.2239908513582488e-06, "loss": 0.0073, "step": 188350 }, { "epoch": 1.5905089611787804, "grad_norm": 0.3528403341770172, "learning_rate": 1.2235078719577076e-06, "loss": 0.0036, "step": 188360 }, { "epoch": 1.5905934010259442, "grad_norm": 0.09898952394723892, "learning_rate": 1.2230249745818034e-06, "loss": 0.0074, "step": 188370 }, { "epoch": 1.590677840873108, "grad_norm": 0.1703842133283615, "learning_rate": 1.2225421592410214e-06, "loss": 0.0056, "step": 188380 }, { "epoch": 1.590762280720272, "grad_norm": 0.0068870116956532, "learning_rate": 1.2220594259458508e-06, "loss": 0.0067, "step": 188390 }, { "epoch": 1.590846720567436, "grad_norm": 0.2822697162628174, "learning_rate": 1.2215767747067741e-06, "loss": 0.0051, "step": 188400 }, { "epoch": 1.5909311604145997, "grad_norm": 0.2939385771751404, "learning_rate": 1.2210942055342767e-06, "loss": 0.0052, "step": 188410 }, { "epoch": 1.5910156002617635, "grad_norm": 0.25459662079811096, "learning_rate": 1.2206117184388388e-06, "loss": 0.0043, "step": 188420 }, { "epoch": 1.5911000401089272, "grad_norm": 0.25038278102874756, "learning_rate": 1.22012931343094e-06, "loss": 0.0064, "step": 188430 }, { "epoch": 1.5911844799560912, "grad_norm": 0.45078176259994507, "learning_rate": 1.2196469905210584e-06, "loss": 0.0103, "step": 188440 }, { "epoch": 1.5912689198032552, "grad_norm": 0.1612105369567871, "learning_rate": 1.2191647497196674e-06, "loss": 0.0088, "step": 188450 }, { "epoch": 1.591353359650419, "grad_norm": 0.27337297797203064, "learning_rate": 1.2186825910372452e-06, "loss": 0.005, "step": 188460 }, { "epoch": 1.5914377994975828, "grad_norm": 0.06829335540533066, "learning_rate": 1.2182005144842602e-06, "loss": 0.0058, "step": 188470 }, { "epoch": 1.5915222393447468, "grad_norm": 0.08321701735258102, "learning_rate": 1.2177185200711872e-06, "loss": 0.0065, "step": 188480 }, { "epoch": 1.5916066791919108, "grad_norm": 0.4928400218486786, "learning_rate": 1.2172366078084924e-06, "loss": 0.0052, "step": 188490 }, { "epoch": 1.5916911190390746, "grad_norm": 0.5348516702651978, "learning_rate": 1.2167547777066435e-06, "loss": 0.0117, "step": 188500 }, { "epoch": 1.5917755588862383, "grad_norm": 0.3521338701248169, "learning_rate": 1.2162730297761039e-06, "loss": 0.013, "step": 188510 }, { "epoch": 1.5918599987334023, "grad_norm": 0.33531734347343445, "learning_rate": 1.2157913640273406e-06, "loss": 0.0121, "step": 188520 }, { "epoch": 1.5919444385805663, "grad_norm": 0.085834801197052, "learning_rate": 1.215309780470812e-06, "loss": 0.0112, "step": 188530 }, { "epoch": 1.59202887842773, "grad_norm": 0.7483876347541809, "learning_rate": 1.21482827911698e-06, "loss": 0.0073, "step": 188540 }, { "epoch": 1.5921133182748939, "grad_norm": 0.4542451500892639, "learning_rate": 1.2143468599763014e-06, "loss": 0.0064, "step": 188550 }, { "epoch": 1.5921977581220577, "grad_norm": 0.15744590759277344, "learning_rate": 1.213865523059235e-06, "loss": 0.0061, "step": 188560 }, { "epoch": 1.5922821979692217, "grad_norm": 0.45829635858535767, "learning_rate": 1.2133842683762332e-06, "loss": 0.0106, "step": 188570 }, { "epoch": 1.5923666378163857, "grad_norm": 0.20223285257816315, "learning_rate": 1.2129030959377487e-06, "loss": 0.0034, "step": 188580 }, { "epoch": 1.5924510776635494, "grad_norm": 0.060058627277612686, "learning_rate": 1.2124220057542336e-06, "loss": 0.0063, "step": 188590 }, { "epoch": 1.5925355175107132, "grad_norm": 0.2900753915309906, "learning_rate": 1.2119409978361345e-06, "loss": 0.0131, "step": 188600 }, { "epoch": 1.5926199573578772, "grad_norm": 0.5872506499290466, "learning_rate": 1.2114600721939023e-06, "loss": 0.0074, "step": 188610 }, { "epoch": 1.5927043972050412, "grad_norm": 0.20678205788135529, "learning_rate": 1.2109792288379796e-06, "loss": 0.0036, "step": 188620 }, { "epoch": 1.592788837052205, "grad_norm": 0.4098822772502899, "learning_rate": 1.2104984677788129e-06, "loss": 0.0053, "step": 188630 }, { "epoch": 1.5928732768993688, "grad_norm": 0.20337562263011932, "learning_rate": 1.2100177890268417e-06, "loss": 0.0052, "step": 188640 }, { "epoch": 1.5929577167465325, "grad_norm": 0.06174120306968689, "learning_rate": 1.2095371925925087e-06, "loss": 0.0029, "step": 188650 }, { "epoch": 1.5930421565936965, "grad_norm": 0.20133598148822784, "learning_rate": 1.209056678486251e-06, "loss": 0.0049, "step": 188660 }, { "epoch": 1.5931265964408605, "grad_norm": 0.6440880298614502, "learning_rate": 1.2085762467185058e-06, "loss": 0.004, "step": 188670 }, { "epoch": 1.5932110362880243, "grad_norm": 0.08532635122537613, "learning_rate": 1.2080958972997075e-06, "loss": 0.0052, "step": 188680 }, { "epoch": 1.593295476135188, "grad_norm": 0.12717238068580627, "learning_rate": 1.2076156302402874e-06, "loss": 0.006, "step": 188690 }, { "epoch": 1.593379915982352, "grad_norm": 0.2887002229690552, "learning_rate": 1.2071354455506807e-06, "loss": 0.0055, "step": 188700 }, { "epoch": 1.593464355829516, "grad_norm": 0.13475863635540009, "learning_rate": 1.206655343241313e-06, "loss": 0.0067, "step": 188710 }, { "epoch": 1.5935487956766798, "grad_norm": 0.2292218953371048, "learning_rate": 1.206175323322616e-06, "loss": 0.0037, "step": 188720 }, { "epoch": 1.5936332355238436, "grad_norm": 0.481935054063797, "learning_rate": 1.2056953858050113e-06, "loss": 0.0048, "step": 188730 }, { "epoch": 1.5937176753710076, "grad_norm": 0.31077834963798523, "learning_rate": 1.2052155306989287e-06, "loss": 0.0041, "step": 188740 }, { "epoch": 1.5938021152181716, "grad_norm": 0.18208137154579163, "learning_rate": 1.2047357580147839e-06, "loss": 0.0074, "step": 188750 }, { "epoch": 1.5938865550653354, "grad_norm": 0.3998991549015045, "learning_rate": 1.2042560677630027e-06, "loss": 0.004, "step": 188760 }, { "epoch": 1.5939709949124992, "grad_norm": 0.19463911652565002, "learning_rate": 1.2037764599539997e-06, "loss": 0.0051, "step": 188770 }, { "epoch": 1.594055434759663, "grad_norm": 0.37790095806121826, "learning_rate": 1.203296934598196e-06, "loss": 0.004, "step": 188780 }, { "epoch": 1.594139874606827, "grad_norm": 0.08383605629205704, "learning_rate": 1.202817491706005e-06, "loss": 0.0042, "step": 188790 }, { "epoch": 1.594224314453991, "grad_norm": 0.0012722872197628021, "learning_rate": 1.2023381312878386e-06, "loss": 0.0039, "step": 188800 }, { "epoch": 1.5943087543011547, "grad_norm": 0.3389376103878021, "learning_rate": 1.2018588533541104e-06, "loss": 0.0068, "step": 188810 }, { "epoch": 1.5943931941483185, "grad_norm": 0.24582673609256744, "learning_rate": 1.2013796579152304e-06, "loss": 0.0079, "step": 188820 }, { "epoch": 1.5944776339954825, "grad_norm": 0.2129955291748047, "learning_rate": 1.2009005449816052e-06, "loss": 0.0027, "step": 188830 }, { "epoch": 1.5945620738426465, "grad_norm": 0.103268563747406, "learning_rate": 1.2004215145636406e-06, "loss": 0.0068, "step": 188840 }, { "epoch": 1.5946465136898103, "grad_norm": 0.1928718537092209, "learning_rate": 1.1999425666717435e-06, "loss": 0.0088, "step": 188850 }, { "epoch": 1.594730953536974, "grad_norm": 0.3574080467224121, "learning_rate": 1.1994637013163141e-06, "loss": 0.0077, "step": 188860 }, { "epoch": 1.594815393384138, "grad_norm": 0.34055984020233154, "learning_rate": 1.1989849185077552e-06, "loss": 0.0076, "step": 188870 }, { "epoch": 1.5948998332313018, "grad_norm": 0.005854898598045111, "learning_rate": 1.1985062182564633e-06, "loss": 0.004, "step": 188880 }, { "epoch": 1.5949842730784658, "grad_norm": 0.22027762234210968, "learning_rate": 1.1980276005728392e-06, "loss": 0.0061, "step": 188890 }, { "epoch": 1.5950687129256296, "grad_norm": 0.4120408594608307, "learning_rate": 1.1975490654672767e-06, "loss": 0.0038, "step": 188900 }, { "epoch": 1.5951531527727933, "grad_norm": 0.17400413751602173, "learning_rate": 1.1970706129501692e-06, "loss": 0.0042, "step": 188910 }, { "epoch": 1.5952375926199573, "grad_norm": 0.38280633091926575, "learning_rate": 1.1965922430319082e-06, "loss": 0.0046, "step": 188920 }, { "epoch": 1.5953220324671213, "grad_norm": 0.7768707275390625, "learning_rate": 1.1961139557228835e-06, "loss": 0.0086, "step": 188930 }, { "epoch": 1.5954064723142851, "grad_norm": 0.1289958655834198, "learning_rate": 1.1956357510334854e-06, "loss": 0.0111, "step": 188940 }, { "epoch": 1.595490912161449, "grad_norm": 0.18415552377700806, "learning_rate": 1.1951576289740975e-06, "loss": 0.006, "step": 188950 }, { "epoch": 1.595575352008613, "grad_norm": 0.18493500351905823, "learning_rate": 1.1946795895551079e-06, "loss": 0.0045, "step": 188960 }, { "epoch": 1.5956597918557769, "grad_norm": 0.17663238942623138, "learning_rate": 1.194201632786896e-06, "loss": 0.0077, "step": 188970 }, { "epoch": 1.5957442317029407, "grad_norm": 0.2877754271030426, "learning_rate": 1.1937237586798467e-06, "loss": 0.0071, "step": 188980 }, { "epoch": 1.5958286715501044, "grad_norm": 0.033687204122543335, "learning_rate": 1.1932459672443375e-06, "loss": 0.0098, "step": 188990 }, { "epoch": 1.5959131113972682, "grad_norm": 0.25028443336486816, "learning_rate": 1.1927682584907451e-06, "loss": 0.0055, "step": 189000 }, { "epoch": 1.5959975512444322, "grad_norm": 0.24941328167915344, "learning_rate": 1.1922906324294454e-06, "loss": 0.0079, "step": 189010 }, { "epoch": 1.5960819910915962, "grad_norm": 0.013471129350364208, "learning_rate": 1.1918130890708136e-06, "loss": 0.0085, "step": 189020 }, { "epoch": 1.59616643093876, "grad_norm": 0.33973774313926697, "learning_rate": 1.1913356284252213e-06, "loss": 0.0047, "step": 189030 }, { "epoch": 1.5962508707859238, "grad_norm": 0.22659935057163239, "learning_rate": 1.1908582505030375e-06, "loss": 0.0055, "step": 189040 }, { "epoch": 1.5963353106330878, "grad_norm": 0.6734918355941772, "learning_rate": 1.1903809553146335e-06, "loss": 0.0113, "step": 189050 }, { "epoch": 1.5964197504802518, "grad_norm": 0.5284770727157593, "learning_rate": 1.189903742870374e-06, "loss": 0.0073, "step": 189060 }, { "epoch": 1.5965041903274155, "grad_norm": 0.2380892038345337, "learning_rate": 1.1894266131806248e-06, "loss": 0.0043, "step": 189070 }, { "epoch": 1.5965886301745793, "grad_norm": 0.45177900791168213, "learning_rate": 1.1889495662557472e-06, "loss": 0.0063, "step": 189080 }, { "epoch": 1.5966730700217433, "grad_norm": 0.16945712268352509, "learning_rate": 1.188472602106105e-06, "loss": 0.0079, "step": 189090 }, { "epoch": 1.5967575098689073, "grad_norm": 0.20198692381381989, "learning_rate": 1.1879957207420555e-06, "loss": 0.0111, "step": 189100 }, { "epoch": 1.596841949716071, "grad_norm": 0.18782319128513336, "learning_rate": 1.1875189221739597e-06, "loss": 0.0072, "step": 189110 }, { "epoch": 1.5969263895632348, "grad_norm": 0.05010161176323891, "learning_rate": 1.18704220641217e-06, "loss": 0.0053, "step": 189120 }, { "epoch": 1.5970108294103986, "grad_norm": 0.3984993100166321, "learning_rate": 1.186565573467044e-06, "loss": 0.0086, "step": 189130 }, { "epoch": 1.5970952692575626, "grad_norm": 0.13458196818828583, "learning_rate": 1.1860890233489319e-06, "loss": 0.0042, "step": 189140 }, { "epoch": 1.5971797091047266, "grad_norm": 0.05509168282151222, "learning_rate": 1.1856125560681846e-06, "loss": 0.0051, "step": 189150 }, { "epoch": 1.5972641489518904, "grad_norm": 0.037109375, "learning_rate": 1.1851361716351512e-06, "loss": 0.0055, "step": 189160 }, { "epoch": 1.5973485887990542, "grad_norm": 0.2494560182094574, "learning_rate": 1.184659870060177e-06, "loss": 0.0025, "step": 189170 }, { "epoch": 1.5974330286462182, "grad_norm": 0.14567773044109344, "learning_rate": 1.1841836513536097e-06, "loss": 0.0053, "step": 189180 }, { "epoch": 1.5975174684933822, "grad_norm": 0.19896654784679413, "learning_rate": 1.1837075155257904e-06, "loss": 0.0055, "step": 189190 }, { "epoch": 1.597601908340546, "grad_norm": 0.3442859947681427, "learning_rate": 1.1832314625870634e-06, "loss": 0.0053, "step": 189200 }, { "epoch": 1.5976863481877097, "grad_norm": 0.6210770010948181, "learning_rate": 1.1827554925477652e-06, "loss": 0.0057, "step": 189210 }, { "epoch": 1.5977707880348735, "grad_norm": 0.3206549286842346, "learning_rate": 1.1822796054182366e-06, "loss": 0.0041, "step": 189220 }, { "epoch": 1.5978552278820375, "grad_norm": 0.905311107635498, "learning_rate": 1.1818038012088135e-06, "loss": 0.0106, "step": 189230 }, { "epoch": 1.5979396677292015, "grad_norm": 0.04840011149644852, "learning_rate": 1.1813280799298289e-06, "loss": 0.0039, "step": 189240 }, { "epoch": 1.5980241075763653, "grad_norm": 0.13506755232810974, "learning_rate": 1.1808524415916146e-06, "loss": 0.0087, "step": 189250 }, { "epoch": 1.598108547423529, "grad_norm": 0.4149361252784729, "learning_rate": 1.180376886204504e-06, "loss": 0.0097, "step": 189260 }, { "epoch": 1.598192987270693, "grad_norm": 0.47457897663116455, "learning_rate": 1.1799014137788244e-06, "loss": 0.0121, "step": 189270 }, { "epoch": 1.598277427117857, "grad_norm": 0.02374962903559208, "learning_rate": 1.1794260243249022e-06, "loss": 0.0042, "step": 189280 }, { "epoch": 1.5983618669650208, "grad_norm": 0.2642819881439209, "learning_rate": 1.1789507178530652e-06, "loss": 0.0056, "step": 189290 }, { "epoch": 1.5984463068121846, "grad_norm": 0.5038654804229736, "learning_rate": 1.1784754943736343e-06, "loss": 0.0077, "step": 189300 }, { "epoch": 1.5985307466593486, "grad_norm": 0.02874167449772358, "learning_rate": 1.1780003538969337e-06, "loss": 0.0021, "step": 189310 }, { "epoch": 1.5986151865065126, "grad_norm": 0.22210852801799774, "learning_rate": 1.1775252964332824e-06, "loss": 0.0085, "step": 189320 }, { "epoch": 1.5986996263536764, "grad_norm": 0.27870282530784607, "learning_rate": 1.1770503219929986e-06, "loss": 0.0094, "step": 189330 }, { "epoch": 1.5987840662008401, "grad_norm": 1.075860619544983, "learning_rate": 1.1765754305863964e-06, "loss": 0.0079, "step": 189340 }, { "epoch": 1.598868506048004, "grad_norm": 0.2174411118030548, "learning_rate": 1.1761006222237941e-06, "loss": 0.0056, "step": 189350 }, { "epoch": 1.598952945895168, "grad_norm": 0.5124621987342834, "learning_rate": 1.1756258969155015e-06, "loss": 0.0078, "step": 189360 }, { "epoch": 1.599037385742332, "grad_norm": 0.01223681215196848, "learning_rate": 1.1751512546718318e-06, "loss": 0.0062, "step": 189370 }, { "epoch": 1.5991218255894957, "grad_norm": 0.04219207167625427, "learning_rate": 1.1746766955030931e-06, "loss": 0.0108, "step": 189380 }, { "epoch": 1.5992062654366594, "grad_norm": 0.4020281136035919, "learning_rate": 1.1742022194195928e-06, "loss": 0.0056, "step": 189390 }, { "epoch": 1.5992907052838234, "grad_norm": 0.08628573268651962, "learning_rate": 1.1737278264316366e-06, "loss": 0.0059, "step": 189400 }, { "epoch": 1.5993751451309874, "grad_norm": 0.09902813285589218, "learning_rate": 1.1732535165495258e-06, "loss": 0.0048, "step": 189410 }, { "epoch": 1.5994595849781512, "grad_norm": 0.38036081194877625, "learning_rate": 1.1727792897835666e-06, "loss": 0.0058, "step": 189420 }, { "epoch": 1.599544024825315, "grad_norm": 0.10091693699359894, "learning_rate": 1.172305146144055e-06, "loss": 0.006, "step": 189430 }, { "epoch": 1.599628464672479, "grad_norm": 0.19458773732185364, "learning_rate": 1.1718310856412928e-06, "loss": 0.0082, "step": 189440 }, { "epoch": 1.5997129045196428, "grad_norm": 0.28213149309158325, "learning_rate": 1.1713571082855735e-06, "loss": 0.006, "step": 189450 }, { "epoch": 1.5997973443668068, "grad_norm": 0.33358126878738403, "learning_rate": 1.170883214087195e-06, "loss": 0.0087, "step": 189460 }, { "epoch": 1.5998817842139705, "grad_norm": 0.16892535984516144, "learning_rate": 1.1704094030564477e-06, "loss": 0.0068, "step": 189470 }, { "epoch": 1.5999662240611343, "grad_norm": 0.23223158717155457, "learning_rate": 1.1699356752036245e-06, "loss": 0.005, "step": 189480 }, { "epoch": 1.6000506639082983, "grad_norm": 0.19200100004673004, "learning_rate": 1.1694620305390114e-06, "loss": 0.0045, "step": 189490 }, { "epoch": 1.6001351037554623, "grad_norm": 0.7814301252365112, "learning_rate": 1.1689884690728998e-06, "loss": 0.0062, "step": 189500 }, { "epoch": 1.600219543602626, "grad_norm": 0.3494321405887604, "learning_rate": 1.168514990815573e-06, "loss": 0.0097, "step": 189510 }, { "epoch": 1.6003039834497899, "grad_norm": 0.05583233758807182, "learning_rate": 1.1680415957773144e-06, "loss": 0.0028, "step": 189520 }, { "epoch": 1.6003884232969539, "grad_norm": 0.23887303471565247, "learning_rate": 1.1675682839684088e-06, "loss": 0.0055, "step": 189530 }, { "epoch": 1.6004728631441179, "grad_norm": 0.2826005816459656, "learning_rate": 1.1670950553991333e-06, "loss": 0.0067, "step": 189540 }, { "epoch": 1.6005573029912816, "grad_norm": 0.1378525346517563, "learning_rate": 1.1666219100797687e-06, "loss": 0.0084, "step": 189550 }, { "epoch": 1.6006417428384454, "grad_norm": 0.36546918749809265, "learning_rate": 1.1661488480205913e-06, "loss": 0.004, "step": 189560 }, { "epoch": 1.6007261826856092, "grad_norm": 0.16323436796665192, "learning_rate": 1.1656758692318748e-06, "loss": 0.0018, "step": 189570 }, { "epoch": 1.6008106225327732, "grad_norm": 0.34526923298835754, "learning_rate": 1.1652029737238913e-06, "loss": 0.0076, "step": 189580 }, { "epoch": 1.6008950623799372, "grad_norm": 0.7745584845542908, "learning_rate": 1.164730161506915e-06, "loss": 0.0067, "step": 189590 }, { "epoch": 1.600979502227101, "grad_norm": 0.12682290375232697, "learning_rate": 1.164257432591212e-06, "loss": 0.0076, "step": 189600 }, { "epoch": 1.6010639420742647, "grad_norm": 0.06506810337305069, "learning_rate": 1.1637847869870533e-06, "loss": 0.0155, "step": 189610 }, { "epoch": 1.6011483819214287, "grad_norm": 0.2666689157485962, "learning_rate": 1.1633122247047029e-06, "loss": 0.0112, "step": 189620 }, { "epoch": 1.6012328217685927, "grad_norm": 0.09198915213346481, "learning_rate": 1.1628397457544232e-06, "loss": 0.0079, "step": 189630 }, { "epoch": 1.6013172616157565, "grad_norm": 0.27387961745262146, "learning_rate": 1.1623673501464806e-06, "loss": 0.0038, "step": 189640 }, { "epoch": 1.6014017014629203, "grad_norm": 0.2659698724746704, "learning_rate": 1.1618950378911303e-06, "loss": 0.0036, "step": 189650 }, { "epoch": 1.6014861413100843, "grad_norm": 0.49441850185394287, "learning_rate": 1.161422808998634e-06, "loss": 0.0035, "step": 189660 }, { "epoch": 1.6015705811572483, "grad_norm": 0.09045042842626572, "learning_rate": 1.1609506634792473e-06, "loss": 0.0064, "step": 189670 }, { "epoch": 1.601655021004412, "grad_norm": 0.5417681932449341, "learning_rate": 1.1604786013432261e-06, "loss": 0.0045, "step": 189680 }, { "epoch": 1.6017394608515758, "grad_norm": 0.2701738774776459, "learning_rate": 1.1600066226008216e-06, "loss": 0.0103, "step": 189690 }, { "epoch": 1.6018239006987396, "grad_norm": 0.17072781920433044, "learning_rate": 1.1595347272622881e-06, "loss": 0.0046, "step": 189700 }, { "epoch": 1.6019083405459036, "grad_norm": 0.03321462497115135, "learning_rate": 1.159062915337873e-06, "loss": 0.0076, "step": 189710 }, { "epoch": 1.6019927803930676, "grad_norm": 0.2201891988515854, "learning_rate": 1.1585911868378247e-06, "loss": 0.0063, "step": 189720 }, { "epoch": 1.6020772202402314, "grad_norm": 0.17987863719463348, "learning_rate": 1.1581195417723867e-06, "loss": 0.0069, "step": 189730 }, { "epoch": 1.6021616600873951, "grad_norm": 0.45636340975761414, "learning_rate": 1.1576479801518064e-06, "loss": 0.0087, "step": 189740 }, { "epoch": 1.6022460999345591, "grad_norm": 0.21956929564476013, "learning_rate": 1.157176501986325e-06, "loss": 0.0046, "step": 189750 }, { "epoch": 1.6023305397817231, "grad_norm": 0.05783912539482117, "learning_rate": 1.1567051072861812e-06, "loss": 0.0057, "step": 189760 }, { "epoch": 1.602414979628887, "grad_norm": 0.1989659070968628, "learning_rate": 1.156233796061616e-06, "loss": 0.0074, "step": 189770 }, { "epoch": 1.6024994194760507, "grad_norm": 0.3254787027835846, "learning_rate": 1.1557625683228635e-06, "loss": 0.0111, "step": 189780 }, { "epoch": 1.6025838593232147, "grad_norm": 0.16517901420593262, "learning_rate": 1.1552914240801621e-06, "loss": 0.0059, "step": 189790 }, { "epoch": 1.6026682991703785, "grad_norm": 0.4415128529071808, "learning_rate": 1.1548203633437427e-06, "loss": 0.0075, "step": 189800 }, { "epoch": 1.6027527390175424, "grad_norm": 0.15469102561473846, "learning_rate": 1.1543493861238376e-06, "loss": 0.005, "step": 189810 }, { "epoch": 1.6028371788647062, "grad_norm": 0.015622753649950027, "learning_rate": 1.1538784924306739e-06, "loss": 0.0064, "step": 189820 }, { "epoch": 1.60292161871187, "grad_norm": 0.014635181054472923, "learning_rate": 1.1534076822744827e-06, "loss": 0.0037, "step": 189830 }, { "epoch": 1.603006058559034, "grad_norm": 0.2759559452533722, "learning_rate": 1.1529369556654868e-06, "loss": 0.0055, "step": 189840 }, { "epoch": 1.603090498406198, "grad_norm": 0.2924475073814392, "learning_rate": 1.152466312613914e-06, "loss": 0.0074, "step": 189850 }, { "epoch": 1.6031749382533618, "grad_norm": 0.29666072130203247, "learning_rate": 1.1519957531299837e-06, "loss": 0.0048, "step": 189860 }, { "epoch": 1.6032593781005255, "grad_norm": 0.047017958015203476, "learning_rate": 1.1515252772239161e-06, "loss": 0.0066, "step": 189870 }, { "epoch": 1.6033438179476895, "grad_norm": 0.2801222503185272, "learning_rate": 1.1510548849059321e-06, "loss": 0.0069, "step": 189880 }, { "epoch": 1.6034282577948535, "grad_norm": 0.6923865675926208, "learning_rate": 1.1505845761862473e-06, "loss": 0.0098, "step": 189890 }, { "epoch": 1.6035126976420173, "grad_norm": 0.19864162802696228, "learning_rate": 1.1501143510750768e-06, "loss": 0.0072, "step": 189900 }, { "epoch": 1.603597137489181, "grad_norm": 0.39318567514419556, "learning_rate": 1.1496442095826322e-06, "loss": 0.0077, "step": 189910 }, { "epoch": 1.6036815773363449, "grad_norm": 0.020720267668366432, "learning_rate": 1.1491741517191279e-06, "loss": 0.0039, "step": 189920 }, { "epoch": 1.6037660171835089, "grad_norm": 0.1492035686969757, "learning_rate": 1.1487041774947699e-06, "loss": 0.0076, "step": 189930 }, { "epoch": 1.6038504570306729, "grad_norm": 0.32489824295043945, "learning_rate": 1.14823428691977e-06, "loss": 0.0053, "step": 189940 }, { "epoch": 1.6039348968778366, "grad_norm": 0.04752218723297119, "learning_rate": 1.1477644800043308e-06, "loss": 0.0048, "step": 189950 }, { "epoch": 1.6040193367250004, "grad_norm": 0.3468307554721832, "learning_rate": 1.1472947567586601e-06, "loss": 0.0068, "step": 189960 }, { "epoch": 1.6041037765721644, "grad_norm": 0.08770204335451126, "learning_rate": 1.1468251171929561e-06, "loss": 0.009, "step": 189970 }, { "epoch": 1.6041882164193284, "grad_norm": 0.48315516114234924, "learning_rate": 1.1463555613174205e-06, "loss": 0.0068, "step": 189980 }, { "epoch": 1.6042726562664922, "grad_norm": 0.19143587350845337, "learning_rate": 1.145886089142253e-06, "loss": 0.0044, "step": 189990 }, { "epoch": 1.604357096113656, "grad_norm": 0.1960872858762741, "learning_rate": 1.145416700677649e-06, "loss": 0.0083, "step": 190000 }, { "epoch": 1.60444153596082, "grad_norm": 0.05750589817762375, "learning_rate": 1.1449473959338052e-06, "loss": 0.0059, "step": 190010 }, { "epoch": 1.604525975807984, "grad_norm": 0.24387215077877045, "learning_rate": 1.1444781749209127e-06, "loss": 0.0073, "step": 190020 }, { "epoch": 1.6046104156551477, "grad_norm": 0.10641072690486908, "learning_rate": 1.1440090376491659e-06, "loss": 0.012, "step": 190030 }, { "epoch": 1.6046948555023115, "grad_norm": 0.0609968900680542, "learning_rate": 1.1435399841287525e-06, "loss": 0.0048, "step": 190040 }, { "epoch": 1.6047792953494753, "grad_norm": 0.14309847354888916, "learning_rate": 1.1430710143698603e-06, "loss": 0.0063, "step": 190050 }, { "epoch": 1.6048637351966393, "grad_norm": 0.3885805010795593, "learning_rate": 1.1426021283826744e-06, "loss": 0.0075, "step": 190060 }, { "epoch": 1.6049481750438033, "grad_norm": 0.21219907701015472, "learning_rate": 1.142133326177381e-06, "loss": 0.008, "step": 190070 }, { "epoch": 1.605032614890967, "grad_norm": 0.3720433712005615, "learning_rate": 1.141664607764159e-06, "loss": 0.0094, "step": 190080 }, { "epoch": 1.6051170547381308, "grad_norm": 0.17732448875904083, "learning_rate": 1.1411959731531936e-06, "loss": 0.0123, "step": 190090 }, { "epoch": 1.6052014945852948, "grad_norm": 0.37246522307395935, "learning_rate": 1.1407274223546604e-06, "loss": 0.0082, "step": 190100 }, { "epoch": 1.6052859344324588, "grad_norm": 0.12681640684604645, "learning_rate": 1.1402589553787353e-06, "loss": 0.0044, "step": 190110 }, { "epoch": 1.6053703742796226, "grad_norm": 0.3182523250579834, "learning_rate": 1.1397905722355963e-06, "loss": 0.0067, "step": 190120 }, { "epoch": 1.6054548141267864, "grad_norm": 0.10997924208641052, "learning_rate": 1.1393222729354149e-06, "loss": 0.0068, "step": 190130 }, { "epoch": 1.6055392539739501, "grad_norm": 0.6454545855522156, "learning_rate": 1.1388540574883627e-06, "loss": 0.0108, "step": 190140 }, { "epoch": 1.6056236938211141, "grad_norm": 0.8180926442146301, "learning_rate": 1.1383859259046075e-06, "loss": 0.0057, "step": 190150 }, { "epoch": 1.6057081336682781, "grad_norm": 0.546021044254303, "learning_rate": 1.1379178781943202e-06, "loss": 0.0099, "step": 190160 }, { "epoch": 1.605792573515442, "grad_norm": 0.3176169991493225, "learning_rate": 1.1374499143676637e-06, "loss": 0.0092, "step": 190170 }, { "epoch": 1.6058770133626057, "grad_norm": 0.24991761147975922, "learning_rate": 1.1369820344348054e-06, "loss": 0.0065, "step": 190180 }, { "epoch": 1.6059614532097697, "grad_norm": 0.3854067623615265, "learning_rate": 1.1365142384059041e-06, "loss": 0.0051, "step": 190190 }, { "epoch": 1.6060458930569337, "grad_norm": 0.3657802641391754, "learning_rate": 1.136046526291123e-06, "loss": 0.0088, "step": 190200 }, { "epoch": 1.6061303329040975, "grad_norm": 0.00023843790404498577, "learning_rate": 1.1355788981006193e-06, "loss": 0.003, "step": 190210 }, { "epoch": 1.6062147727512612, "grad_norm": 0.19510744512081146, "learning_rate": 1.1351113538445507e-06, "loss": 0.0034, "step": 190220 }, { "epoch": 1.6062992125984252, "grad_norm": 0.5770436525344849, "learning_rate": 1.134643893533071e-06, "loss": 0.0086, "step": 190230 }, { "epoch": 1.6063836524455892, "grad_norm": 0.17974264919757843, "learning_rate": 1.1341765171763331e-06, "loss": 0.0058, "step": 190240 }, { "epoch": 1.606468092292753, "grad_norm": 0.14563390612602234, "learning_rate": 1.13370922478449e-06, "loss": 0.0061, "step": 190250 }, { "epoch": 1.6065525321399168, "grad_norm": 0.4656539261341095, "learning_rate": 1.133242016367689e-06, "loss": 0.0041, "step": 190260 }, { "epoch": 1.6066369719870806, "grad_norm": 0.29712653160095215, "learning_rate": 1.1327748919360799e-06, "loss": 0.0081, "step": 190270 }, { "epoch": 1.6067214118342446, "grad_norm": 0.004520895890891552, "learning_rate": 1.1323078514998082e-06, "loss": 0.0078, "step": 190280 }, { "epoch": 1.6068058516814085, "grad_norm": 0.38387295603752136, "learning_rate": 1.1318408950690174e-06, "loss": 0.0077, "step": 190290 }, { "epoch": 1.6068902915285723, "grad_norm": 0.3030376434326172, "learning_rate": 1.1313740226538483e-06, "loss": 0.0089, "step": 190300 }, { "epoch": 1.606974731375736, "grad_norm": 0.25189170241355896, "learning_rate": 1.130907234264444e-06, "loss": 0.0051, "step": 190310 }, { "epoch": 1.6070591712229, "grad_norm": 0.5875717997550964, "learning_rate": 1.13044052991094e-06, "loss": 0.0049, "step": 190320 }, { "epoch": 1.607143611070064, "grad_norm": 0.17948396503925323, "learning_rate": 1.1299739096034768e-06, "loss": 0.0052, "step": 190330 }, { "epoch": 1.6072280509172279, "grad_norm": 0.17994938790798187, "learning_rate": 1.1295073733521867e-06, "loss": 0.0103, "step": 190340 }, { "epoch": 1.6073124907643916, "grad_norm": 0.29305991530418396, "learning_rate": 1.1290409211672022e-06, "loss": 0.0064, "step": 190350 }, { "epoch": 1.6073969306115556, "grad_norm": 0.0351225920021534, "learning_rate": 1.1285745530586566e-06, "loss": 0.0093, "step": 190360 }, { "epoch": 1.6074813704587194, "grad_norm": 0.2152414172887802, "learning_rate": 1.128108269036679e-06, "loss": 0.005, "step": 190370 }, { "epoch": 1.6075658103058834, "grad_norm": 0.15098783373832703, "learning_rate": 1.1276420691113959e-06, "loss": 0.005, "step": 190380 }, { "epoch": 1.6076502501530472, "grad_norm": 0.14870844781398773, "learning_rate": 1.1271759532929323e-06, "loss": 0.0067, "step": 190390 }, { "epoch": 1.607734690000211, "grad_norm": 0.06261475384235382, "learning_rate": 1.1267099215914145e-06, "loss": 0.0047, "step": 190400 }, { "epoch": 1.607819129847375, "grad_norm": 0.3488151431083679, "learning_rate": 1.1262439740169629e-06, "loss": 0.0081, "step": 190410 }, { "epoch": 1.607903569694539, "grad_norm": 0.13849999010562897, "learning_rate": 1.1257781105796989e-06, "loss": 0.0033, "step": 190420 }, { "epoch": 1.6079880095417027, "grad_norm": 0.10832727700471878, "learning_rate": 1.1253123312897397e-06, "loss": 0.0064, "step": 190430 }, { "epoch": 1.6080724493888665, "grad_norm": 0.05093005672097206, "learning_rate": 1.1248466361572036e-06, "loss": 0.0033, "step": 190440 }, { "epoch": 1.6081568892360305, "grad_norm": 0.2764555513858795, "learning_rate": 1.124381025192205e-06, "loss": 0.004, "step": 190450 }, { "epoch": 1.6082413290831945, "grad_norm": 0.29018813371658325, "learning_rate": 1.1239154984048555e-06, "loss": 0.0055, "step": 190460 }, { "epoch": 1.6083257689303583, "grad_norm": 0.29618197679519653, "learning_rate": 1.1234500558052673e-06, "loss": 0.0082, "step": 190470 }, { "epoch": 1.608410208777522, "grad_norm": 0.11369218677282333, "learning_rate": 1.1229846974035485e-06, "loss": 0.0083, "step": 190480 }, { "epoch": 1.6084946486246858, "grad_norm": 0.12657906115055084, "learning_rate": 1.122519423209809e-06, "loss": 0.0043, "step": 190490 }, { "epoch": 1.6085790884718498, "grad_norm": 0.34309056401252747, "learning_rate": 1.1220542332341516e-06, "loss": 0.0063, "step": 190500 }, { "epoch": 1.6086635283190138, "grad_norm": 0.11200866103172302, "learning_rate": 1.121589127486683e-06, "loss": 0.0062, "step": 190510 }, { "epoch": 1.6087479681661776, "grad_norm": 0.12506413459777832, "learning_rate": 1.1211241059775025e-06, "loss": 0.0048, "step": 190520 }, { "epoch": 1.6088324080133414, "grad_norm": 0.2703981101512909, "learning_rate": 1.1206591687167123e-06, "loss": 0.0054, "step": 190530 }, { "epoch": 1.6089168478605054, "grad_norm": 0.007422252558171749, "learning_rate": 1.1201943157144108e-06, "loss": 0.004, "step": 190540 }, { "epoch": 1.6090012877076694, "grad_norm": 0.12161222845315933, "learning_rate": 1.1197295469806936e-06, "loss": 0.0096, "step": 190550 }, { "epoch": 1.6090857275548331, "grad_norm": 0.5437761545181274, "learning_rate": 1.1192648625256537e-06, "loss": 0.004, "step": 190560 }, { "epoch": 1.609170167401997, "grad_norm": 0.08313602954149246, "learning_rate": 1.1188002623593875e-06, "loss": 0.0075, "step": 190570 }, { "epoch": 1.609254607249161, "grad_norm": 0.10242889821529388, "learning_rate": 1.1183357464919837e-06, "loss": 0.0041, "step": 190580 }, { "epoch": 1.609339047096325, "grad_norm": 0.15730014443397522, "learning_rate": 1.1178713149335314e-06, "loss": 0.0047, "step": 190590 }, { "epoch": 1.6094234869434887, "grad_norm": 0.15679612755775452, "learning_rate": 1.1174069676941196e-06, "loss": 0.0087, "step": 190600 }, { "epoch": 1.6095079267906525, "grad_norm": 0.2515481114387512, "learning_rate": 1.1169427047838328e-06, "loss": 0.0058, "step": 190610 }, { "epoch": 1.6095923666378162, "grad_norm": 0.2015775889158249, "learning_rate": 1.1164785262127548e-06, "loss": 0.005, "step": 190620 }, { "epoch": 1.6096768064849802, "grad_norm": 0.028058791533112526, "learning_rate": 1.116014431990966e-06, "loss": 0.0032, "step": 190630 }, { "epoch": 1.6097612463321442, "grad_norm": 0.06449735909700394, "learning_rate": 1.1155504221285496e-06, "loss": 0.0077, "step": 190640 }, { "epoch": 1.609845686179308, "grad_norm": 0.16488690674304962, "learning_rate": 1.1150864966355801e-06, "loss": 0.0072, "step": 190650 }, { "epoch": 1.6099301260264718, "grad_norm": 0.3019331991672516, "learning_rate": 1.1146226555221374e-06, "loss": 0.0087, "step": 190660 }, { "epoch": 1.6100145658736358, "grad_norm": 0.011592504568397999, "learning_rate": 1.1141588987982932e-06, "loss": 0.0082, "step": 190670 }, { "epoch": 1.6100990057207998, "grad_norm": 0.22729012370109558, "learning_rate": 1.1136952264741225e-06, "loss": 0.0028, "step": 190680 }, { "epoch": 1.6101834455679636, "grad_norm": 0.5957784056663513, "learning_rate": 1.1132316385596952e-06, "loss": 0.0059, "step": 190690 }, { "epoch": 1.6102678854151273, "grad_norm": 0.22382470965385437, "learning_rate": 1.1127681350650804e-06, "loss": 0.0043, "step": 190700 }, { "epoch": 1.6103523252622913, "grad_norm": 0.23276984691619873, "learning_rate": 1.1123047160003453e-06, "loss": 0.0055, "step": 190710 }, { "epoch": 1.610436765109455, "grad_norm": 0.4762827157974243, "learning_rate": 1.1118413813755536e-06, "loss": 0.0095, "step": 190720 }, { "epoch": 1.610521204956619, "grad_norm": 0.16145440936088562, "learning_rate": 1.111378131200771e-06, "loss": 0.0036, "step": 190730 }, { "epoch": 1.6106056448037829, "grad_norm": 0.3420839309692383, "learning_rate": 1.110914965486058e-06, "loss": 0.0062, "step": 190740 }, { "epoch": 1.6106900846509467, "grad_norm": 0.2913864254951477, "learning_rate": 1.1104518842414763e-06, "loss": 0.007, "step": 190750 }, { "epoch": 1.6107745244981106, "grad_norm": 0.22517001628875732, "learning_rate": 1.109988887477081e-06, "loss": 0.0045, "step": 190760 }, { "epoch": 1.6108589643452746, "grad_norm": 0.4784412384033203, "learning_rate": 1.1095259752029313e-06, "loss": 0.0049, "step": 190770 }, { "epoch": 1.6109434041924384, "grad_norm": 0.18671134114265442, "learning_rate": 1.1090631474290804e-06, "loss": 0.0059, "step": 190780 }, { "epoch": 1.6110278440396022, "grad_norm": 0.011314565315842628, "learning_rate": 1.1086004041655802e-06, "loss": 0.0051, "step": 190790 }, { "epoch": 1.6111122838867662, "grad_norm": 0.3187698721885681, "learning_rate": 1.1081377454224823e-06, "loss": 0.0066, "step": 190800 }, { "epoch": 1.6111967237339302, "grad_norm": 0.1758573204278946, "learning_rate": 1.1076751712098332e-06, "loss": 0.0086, "step": 190810 }, { "epoch": 1.611281163581094, "grad_norm": 0.004313807934522629, "learning_rate": 1.1072126815376837e-06, "loss": 0.0083, "step": 190820 }, { "epoch": 1.6113656034282577, "grad_norm": 0.5154687762260437, "learning_rate": 1.1067502764160753e-06, "loss": 0.0082, "step": 190830 }, { "epoch": 1.6114500432754215, "grad_norm": 0.16919483244419098, "learning_rate": 1.1062879558550543e-06, "loss": 0.0036, "step": 190840 }, { "epoch": 1.6115344831225855, "grad_norm": 0.16248063743114471, "learning_rate": 1.10582571986466e-06, "loss": 0.0046, "step": 190850 }, { "epoch": 1.6116189229697495, "grad_norm": 0.2765078544616699, "learning_rate": 1.1053635684549357e-06, "loss": 0.0031, "step": 190860 }, { "epoch": 1.6117033628169133, "grad_norm": 0.2798464596271515, "learning_rate": 1.1049015016359138e-06, "loss": 0.0073, "step": 190870 }, { "epoch": 1.611787802664077, "grad_norm": 0.07901827991008759, "learning_rate": 1.1044395194176343e-06, "loss": 0.002, "step": 190880 }, { "epoch": 1.611872242511241, "grad_norm": 0.0018068784847855568, "learning_rate": 1.1039776218101294e-06, "loss": 0.0039, "step": 190890 }, { "epoch": 1.611956682358405, "grad_norm": 0.3799300193786621, "learning_rate": 1.1035158088234332e-06, "loss": 0.0058, "step": 190900 }, { "epoch": 1.6120411222055688, "grad_norm": 0.16760721802711487, "learning_rate": 1.1030540804675738e-06, "loss": 0.0098, "step": 190910 }, { "epoch": 1.6121255620527326, "grad_norm": 0.13287554681301117, "learning_rate": 1.1025924367525826e-06, "loss": 0.0048, "step": 190920 }, { "epoch": 1.6122100018998966, "grad_norm": 0.022116465494036674, "learning_rate": 1.1021308776884848e-06, "loss": 0.017, "step": 190930 }, { "epoch": 1.6122944417470606, "grad_norm": 0.05273522809147835, "learning_rate": 1.101669403285306e-06, "loss": 0.0059, "step": 190940 }, { "epoch": 1.6123788815942244, "grad_norm": 0.265926718711853, "learning_rate": 1.1012080135530684e-06, "loss": 0.0036, "step": 190950 }, { "epoch": 1.6124633214413882, "grad_norm": 0.0468108206987381, "learning_rate": 1.1007467085017926e-06, "loss": 0.0061, "step": 190960 }, { "epoch": 1.612547761288552, "grad_norm": 0.07688463479280472, "learning_rate": 1.1002854881415008e-06, "loss": 0.0046, "step": 190970 }, { "epoch": 1.612632201135716, "grad_norm": 0.039138417690992355, "learning_rate": 1.0998243524822077e-06, "loss": 0.0038, "step": 190980 }, { "epoch": 1.61271664098288, "grad_norm": 0.25040149688720703, "learning_rate": 1.0993633015339316e-06, "loss": 0.006, "step": 190990 }, { "epoch": 1.6128010808300437, "grad_norm": 0.28701552748680115, "learning_rate": 1.098902335306684e-06, "loss": 0.0101, "step": 191000 }, { "epoch": 1.6128855206772075, "grad_norm": 0.26242223381996155, "learning_rate": 1.09844145381048e-06, "loss": 0.0075, "step": 191010 }, { "epoch": 1.6129699605243715, "grad_norm": 0.6884591579437256, "learning_rate": 1.0979806570553275e-06, "loss": 0.0171, "step": 191020 }, { "epoch": 1.6130544003715355, "grad_norm": 0.380485475063324, "learning_rate": 1.0975199450512359e-06, "loss": 0.0105, "step": 191030 }, { "epoch": 1.6131388402186992, "grad_norm": 0.29658785462379456, "learning_rate": 1.0970593178082113e-06, "loss": 0.0056, "step": 191040 }, { "epoch": 1.613223280065863, "grad_norm": 0.21060559153556824, "learning_rate": 1.0965987753362572e-06, "loss": 0.0036, "step": 191050 }, { "epoch": 1.6133077199130268, "grad_norm": 0.12128826975822449, "learning_rate": 1.0961383176453793e-06, "loss": 0.0054, "step": 191060 }, { "epoch": 1.6133921597601908, "grad_norm": 0.379891037940979, "learning_rate": 1.0956779447455757e-06, "loss": 0.0046, "step": 191070 }, { "epoch": 1.6134765996073548, "grad_norm": 0.2427605241537094, "learning_rate": 1.0952176566468481e-06, "loss": 0.0078, "step": 191080 }, { "epoch": 1.6135610394545186, "grad_norm": 0.12982068955898285, "learning_rate": 1.094757453359192e-06, "loss": 0.0086, "step": 191090 }, { "epoch": 1.6136454793016823, "grad_norm": 0.18757852911949158, "learning_rate": 1.0942973348926051e-06, "loss": 0.0083, "step": 191100 }, { "epoch": 1.6137299191488463, "grad_norm": 0.08008988946676254, "learning_rate": 1.093837301257079e-06, "loss": 0.0052, "step": 191110 }, { "epoch": 1.6138143589960103, "grad_norm": 0.23542535305023193, "learning_rate": 1.0933773524626068e-06, "loss": 0.0061, "step": 191120 }, { "epoch": 1.613898798843174, "grad_norm": 0.2639104127883911, "learning_rate": 1.0929174885191768e-06, "loss": 0.0081, "step": 191130 }, { "epoch": 1.6139832386903379, "grad_norm": 0.3537781834602356, "learning_rate": 1.0924577094367795e-06, "loss": 0.0081, "step": 191140 }, { "epoch": 1.6140676785375019, "grad_norm": 0.06431154161691666, "learning_rate": 1.0919980152253984e-06, "loss": 0.0077, "step": 191150 }, { "epoch": 1.6141521183846659, "grad_norm": 0.15410274267196655, "learning_rate": 1.0915384058950213e-06, "loss": 0.0093, "step": 191160 }, { "epoch": 1.6142365582318297, "grad_norm": 0.07484526932239532, "learning_rate": 1.091078881455629e-06, "loss": 0.01, "step": 191170 }, { "epoch": 1.6143209980789934, "grad_norm": 0.3860228955745697, "learning_rate": 1.0906194419172023e-06, "loss": 0.0145, "step": 191180 }, { "epoch": 1.6144054379261572, "grad_norm": 0.5720934271812439, "learning_rate": 1.0901600872897205e-06, "loss": 0.0076, "step": 191190 }, { "epoch": 1.6144898777733212, "grad_norm": 0.11424040794372559, "learning_rate": 1.0897008175831587e-06, "loss": 0.0063, "step": 191200 }, { "epoch": 1.6145743176204852, "grad_norm": 0.00728036230430007, "learning_rate": 1.089241632807495e-06, "loss": 0.0063, "step": 191210 }, { "epoch": 1.614658757467649, "grad_norm": 0.15838676691055298, "learning_rate": 1.0887825329727009e-06, "loss": 0.0073, "step": 191220 }, { "epoch": 1.6147431973148128, "grad_norm": 0.17025743424892426, "learning_rate": 1.0883235180887498e-06, "loss": 0.0044, "step": 191230 }, { "epoch": 1.6148276371619767, "grad_norm": 0.2990196645259857, "learning_rate": 1.0878645881656086e-06, "loss": 0.0127, "step": 191240 }, { "epoch": 1.6149120770091407, "grad_norm": 0.4675845205783844, "learning_rate": 1.087405743213249e-06, "loss": 0.0065, "step": 191250 }, { "epoch": 1.6149965168563045, "grad_norm": 0.3991960883140564, "learning_rate": 1.0869469832416341e-06, "loss": 0.0186, "step": 191260 }, { "epoch": 1.6150809567034683, "grad_norm": 0.5004103779792786, "learning_rate": 1.086488308260729e-06, "loss": 0.0092, "step": 191270 }, { "epoch": 1.6151653965506323, "grad_norm": 0.18667535483837128, "learning_rate": 1.0860297182804963e-06, "loss": 0.0116, "step": 191280 }, { "epoch": 1.615249836397796, "grad_norm": 0.3267991542816162, "learning_rate": 1.0855712133108949e-06, "loss": 0.0037, "step": 191290 }, { "epoch": 1.61533427624496, "grad_norm": 0.16426406800746918, "learning_rate": 1.085112793361885e-06, "loss": 0.0061, "step": 191300 }, { "epoch": 1.6154187160921238, "grad_norm": 0.16862647235393524, "learning_rate": 1.084654458443422e-06, "loss": 0.0062, "step": 191310 }, { "epoch": 1.6155031559392876, "grad_norm": 0.15774115920066833, "learning_rate": 1.0841962085654639e-06, "loss": 0.0056, "step": 191320 }, { "epoch": 1.6155875957864516, "grad_norm": 0.06707539409399033, "learning_rate": 1.0837380437379596e-06, "loss": 0.003, "step": 191330 }, { "epoch": 1.6156720356336156, "grad_norm": 0.1866828352212906, "learning_rate": 1.083279963970864e-06, "loss": 0.0032, "step": 191340 }, { "epoch": 1.6157564754807794, "grad_norm": 0.18138229846954346, "learning_rate": 1.0828219692741255e-06, "loss": 0.0054, "step": 191350 }, { "epoch": 1.6158409153279432, "grad_norm": 0.24604350328445435, "learning_rate": 1.0823640596576901e-06, "loss": 0.0058, "step": 191360 }, { "epoch": 1.6159253551751072, "grad_norm": 0.216843843460083, "learning_rate": 1.081906235131504e-06, "loss": 0.0053, "step": 191370 }, { "epoch": 1.6160097950222712, "grad_norm": 0.2523668706417084, "learning_rate": 1.0814484957055128e-06, "loss": 0.0038, "step": 191380 }, { "epoch": 1.616094234869435, "grad_norm": 0.250916451215744, "learning_rate": 1.080990841389657e-06, "loss": 0.0069, "step": 191390 }, { "epoch": 1.6161786747165987, "grad_norm": 0.2192348688840866, "learning_rate": 1.080533272193876e-06, "loss": 0.0029, "step": 191400 }, { "epoch": 1.6162631145637625, "grad_norm": 0.42196452617645264, "learning_rate": 1.0800757881281104e-06, "loss": 0.007, "step": 191410 }, { "epoch": 1.6163475544109265, "grad_norm": 0.13844642043113708, "learning_rate": 1.0796183892022938e-06, "loss": 0.005, "step": 191420 }, { "epoch": 1.6164319942580905, "grad_norm": 0.21202100813388824, "learning_rate": 1.0791610754263637e-06, "loss": 0.0035, "step": 191430 }, { "epoch": 1.6165164341052543, "grad_norm": 0.1370166540145874, "learning_rate": 1.0787038468102524e-06, "loss": 0.007, "step": 191440 }, { "epoch": 1.616600873952418, "grad_norm": 0.105320505797863, "learning_rate": 1.0782467033638894e-06, "loss": 0.008, "step": 191450 }, { "epoch": 1.616685313799582, "grad_norm": 0.14986300468444824, "learning_rate": 1.077789645097203e-06, "loss": 0.0045, "step": 191460 }, { "epoch": 1.616769753646746, "grad_norm": 0.1591719686985016, "learning_rate": 1.0773326720201234e-06, "loss": 0.0103, "step": 191470 }, { "epoch": 1.6168541934939098, "grad_norm": 0.005404339171946049, "learning_rate": 1.0768757841425726e-06, "loss": 0.0039, "step": 191480 }, { "epoch": 1.6169386333410736, "grad_norm": 0.23693311214447021, "learning_rate": 1.0764189814744775e-06, "loss": 0.0106, "step": 191490 }, { "epoch": 1.6170230731882376, "grad_norm": 0.6507350206375122, "learning_rate": 1.0759622640257577e-06, "loss": 0.0063, "step": 191500 }, { "epoch": 1.6171075130354016, "grad_norm": 0.21505959331989288, "learning_rate": 1.075505631806334e-06, "loss": 0.0045, "step": 191510 }, { "epoch": 1.6171919528825653, "grad_norm": 0.30226027965545654, "learning_rate": 1.075049084826123e-06, "loss": 0.0064, "step": 191520 }, { "epoch": 1.6172763927297291, "grad_norm": 0.7792969346046448, "learning_rate": 1.074592623095041e-06, "loss": 0.009, "step": 191530 }, { "epoch": 1.617360832576893, "grad_norm": 0.23359818756580353, "learning_rate": 1.0741362466230037e-06, "loss": 0.0077, "step": 191540 }, { "epoch": 1.617445272424057, "grad_norm": 0.5583550930023193, "learning_rate": 1.0736799554199217e-06, "loss": 0.007, "step": 191550 }, { "epoch": 1.6175297122712209, "grad_norm": 0.3105085492134094, "learning_rate": 1.0732237494957076e-06, "loss": 0.0039, "step": 191560 }, { "epoch": 1.6176141521183847, "grad_norm": 0.09256479144096375, "learning_rate": 1.0727676288602672e-06, "loss": 0.0027, "step": 191570 }, { "epoch": 1.6176985919655484, "grad_norm": 0.43287336826324463, "learning_rate": 1.0723115935235112e-06, "loss": 0.0078, "step": 191580 }, { "epoch": 1.6177830318127124, "grad_norm": 0.603939414024353, "learning_rate": 1.0718556434953425e-06, "loss": 0.0076, "step": 191590 }, { "epoch": 1.6178674716598764, "grad_norm": 0.07000215351581573, "learning_rate": 1.071399778785664e-06, "loss": 0.0044, "step": 191600 }, { "epoch": 1.6179519115070402, "grad_norm": 0.20234587788581848, "learning_rate": 1.0709439994043757e-06, "loss": 0.0042, "step": 191610 }, { "epoch": 1.618036351354204, "grad_norm": 0.32989779114723206, "learning_rate": 1.0704883053613802e-06, "loss": 0.0058, "step": 191620 }, { "epoch": 1.618120791201368, "grad_norm": 0.01925607956945896, "learning_rate": 1.0700326966665736e-06, "loss": 0.0046, "step": 191630 }, { "epoch": 1.6182052310485318, "grad_norm": 0.33088287711143494, "learning_rate": 1.0695771733298499e-06, "loss": 0.0062, "step": 191640 }, { "epoch": 1.6182896708956958, "grad_norm": 0.26149433851242065, "learning_rate": 1.069121735361106e-06, "loss": 0.0081, "step": 191650 }, { "epoch": 1.6183741107428595, "grad_norm": 0.1343286633491516, "learning_rate": 1.0686663827702308e-06, "loss": 0.0084, "step": 191660 }, { "epoch": 1.6184585505900233, "grad_norm": 0.46880367398262024, "learning_rate": 1.0682111155671182e-06, "loss": 0.0117, "step": 191670 }, { "epoch": 1.6185429904371873, "grad_norm": 0.40268975496292114, "learning_rate": 1.0677559337616545e-06, "loss": 0.0047, "step": 191680 }, { "epoch": 1.6186274302843513, "grad_norm": 0.1294224113225937, "learning_rate": 1.0673008373637256e-06, "loss": 0.0047, "step": 191690 }, { "epoch": 1.618711870131515, "grad_norm": 0.3111096918582916, "learning_rate": 1.0668458263832154e-06, "loss": 0.0054, "step": 191700 }, { "epoch": 1.6187963099786788, "grad_norm": 0.2610957622528076, "learning_rate": 1.0663909008300094e-06, "loss": 0.0046, "step": 191710 }, { "epoch": 1.6188807498258428, "grad_norm": 0.15880344808101654, "learning_rate": 1.0659360607139852e-06, "loss": 0.0035, "step": 191720 }, { "epoch": 1.6189651896730068, "grad_norm": 0.12207768857479095, "learning_rate": 1.0654813060450253e-06, "loss": 0.0039, "step": 191730 }, { "epoch": 1.6190496295201706, "grad_norm": 0.0793299674987793, "learning_rate": 1.0650266368330036e-06, "loss": 0.0052, "step": 191740 }, { "epoch": 1.6191340693673344, "grad_norm": 0.21065160632133484, "learning_rate": 1.0645720530877985e-06, "loss": 0.0066, "step": 191750 }, { "epoch": 1.6192185092144982, "grad_norm": 0.07064193487167358, "learning_rate": 1.0641175548192834e-06, "loss": 0.0057, "step": 191760 }, { "epoch": 1.6193029490616622, "grad_norm": 0.31819579005241394, "learning_rate": 1.0636631420373256e-06, "loss": 0.0045, "step": 191770 }, { "epoch": 1.6193873889088262, "grad_norm": 0.614737868309021, "learning_rate": 1.0632088147517994e-06, "loss": 0.015, "step": 191780 }, { "epoch": 1.61947182875599, "grad_norm": 0.24157752096652985, "learning_rate": 1.0627545729725691e-06, "loss": 0.0068, "step": 191790 }, { "epoch": 1.6195562686031537, "grad_norm": 0.34159761667251587, "learning_rate": 1.0623004167095047e-06, "loss": 0.0097, "step": 191800 }, { "epoch": 1.6196407084503177, "grad_norm": 0.17044971883296967, "learning_rate": 1.0618463459724665e-06, "loss": 0.0084, "step": 191810 }, { "epoch": 1.6197251482974817, "grad_norm": 0.02700403518974781, "learning_rate": 1.06139236077132e-06, "loss": 0.0026, "step": 191820 }, { "epoch": 1.6198095881446455, "grad_norm": 0.24885813891887665, "learning_rate": 1.0609384611159247e-06, "loss": 0.0063, "step": 191830 }, { "epoch": 1.6198940279918093, "grad_norm": 0.28326502442359924, "learning_rate": 1.0604846470161385e-06, "loss": 0.0035, "step": 191840 }, { "epoch": 1.6199784678389733, "grad_norm": 0.6665193438529968, "learning_rate": 1.0600309184818168e-06, "loss": 0.0074, "step": 191850 }, { "epoch": 1.6200629076861373, "grad_norm": 0.013254291377961636, "learning_rate": 1.0595772755228184e-06, "loss": 0.0078, "step": 191860 }, { "epoch": 1.620147347533301, "grad_norm": 0.4204820394515991, "learning_rate": 1.0591237181489937e-06, "loss": 0.0052, "step": 191870 }, { "epoch": 1.6202317873804648, "grad_norm": 0.1411123424768448, "learning_rate": 1.0586702463701926e-06, "loss": 0.0059, "step": 191880 }, { "epoch": 1.6203162272276286, "grad_norm": 0.04577278718352318, "learning_rate": 1.0582168601962678e-06, "loss": 0.0115, "step": 191890 }, { "epoch": 1.6204006670747926, "grad_norm": 0.12573347985744476, "learning_rate": 1.057763559637064e-06, "loss": 0.0048, "step": 191900 }, { "epoch": 1.6204851069219566, "grad_norm": 0.4460136294364929, "learning_rate": 1.0573103447024286e-06, "loss": 0.0061, "step": 191910 }, { "epoch": 1.6205695467691204, "grad_norm": 0.3572072982788086, "learning_rate": 1.056857215402205e-06, "loss": 0.0145, "step": 191920 }, { "epoch": 1.6206539866162841, "grad_norm": 0.03736574947834015, "learning_rate": 1.0564041717462343e-06, "loss": 0.0028, "step": 191930 }, { "epoch": 1.6207384264634481, "grad_norm": 0.12010926008224487, "learning_rate": 1.0559512137443557e-06, "loss": 0.0072, "step": 191940 }, { "epoch": 1.6208228663106121, "grad_norm": 0.32503339648246765, "learning_rate": 1.0554983414064097e-06, "loss": 0.0046, "step": 191950 }, { "epoch": 1.620907306157776, "grad_norm": 0.20962074398994446, "learning_rate": 1.0550455547422301e-06, "loss": 0.0079, "step": 191960 }, { "epoch": 1.6209917460049397, "grad_norm": 0.5738019943237305, "learning_rate": 1.054592853761654e-06, "loss": 0.0086, "step": 191970 }, { "epoch": 1.6210761858521034, "grad_norm": 0.21727019548416138, "learning_rate": 1.054140238474513e-06, "loss": 0.0036, "step": 191980 }, { "epoch": 1.6211606256992674, "grad_norm": 0.2291104644536972, "learning_rate": 1.0536877088906355e-06, "loss": 0.0066, "step": 191990 }, { "epoch": 1.6212450655464314, "grad_norm": 0.46028822660446167, "learning_rate": 1.053235265019854e-06, "loss": 0.006, "step": 192000 }, { "epoch": 1.6213295053935952, "grad_norm": 0.16286273300647736, "learning_rate": 1.052782906871994e-06, "loss": 0.0038, "step": 192010 }, { "epoch": 1.621413945240759, "grad_norm": 0.2205696403980255, "learning_rate": 1.05233063445688e-06, "loss": 0.0076, "step": 192020 }, { "epoch": 1.621498385087923, "grad_norm": 0.19943924248218536, "learning_rate": 1.0518784477843346e-06, "loss": 0.0086, "step": 192030 }, { "epoch": 1.621582824935087, "grad_norm": 0.4277246594429016, "learning_rate": 1.0514263468641816e-06, "loss": 0.0112, "step": 192040 }, { "epoch": 1.6216672647822508, "grad_norm": 0.1997816264629364, "learning_rate": 1.0509743317062376e-06, "loss": 0.0122, "step": 192050 }, { "epoch": 1.6217517046294145, "grad_norm": 0.2148008942604065, "learning_rate": 1.0505224023203236e-06, "loss": 0.0063, "step": 192060 }, { "epoch": 1.6218361444765785, "grad_norm": 0.23200353980064392, "learning_rate": 1.050070558716252e-06, "loss": 0.0083, "step": 192070 }, { "epoch": 1.6219205843237425, "grad_norm": 0.1017572283744812, "learning_rate": 1.049618800903841e-06, "loss": 0.005, "step": 192080 }, { "epoch": 1.6220050241709063, "grad_norm": 0.09619655460119247, "learning_rate": 1.049167128892898e-06, "loss": 0.0091, "step": 192090 }, { "epoch": 1.62208946401807, "grad_norm": 0.1510562300682068, "learning_rate": 1.048715542693236e-06, "loss": 0.0055, "step": 192100 }, { "epoch": 1.6221739038652339, "grad_norm": 0.07672778517007828, "learning_rate": 1.0482640423146634e-06, "loss": 0.0056, "step": 192110 }, { "epoch": 1.6222583437123979, "grad_norm": 0.38301387429237366, "learning_rate": 1.0478126277669842e-06, "loss": 0.0061, "step": 192120 }, { "epoch": 1.6223427835595619, "grad_norm": 0.3841884136199951, "learning_rate": 1.0473612990600062e-06, "loss": 0.0083, "step": 192130 }, { "epoch": 1.6224272234067256, "grad_norm": 0.3478841185569763, "learning_rate": 1.04691005620353e-06, "loss": 0.0059, "step": 192140 }, { "epoch": 1.6225116632538894, "grad_norm": 0.546803891658783, "learning_rate": 1.0464588992073583e-06, "loss": 0.0058, "step": 192150 }, { "epoch": 1.6225961031010534, "grad_norm": 0.019863490015268326, "learning_rate": 1.0460078280812892e-06, "loss": 0.0034, "step": 192160 }, { "epoch": 1.6226805429482174, "grad_norm": 0.13330812752246857, "learning_rate": 1.0455568428351198e-06, "loss": 0.011, "step": 192170 }, { "epoch": 1.6227649827953812, "grad_norm": 0.3152494728565216, "learning_rate": 1.0451059434786442e-06, "loss": 0.0064, "step": 192180 }, { "epoch": 1.622849422642545, "grad_norm": 0.2827269434928894, "learning_rate": 1.0446551300216578e-06, "loss": 0.008, "step": 192190 }, { "epoch": 1.622933862489709, "grad_norm": 0.013343647122383118, "learning_rate": 1.0442044024739507e-06, "loss": 0.006, "step": 192200 }, { "epoch": 1.6230183023368727, "grad_norm": 0.03306518867611885, "learning_rate": 1.0437537608453146e-06, "loss": 0.003, "step": 192210 }, { "epoch": 1.6231027421840367, "grad_norm": 0.36243411898612976, "learning_rate": 1.043303205145536e-06, "loss": 0.0082, "step": 192220 }, { "epoch": 1.6231871820312005, "grad_norm": 0.20411047339439392, "learning_rate": 1.0428527353843998e-06, "loss": 0.009, "step": 192230 }, { "epoch": 1.6232716218783643, "grad_norm": 0.33666491508483887, "learning_rate": 1.0424023515716925e-06, "loss": 0.0074, "step": 192240 }, { "epoch": 1.6233560617255283, "grad_norm": 0.14662687480449677, "learning_rate": 1.0419520537171957e-06, "loss": 0.0063, "step": 192250 }, { "epoch": 1.6234405015726923, "grad_norm": 0.4751158654689789, "learning_rate": 1.0415018418306883e-06, "loss": 0.0058, "step": 192260 }, { "epoch": 1.623524941419856, "grad_norm": 0.46103110909461975, "learning_rate": 1.0410517159219491e-06, "loss": 0.0061, "step": 192270 }, { "epoch": 1.6236093812670198, "grad_norm": 0.24446409940719604, "learning_rate": 1.0406016760007564e-06, "loss": 0.0077, "step": 192280 }, { "epoch": 1.6236938211141838, "grad_norm": 0.24157699942588806, "learning_rate": 1.0401517220768824e-06, "loss": 0.0123, "step": 192290 }, { "epoch": 1.6237782609613478, "grad_norm": 0.5979498624801636, "learning_rate": 1.039701854160103e-06, "loss": 0.0037, "step": 192300 }, { "epoch": 1.6238627008085116, "grad_norm": 0.6784719824790955, "learning_rate": 1.0392520722601869e-06, "loss": 0.0053, "step": 192310 }, { "epoch": 1.6239471406556754, "grad_norm": 0.08539188653230667, "learning_rate": 1.0388023763869048e-06, "loss": 0.0033, "step": 192320 }, { "epoch": 1.6240315805028391, "grad_norm": 0.15227991342544556, "learning_rate": 1.0383527665500237e-06, "loss": 0.0032, "step": 192330 }, { "epoch": 1.6241160203500031, "grad_norm": 0.12816038727760315, "learning_rate": 1.0379032427593089e-06, "loss": 0.0017, "step": 192340 }, { "epoch": 1.6242004601971671, "grad_norm": 0.18476907908916473, "learning_rate": 1.0374538050245231e-06, "loss": 0.0082, "step": 192350 }, { "epoch": 1.624284900044331, "grad_norm": 0.2203085720539093, "learning_rate": 1.037004453355428e-06, "loss": 0.0073, "step": 192360 }, { "epoch": 1.6243693398914947, "grad_norm": 0.23342685401439667, "learning_rate": 1.0365551877617851e-06, "loss": 0.007, "step": 192370 }, { "epoch": 1.6244537797386587, "grad_norm": 0.23642027378082275, "learning_rate": 1.0361060082533502e-06, "loss": 0.0035, "step": 192380 }, { "epoch": 1.6245382195858227, "grad_norm": 0.5399585366249084, "learning_rate": 1.0356569148398815e-06, "loss": 0.0095, "step": 192390 }, { "epoch": 1.6246226594329864, "grad_norm": 0.41055187582969666, "learning_rate": 1.0352079075311327e-06, "loss": 0.0066, "step": 192400 }, { "epoch": 1.6247070992801502, "grad_norm": 0.2768898606300354, "learning_rate": 1.0347589863368552e-06, "loss": 0.0046, "step": 192410 }, { "epoch": 1.6247915391273142, "grad_norm": 0.1860112100839615, "learning_rate": 1.0343101512667991e-06, "loss": 0.0055, "step": 192420 }, { "epoch": 1.6248759789744782, "grad_norm": 0.19036133587360382, "learning_rate": 1.0338614023307148e-06, "loss": 0.008, "step": 192430 }, { "epoch": 1.624960418821642, "grad_norm": 0.12104132026433945, "learning_rate": 1.033412739538347e-06, "loss": 0.0074, "step": 192440 }, { "epoch": 1.6250448586688058, "grad_norm": 0.5510103106498718, "learning_rate": 1.032964162899443e-06, "loss": 0.0091, "step": 192450 }, { "epoch": 1.6251292985159695, "grad_norm": 0.26268690824508667, "learning_rate": 1.0325156724237445e-06, "loss": 0.0051, "step": 192460 }, { "epoch": 1.6252137383631335, "grad_norm": 0.2852616310119629, "learning_rate": 1.032067268120991e-06, "loss": 0.0041, "step": 192470 }, { "epoch": 1.6252981782102975, "grad_norm": 0.24483992159366608, "learning_rate": 1.0316189500009249e-06, "loss": 0.0071, "step": 192480 }, { "epoch": 1.6253826180574613, "grad_norm": 0.10504725575447083, "learning_rate": 1.0311707180732815e-06, "loss": 0.0106, "step": 192490 }, { "epoch": 1.625467057904625, "grad_norm": 0.8932705521583557, "learning_rate": 1.0307225723477975e-06, "loss": 0.0131, "step": 192500 }, { "epoch": 1.625551497751789, "grad_norm": 0.08390520513057709, "learning_rate": 1.030274512834204e-06, "loss": 0.0042, "step": 192510 }, { "epoch": 1.625635937598953, "grad_norm": 0.8143832087516785, "learning_rate": 1.0298265395422363e-06, "loss": 0.0092, "step": 192520 }, { "epoch": 1.6257203774461169, "grad_norm": 0.22176265716552734, "learning_rate": 1.0293786524816213e-06, "loss": 0.0027, "step": 192530 }, { "epoch": 1.6258048172932806, "grad_norm": 0.00036361548700369895, "learning_rate": 1.0289308516620895e-06, "loss": 0.0071, "step": 192540 }, { "epoch": 1.6258892571404444, "grad_norm": 0.137697234749794, "learning_rate": 1.0284831370933645e-06, "loss": 0.0038, "step": 192550 }, { "epoch": 1.6259736969876084, "grad_norm": 0.37109866738319397, "learning_rate": 1.0280355087851735e-06, "loss": 0.0111, "step": 192560 }, { "epoch": 1.6260581368347724, "grad_norm": 0.809782087802887, "learning_rate": 1.0275879667472367e-06, "loss": 0.0099, "step": 192570 }, { "epoch": 1.6261425766819362, "grad_norm": 0.18405716121196747, "learning_rate": 1.0271405109892757e-06, "loss": 0.0054, "step": 192580 }, { "epoch": 1.6262270165291, "grad_norm": 0.0660264864563942, "learning_rate": 1.0266931415210085e-06, "loss": 0.0076, "step": 192590 }, { "epoch": 1.626311456376264, "grad_norm": 0.017644111067056656, "learning_rate": 1.0262458583521511e-06, "loss": 0.0068, "step": 192600 }, { "epoch": 1.626395896223428, "grad_norm": 0.10499019175767899, "learning_rate": 1.0257986614924204e-06, "loss": 0.0034, "step": 192610 }, { "epoch": 1.6264803360705917, "grad_norm": 0.35543346405029297, "learning_rate": 1.0253515509515267e-06, "loss": 0.0055, "step": 192620 }, { "epoch": 1.6265647759177555, "grad_norm": 0.14651070535182953, "learning_rate": 1.024904526739185e-06, "loss": 0.0071, "step": 192630 }, { "epoch": 1.6266492157649195, "grad_norm": 0.272445410490036, "learning_rate": 1.0244575888651004e-06, "loss": 0.0063, "step": 192640 }, { "epoch": 1.6267336556120835, "grad_norm": 0.0985470786690712, "learning_rate": 1.0240107373389836e-06, "loss": 0.0077, "step": 192650 }, { "epoch": 1.6268180954592473, "grad_norm": 0.1315269023180008, "learning_rate": 1.0235639721705393e-06, "loss": 0.0099, "step": 192660 }, { "epoch": 1.626902535306411, "grad_norm": 0.7421367168426514, "learning_rate": 1.0231172933694704e-06, "loss": 0.0089, "step": 192670 }, { "epoch": 1.6269869751535748, "grad_norm": 0.5963689088821411, "learning_rate": 1.0226707009454779e-06, "loss": 0.0084, "step": 192680 }, { "epoch": 1.6270714150007388, "grad_norm": 0.06418309360742569, "learning_rate": 1.0222241949082635e-06, "loss": 0.0069, "step": 192690 }, { "epoch": 1.6271558548479028, "grad_norm": 0.11124133318662643, "learning_rate": 1.0217777752675246e-06, "loss": 0.0068, "step": 192700 }, { "epoch": 1.6272402946950666, "grad_norm": 0.23658065497875214, "learning_rate": 1.0213314420329561e-06, "loss": 0.0062, "step": 192710 }, { "epoch": 1.6273247345422304, "grad_norm": 0.19860443472862244, "learning_rate": 1.020885195214255e-06, "loss": 0.0086, "step": 192720 }, { "epoch": 1.6274091743893944, "grad_norm": 0.45569804310798645, "learning_rate": 1.0204390348211118e-06, "loss": 0.0054, "step": 192730 }, { "epoch": 1.6274936142365584, "grad_norm": 0.308528870344162, "learning_rate": 1.019992960863217e-06, "loss": 0.0052, "step": 192740 }, { "epoch": 1.6275780540837221, "grad_norm": 0.29615798592567444, "learning_rate": 1.0195469733502583e-06, "loss": 0.007, "step": 192750 }, { "epoch": 1.627662493930886, "grad_norm": 0.21468383073806763, "learning_rate": 1.0191010722919254e-06, "loss": 0.0047, "step": 192760 }, { "epoch": 1.62774693377805, "grad_norm": 0.13745522499084473, "learning_rate": 1.0186552576978998e-06, "loss": 0.0085, "step": 192770 }, { "epoch": 1.6278313736252137, "grad_norm": 0.2981725335121155, "learning_rate": 1.018209529577867e-06, "loss": 0.0142, "step": 192780 }, { "epoch": 1.6279158134723777, "grad_norm": 0.11957457661628723, "learning_rate": 1.0177638879415063e-06, "loss": 0.007, "step": 192790 }, { "epoch": 1.6280002533195415, "grad_norm": 0.07526112347841263, "learning_rate": 1.0173183327984999e-06, "loss": 0.0047, "step": 192800 }, { "epoch": 1.6280846931667052, "grad_norm": 0.06245935708284378, "learning_rate": 1.0168728641585224e-06, "loss": 0.0058, "step": 192810 }, { "epoch": 1.6281691330138692, "grad_norm": 0.08164674788713455, "learning_rate": 1.0164274820312503e-06, "loss": 0.006, "step": 192820 }, { "epoch": 1.6282535728610332, "grad_norm": 0.18647290766239166, "learning_rate": 1.0159821864263569e-06, "loss": 0.0033, "step": 192830 }, { "epoch": 1.628338012708197, "grad_norm": 0.7604014873504639, "learning_rate": 1.0155369773535128e-06, "loss": 0.0133, "step": 192840 }, { "epoch": 1.6284224525553608, "grad_norm": 0.4287617802619934, "learning_rate": 1.0150918548223903e-06, "loss": 0.0078, "step": 192850 }, { "epoch": 1.6285068924025248, "grad_norm": 0.22352442145347595, "learning_rate": 1.014646818842655e-06, "loss": 0.0068, "step": 192860 }, { "epoch": 1.6285913322496888, "grad_norm": 0.4123680591583252, "learning_rate": 1.0142018694239752e-06, "loss": 0.0074, "step": 192870 }, { "epoch": 1.6286757720968525, "grad_norm": 0.0487891286611557, "learning_rate": 1.0137570065760127e-06, "loss": 0.0088, "step": 192880 }, { "epoch": 1.6287602119440163, "grad_norm": 0.2768875062465668, "learning_rate": 1.013312230308433e-06, "loss": 0.0064, "step": 192890 }, { "epoch": 1.62884465179118, "grad_norm": 0.24957707524299622, "learning_rate": 1.0128675406308941e-06, "loss": 0.0056, "step": 192900 }, { "epoch": 1.628929091638344, "grad_norm": 0.159158855676651, "learning_rate": 1.0124229375530553e-06, "loss": 0.0104, "step": 192910 }, { "epoch": 1.629013531485508, "grad_norm": 0.2688572108745575, "learning_rate": 1.0119784210845718e-06, "loss": 0.0071, "step": 192920 }, { "epoch": 1.6290979713326719, "grad_norm": 0.5886633396148682, "learning_rate": 1.0115339912351013e-06, "loss": 0.012, "step": 192930 }, { "epoch": 1.6291824111798356, "grad_norm": 0.033673472702503204, "learning_rate": 1.011089648014295e-06, "loss": 0.004, "step": 192940 }, { "epoch": 1.6292668510269996, "grad_norm": 0.06716867536306381, "learning_rate": 1.0106453914318031e-06, "loss": 0.0022, "step": 192950 }, { "epoch": 1.6293512908741636, "grad_norm": 0.2511182725429535, "learning_rate": 1.0102012214972767e-06, "loss": 0.0067, "step": 192960 }, { "epoch": 1.6294357307213274, "grad_norm": 0.37952864170074463, "learning_rate": 1.0097571382203613e-06, "loss": 0.0071, "step": 192970 }, { "epoch": 1.6295201705684912, "grad_norm": 0.28296300768852234, "learning_rate": 1.0093131416107054e-06, "loss": 0.0092, "step": 192980 }, { "epoch": 1.6296046104156552, "grad_norm": 0.13126704096794128, "learning_rate": 1.008869231677948e-06, "loss": 0.0041, "step": 192990 }, { "epoch": 1.6296890502628192, "grad_norm": 0.4663240313529968, "learning_rate": 1.0084254084317346e-06, "loss": 0.0044, "step": 193000 }, { "epoch": 1.629773490109983, "grad_norm": 0.4206430912017822, "learning_rate": 1.0079816718817014e-06, "loss": 0.0059, "step": 193010 }, { "epoch": 1.6298579299571467, "grad_norm": 0.279264897108078, "learning_rate": 1.0075380220374898e-06, "loss": 0.005, "step": 193020 }, { "epoch": 1.6299423698043105, "grad_norm": 0.06950562447309494, "learning_rate": 1.0070944589087328e-06, "loss": 0.0068, "step": 193030 }, { "epoch": 1.6300268096514745, "grad_norm": 0.21911445260047913, "learning_rate": 1.0066509825050674e-06, "loss": 0.0062, "step": 193040 }, { "epoch": 1.6301112494986385, "grad_norm": 0.7314008474349976, "learning_rate": 1.0062075928361249e-06, "loss": 0.008, "step": 193050 }, { "epoch": 1.6301956893458023, "grad_norm": 0.5327257513999939, "learning_rate": 1.0057642899115344e-06, "loss": 0.0061, "step": 193060 }, { "epoch": 1.630280129192966, "grad_norm": 0.28943678736686707, "learning_rate": 1.0053210737409252e-06, "loss": 0.0047, "step": 193070 }, { "epoch": 1.63036456904013, "grad_norm": 0.3243129253387451, "learning_rate": 1.0048779443339223e-06, "loss": 0.0103, "step": 193080 }, { "epoch": 1.630449008887294, "grad_norm": 0.0938245877623558, "learning_rate": 1.0044349017001531e-06, "loss": 0.0044, "step": 193090 }, { "epoch": 1.6305334487344578, "grad_norm": 0.06278442591428757, "learning_rate": 1.0039919458492375e-06, "loss": 0.0059, "step": 193100 }, { "epoch": 1.6306178885816216, "grad_norm": 0.061814989894628525, "learning_rate": 1.0035490767907996e-06, "loss": 0.0036, "step": 193110 }, { "epoch": 1.6307023284287856, "grad_norm": 0.38421788811683655, "learning_rate": 1.0031062945344555e-06, "loss": 0.0059, "step": 193120 }, { "epoch": 1.6307867682759494, "grad_norm": 0.11468187719583511, "learning_rate": 1.0026635990898248e-06, "loss": 0.0063, "step": 193130 }, { "epoch": 1.6308712081231134, "grad_norm": 0.21003833413124084, "learning_rate": 1.002220990466522e-06, "loss": 0.0058, "step": 193140 }, { "epoch": 1.6309556479702771, "grad_norm": 0.11141383647918701, "learning_rate": 1.0017784686741594e-06, "loss": 0.0095, "step": 193150 }, { "epoch": 1.631040087817441, "grad_norm": 0.1473395675420761, "learning_rate": 1.0013360337223482e-06, "loss": 0.01, "step": 193160 }, { "epoch": 1.631124527664605, "grad_norm": 0.29893651604652405, "learning_rate": 1.0008936856207001e-06, "loss": 0.0052, "step": 193170 }, { "epoch": 1.631208967511769, "grad_norm": 0.008027578704059124, "learning_rate": 1.0004514243788221e-06, "loss": 0.0053, "step": 193180 }, { "epoch": 1.6312934073589327, "grad_norm": 0.0522993728518486, "learning_rate": 1.000009250006318e-06, "loss": 0.0048, "step": 193190 }, { "epoch": 1.6313778472060965, "grad_norm": 0.27533748745918274, "learning_rate": 9.995671625127944e-07, "loss": 0.0062, "step": 193200 }, { "epoch": 1.6314622870532605, "grad_norm": 0.005762302782386541, "learning_rate": 9.991251619078512e-07, "loss": 0.0076, "step": 193210 }, { "epoch": 1.6315467269004245, "grad_norm": 0.518341064453125, "learning_rate": 9.98683248201091e-07, "loss": 0.0067, "step": 193220 }, { "epoch": 1.6316311667475882, "grad_norm": 0.17140710353851318, "learning_rate": 9.982414214021107e-07, "loss": 0.0052, "step": 193230 }, { "epoch": 1.631715606594752, "grad_norm": 0.26608678698539734, "learning_rate": 9.977996815205066e-07, "loss": 0.0066, "step": 193240 }, { "epoch": 1.6318000464419158, "grad_norm": 0.3773612678050995, "learning_rate": 9.973580285658718e-07, "loss": 0.0069, "step": 193250 }, { "epoch": 1.6318844862890798, "grad_norm": 0.04105842113494873, "learning_rate": 9.969164625478018e-07, "loss": 0.0034, "step": 193260 }, { "epoch": 1.6319689261362438, "grad_norm": 0.016226306557655334, "learning_rate": 9.964749834758846e-07, "loss": 0.0043, "step": 193270 }, { "epoch": 1.6320533659834076, "grad_norm": 0.1248486340045929, "learning_rate": 9.960335913597119e-07, "loss": 0.0069, "step": 193280 }, { "epoch": 1.6321378058305713, "grad_norm": 0.16375556588172913, "learning_rate": 9.95592286208869e-07, "loss": 0.006, "step": 193290 }, { "epoch": 1.6322222456777353, "grad_norm": 0.020176779478788376, "learning_rate": 9.951510680329413e-07, "loss": 0.0051, "step": 193300 }, { "epoch": 1.6323066855248993, "grad_norm": 0.35614871978759766, "learning_rate": 9.947099368415114e-07, "loss": 0.0043, "step": 193310 }, { "epoch": 1.632391125372063, "grad_norm": 0.26378771662712097, "learning_rate": 9.942688926441596e-07, "loss": 0.006, "step": 193320 }, { "epoch": 1.6324755652192269, "grad_norm": 0.4107036888599396, "learning_rate": 9.938279354504677e-07, "loss": 0.0068, "step": 193330 }, { "epoch": 1.6325600050663909, "grad_norm": 0.16240495443344116, "learning_rate": 9.93387065270011e-07, "loss": 0.0048, "step": 193340 }, { "epoch": 1.6326444449135549, "grad_norm": 0.35309234261512756, "learning_rate": 9.929462821123676e-07, "loss": 0.0056, "step": 193350 }, { "epoch": 1.6327288847607186, "grad_norm": 0.15666532516479492, "learning_rate": 9.925055859871085e-07, "loss": 0.0065, "step": 193360 }, { "epoch": 1.6328133246078824, "grad_norm": 0.10764098167419434, "learning_rate": 9.920649769038083e-07, "loss": 0.0056, "step": 193370 }, { "epoch": 1.6328977644550462, "grad_norm": 0.239373579621315, "learning_rate": 9.91624454872035e-07, "loss": 0.0081, "step": 193380 }, { "epoch": 1.6329822043022102, "grad_norm": 0.5426380634307861, "learning_rate": 9.91184019901358e-07, "loss": 0.0103, "step": 193390 }, { "epoch": 1.6330666441493742, "grad_norm": 0.12367808073759079, "learning_rate": 9.907436720013418e-07, "loss": 0.0073, "step": 193400 }, { "epoch": 1.633151083996538, "grad_norm": 0.27300482988357544, "learning_rate": 9.903034111815508e-07, "loss": 0.0101, "step": 193410 }, { "epoch": 1.6332355238437017, "grad_norm": 0.12641966342926025, "learning_rate": 9.898632374515488e-07, "loss": 0.0056, "step": 193420 }, { "epoch": 1.6333199636908657, "grad_norm": 0.18696771562099457, "learning_rate": 9.894231508208945e-07, "loss": 0.0058, "step": 193430 }, { "epoch": 1.6334044035380297, "grad_norm": 0.09072937816381454, "learning_rate": 9.889831512991487e-07, "loss": 0.0069, "step": 193440 }, { "epoch": 1.6334888433851935, "grad_norm": 0.294653981924057, "learning_rate": 9.885432388958655e-07, "loss": 0.0079, "step": 193450 }, { "epoch": 1.6335732832323573, "grad_norm": 0.20031271874904633, "learning_rate": 9.881034136206025e-07, "loss": 0.0074, "step": 193460 }, { "epoch": 1.633657723079521, "grad_norm": 0.3076358735561371, "learning_rate": 9.876636754829116e-07, "loss": 0.0058, "step": 193470 }, { "epoch": 1.633742162926685, "grad_norm": 0.00019508443074300885, "learning_rate": 9.872240244923431e-07, "loss": 0.0051, "step": 193480 }, { "epoch": 1.633826602773849, "grad_norm": 0.45809435844421387, "learning_rate": 9.867844606584453e-07, "loss": 0.003, "step": 193490 }, { "epoch": 1.6339110426210128, "grad_norm": 0.20387397706508636, "learning_rate": 9.863449839907679e-07, "loss": 0.006, "step": 193500 }, { "epoch": 1.6339954824681766, "grad_norm": 0.26857423782348633, "learning_rate": 9.859055944988538e-07, "loss": 0.009, "step": 193510 }, { "epoch": 1.6340799223153406, "grad_norm": 0.30548179149627686, "learning_rate": 9.854662921922486e-07, "loss": 0.0053, "step": 193520 }, { "epoch": 1.6341643621625046, "grad_norm": 0.10504734516143799, "learning_rate": 9.85027077080493e-07, "loss": 0.003, "step": 193530 }, { "epoch": 1.6342488020096684, "grad_norm": 0.2856430411338806, "learning_rate": 9.845879491731254e-07, "loss": 0.0082, "step": 193540 }, { "epoch": 1.6343332418568322, "grad_norm": 0.008572887629270554, "learning_rate": 9.841489084796862e-07, "loss": 0.0059, "step": 193550 }, { "epoch": 1.6344176817039962, "grad_norm": 0.501203179359436, "learning_rate": 9.837099550097095e-07, "loss": 0.0051, "step": 193560 }, { "epoch": 1.6345021215511601, "grad_norm": 0.15097366273403168, "learning_rate": 9.832710887727292e-07, "loss": 0.0062, "step": 193570 }, { "epoch": 1.634586561398324, "grad_norm": 0.7513577342033386, "learning_rate": 9.82832309778277e-07, "loss": 0.0055, "step": 193580 }, { "epoch": 1.6346710012454877, "grad_norm": 0.07730607688426971, "learning_rate": 9.82393618035885e-07, "loss": 0.0048, "step": 193590 }, { "epoch": 1.6347554410926515, "grad_norm": 0.17376698553562164, "learning_rate": 9.819550135550792e-07, "loss": 0.0032, "step": 193600 }, { "epoch": 1.6348398809398155, "grad_norm": 0.50263512134552, "learning_rate": 9.815164963453882e-07, "loss": 0.0104, "step": 193610 }, { "epoch": 1.6349243207869795, "grad_norm": 0.3875282108783722, "learning_rate": 9.810780664163356e-07, "loss": 0.0047, "step": 193620 }, { "epoch": 1.6350087606341432, "grad_norm": 0.2628462612628937, "learning_rate": 9.806397237774434e-07, "loss": 0.0068, "step": 193630 }, { "epoch": 1.635093200481307, "grad_norm": 0.19565649330615997, "learning_rate": 9.802014684382332e-07, "loss": 0.0049, "step": 193640 }, { "epoch": 1.635177640328471, "grad_norm": 1.099279522895813, "learning_rate": 9.797633004082218e-07, "loss": 0.0105, "step": 193650 }, { "epoch": 1.635262080175635, "grad_norm": 0.35836949944496155, "learning_rate": 9.793252196969294e-07, "loss": 0.0231, "step": 193660 }, { "epoch": 1.6353465200227988, "grad_norm": 0.5717440843582153, "learning_rate": 9.788872263138677e-07, "loss": 0.0087, "step": 193670 }, { "epoch": 1.6354309598699626, "grad_norm": 0.2110772281885147, "learning_rate": 9.784493202685524e-07, "loss": 0.0036, "step": 193680 }, { "epoch": 1.6355153997171266, "grad_norm": 0.9169752597808838, "learning_rate": 9.780115015704927e-07, "loss": 0.0082, "step": 193690 }, { "epoch": 1.6355998395642903, "grad_norm": 0.31457996368408203, "learning_rate": 9.775737702292009e-07, "loss": 0.0069, "step": 193700 }, { "epoch": 1.6356842794114543, "grad_norm": 0.3433830738067627, "learning_rate": 9.771361262541818e-07, "loss": 0.0064, "step": 193710 }, { "epoch": 1.635768719258618, "grad_norm": 0.2106037586927414, "learning_rate": 9.766985696549419e-07, "loss": 0.0081, "step": 193720 }, { "epoch": 1.6358531591057819, "grad_norm": 0.338377445936203, "learning_rate": 9.762611004409834e-07, "loss": 0.0057, "step": 193730 }, { "epoch": 1.6359375989529459, "grad_norm": 0.5122872591018677, "learning_rate": 9.758237186218106e-07, "loss": 0.0086, "step": 193740 }, { "epoch": 1.6360220388001099, "grad_norm": 0.1561395227909088, "learning_rate": 9.753864242069205e-07, "loss": 0.0062, "step": 193750 }, { "epoch": 1.6361064786472737, "grad_norm": 0.505260705947876, "learning_rate": 9.749492172058139e-07, "loss": 0.0053, "step": 193760 }, { "epoch": 1.6361909184944374, "grad_norm": 0.1490500569343567, "learning_rate": 9.745120976279859e-07, "loss": 0.0042, "step": 193770 }, { "epoch": 1.6362753583416014, "grad_norm": 0.18759337067604065, "learning_rate": 9.740750654829284e-07, "loss": 0.0038, "step": 193780 }, { "epoch": 1.6363597981887654, "grad_norm": 0.13782252371311188, "learning_rate": 9.736381207801371e-07, "loss": 0.0082, "step": 193790 }, { "epoch": 1.6364442380359292, "grad_norm": 0.16940340399742126, "learning_rate": 9.732012635291005e-07, "loss": 0.0114, "step": 193800 }, { "epoch": 1.636528677883093, "grad_norm": 0.0645676776766777, "learning_rate": 9.727644937393078e-07, "loss": 0.0088, "step": 193810 }, { "epoch": 1.6366131177302568, "grad_norm": 0.1484280228614807, "learning_rate": 9.723278114202429e-07, "loss": 0.0064, "step": 193820 }, { "epoch": 1.6366975575774207, "grad_norm": 0.05109228193759918, "learning_rate": 9.718912165813944e-07, "loss": 0.004, "step": 193830 }, { "epoch": 1.6367819974245847, "grad_norm": 0.009136182256042957, "learning_rate": 9.714547092322419e-07, "loss": 0.0072, "step": 193840 }, { "epoch": 1.6368664372717485, "grad_norm": 0.1042945384979248, "learning_rate": 9.71018289382269e-07, "loss": 0.0073, "step": 193850 }, { "epoch": 1.6369508771189123, "grad_norm": 0.24303269386291504, "learning_rate": 9.705819570409519e-07, "loss": 0.0083, "step": 193860 }, { "epoch": 1.6370353169660763, "grad_norm": 0.2308882474899292, "learning_rate": 9.701457122177699e-07, "loss": 0.0051, "step": 193870 }, { "epoch": 1.6371197568132403, "grad_norm": 0.049679748713970184, "learning_rate": 9.69709554922199e-07, "loss": 0.0107, "step": 193880 }, { "epoch": 1.637204196660404, "grad_norm": 0.18862487375736237, "learning_rate": 9.692734851637076e-07, "loss": 0.0072, "step": 193890 }, { "epoch": 1.6372886365075678, "grad_norm": 0.31618812680244446, "learning_rate": 9.688375029517722e-07, "loss": 0.0077, "step": 193900 }, { "epoch": 1.6373730763547318, "grad_norm": 0.17242488265037537, "learning_rate": 9.68401608295858e-07, "loss": 0.0043, "step": 193910 }, { "epoch": 1.6374575162018958, "grad_norm": 0.17843450605869293, "learning_rate": 9.679658012054366e-07, "loss": 0.0027, "step": 193920 }, { "epoch": 1.6375419560490596, "grad_norm": 0.36763522028923035, "learning_rate": 9.675300816899702e-07, "loss": 0.0068, "step": 193930 }, { "epoch": 1.6376263958962234, "grad_norm": 0.2494799792766571, "learning_rate": 9.670944497589257e-07, "loss": 0.0034, "step": 193940 }, { "epoch": 1.6377108357433872, "grad_norm": 0.1983213871717453, "learning_rate": 9.666589054217628e-07, "loss": 0.0082, "step": 193950 }, { "epoch": 1.6377952755905512, "grad_norm": 0.18103979527950287, "learning_rate": 9.662234486879425e-07, "loss": 0.0053, "step": 193960 }, { "epoch": 1.6378797154377152, "grad_norm": 0.17661124467849731, "learning_rate": 9.657880795669206e-07, "loss": 0.0071, "step": 193970 }, { "epoch": 1.637964155284879, "grad_norm": 0.6531459093093872, "learning_rate": 9.653527980681565e-07, "loss": 0.0089, "step": 193980 }, { "epoch": 1.6380485951320427, "grad_norm": 0.424504816532135, "learning_rate": 9.649176042011028e-07, "loss": 0.0078, "step": 193990 }, { "epoch": 1.6381330349792067, "grad_norm": 0.23526325821876526, "learning_rate": 9.644824979752104e-07, "loss": 0.0074, "step": 194000 }, { "epoch": 1.6382174748263707, "grad_norm": 0.40788108110427856, "learning_rate": 9.640474793999327e-07, "loss": 0.0088, "step": 194010 }, { "epoch": 1.6383019146735345, "grad_norm": 0.24273483455181122, "learning_rate": 9.636125484847152e-07, "loss": 0.0083, "step": 194020 }, { "epoch": 1.6383863545206983, "grad_norm": 0.1485588550567627, "learning_rate": 9.631777052390074e-07, "loss": 0.0109, "step": 194030 }, { "epoch": 1.6384707943678622, "grad_norm": 0.3370720446109772, "learning_rate": 9.627429496722523e-07, "loss": 0.0185, "step": 194040 }, { "epoch": 1.638555234215026, "grad_norm": 0.20220984518527985, "learning_rate": 9.623082817938934e-07, "loss": 0.0072, "step": 194050 }, { "epoch": 1.63863967406219, "grad_norm": 0.19530679285526276, "learning_rate": 9.6187370161337e-07, "loss": 0.004, "step": 194060 }, { "epoch": 1.6387241139093538, "grad_norm": 0.4549417495727539, "learning_rate": 9.614392091401237e-07, "loss": 0.0061, "step": 194070 }, { "epoch": 1.6388085537565176, "grad_norm": 0.09910144656896591, "learning_rate": 9.610048043835885e-07, "loss": 0.0063, "step": 194080 }, { "epoch": 1.6388929936036816, "grad_norm": 0.2557922303676605, "learning_rate": 9.60570487353203e-07, "loss": 0.0094, "step": 194090 }, { "epoch": 1.6389774334508456, "grad_norm": 0.2739604711532593, "learning_rate": 9.601362580583967e-07, "loss": 0.0058, "step": 194100 }, { "epoch": 1.6390618732980093, "grad_norm": 0.018658500164747238, "learning_rate": 9.597021165086051e-07, "loss": 0.0103, "step": 194110 }, { "epoch": 1.6391463131451731, "grad_norm": 0.2452985793352127, "learning_rate": 9.59268062713255e-07, "loss": 0.0081, "step": 194120 }, { "epoch": 1.6392307529923371, "grad_norm": 0.5143187046051025, "learning_rate": 9.58834096681775e-07, "loss": 0.0085, "step": 194130 }, { "epoch": 1.6393151928395011, "grad_norm": 0.16486574709415436, "learning_rate": 9.584002184235901e-07, "loss": 0.0059, "step": 194140 }, { "epoch": 1.6393996326866649, "grad_norm": 0.10830994695425034, "learning_rate": 9.579664279481232e-07, "loss": 0.0041, "step": 194150 }, { "epoch": 1.6394840725338287, "grad_norm": 0.07099784165620804, "learning_rate": 9.575327252647981e-07, "loss": 0.0081, "step": 194160 }, { "epoch": 1.6395685123809924, "grad_norm": 0.14151668548583984, "learning_rate": 9.57099110383033e-07, "loss": 0.0057, "step": 194170 }, { "epoch": 1.6396529522281564, "grad_norm": 0.1377061903476715, "learning_rate": 9.56665583312248e-07, "loss": 0.0047, "step": 194180 }, { "epoch": 1.6397373920753204, "grad_norm": 0.42439818382263184, "learning_rate": 9.562321440618577e-07, "loss": 0.0087, "step": 194190 }, { "epoch": 1.6398218319224842, "grad_norm": 0.2521423101425171, "learning_rate": 9.557987926412764e-07, "loss": 0.0109, "step": 194200 }, { "epoch": 1.639906271769648, "grad_norm": 0.0113134840503335, "learning_rate": 9.553655290599157e-07, "loss": 0.0043, "step": 194210 }, { "epoch": 1.639990711616812, "grad_norm": 0.4499700963497162, "learning_rate": 9.549323533271882e-07, "loss": 0.0071, "step": 194220 }, { "epoch": 1.640075151463976, "grad_norm": 0.12311377376317978, "learning_rate": 9.54499265452501e-07, "loss": 0.0042, "step": 194230 }, { "epoch": 1.6401595913111398, "grad_norm": 0.22038371860980988, "learning_rate": 9.540662654452592e-07, "loss": 0.0042, "step": 194240 }, { "epoch": 1.6402440311583035, "grad_norm": 0.26929163932800293, "learning_rate": 9.536333533148707e-07, "loss": 0.0041, "step": 194250 }, { "epoch": 1.6403284710054675, "grad_norm": 0.030002251267433167, "learning_rate": 9.532005290707347e-07, "loss": 0.0082, "step": 194260 }, { "epoch": 1.6404129108526315, "grad_norm": 0.16097503900527954, "learning_rate": 9.527677927222556e-07, "loss": 0.008, "step": 194270 }, { "epoch": 1.6404973506997953, "grad_norm": 0.2958252727985382, "learning_rate": 9.523351442788309e-07, "loss": 0.0088, "step": 194280 }, { "epoch": 1.640581790546959, "grad_norm": 0.17108699679374695, "learning_rate": 9.519025837498569e-07, "loss": 0.0071, "step": 194290 }, { "epoch": 1.6406662303941228, "grad_norm": 0.4368800222873688, "learning_rate": 9.514701111447283e-07, "loss": 0.007, "step": 194300 }, { "epoch": 1.6407506702412868, "grad_norm": 0.23567979037761688, "learning_rate": 9.510377264728409e-07, "loss": 0.0066, "step": 194310 }, { "epoch": 1.6408351100884508, "grad_norm": 0.18832701444625854, "learning_rate": 9.506054297435824e-07, "loss": 0.0075, "step": 194320 }, { "epoch": 1.6409195499356146, "grad_norm": 0.387054443359375, "learning_rate": 9.501732209663461e-07, "loss": 0.0086, "step": 194330 }, { "epoch": 1.6410039897827784, "grad_norm": 0.018934153020381927, "learning_rate": 9.497411001505157e-07, "loss": 0.0043, "step": 194340 }, { "epoch": 1.6410884296299424, "grad_norm": 0.29996994137763977, "learning_rate": 9.4930906730548e-07, "loss": 0.0049, "step": 194350 }, { "epoch": 1.6411728694771064, "grad_norm": 0.31883272528648376, "learning_rate": 9.488771224406218e-07, "loss": 0.0069, "step": 194360 }, { "epoch": 1.6412573093242702, "grad_norm": 0.3358398675918579, "learning_rate": 9.48445265565322e-07, "loss": 0.0033, "step": 194370 }, { "epoch": 1.641341749171434, "grad_norm": 0.09306587278842926, "learning_rate": 9.480134966889604e-07, "loss": 0.0041, "step": 194380 }, { "epoch": 1.6414261890185977, "grad_norm": 0.2345409393310547, "learning_rate": 9.475818158209143e-07, "loss": 0.0157, "step": 194390 }, { "epoch": 1.6415106288657617, "grad_norm": 0.15635298192501068, "learning_rate": 9.471502229705626e-07, "loss": 0.009, "step": 194400 }, { "epoch": 1.6415950687129257, "grad_norm": 0.4400227665901184, "learning_rate": 9.467187181472753e-07, "loss": 0.009, "step": 194410 }, { "epoch": 1.6416795085600895, "grad_norm": 0.24030707776546478, "learning_rate": 9.46287301360429e-07, "loss": 0.0043, "step": 194420 }, { "epoch": 1.6417639484072533, "grad_norm": 0.15486136078834534, "learning_rate": 9.4585597261939e-07, "loss": 0.0105, "step": 194430 }, { "epoch": 1.6418483882544173, "grad_norm": 0.20059674978256226, "learning_rate": 9.454247319335303e-07, "loss": 0.0071, "step": 194440 }, { "epoch": 1.6419328281015813, "grad_norm": 0.08900518715381622, "learning_rate": 9.44993579312214e-07, "loss": 0.0049, "step": 194450 }, { "epoch": 1.642017267948745, "grad_norm": 0.12236793339252472, "learning_rate": 9.445625147648063e-07, "loss": 0.0039, "step": 194460 }, { "epoch": 1.6421017077959088, "grad_norm": 0.29803967475891113, "learning_rate": 9.4413153830067e-07, "loss": 0.0045, "step": 194470 }, { "epoch": 1.6421861476430728, "grad_norm": 0.25406286120414734, "learning_rate": 9.43700649929164e-07, "loss": 0.0049, "step": 194480 }, { "epoch": 1.6422705874902368, "grad_norm": 0.5281122922897339, "learning_rate": 9.432698496596504e-07, "loss": 0.0052, "step": 194490 }, { "epoch": 1.6423550273374006, "grad_norm": 0.1300884485244751, "learning_rate": 9.428391375014828e-07, "loss": 0.0098, "step": 194500 }, { "epoch": 1.6424394671845644, "grad_norm": 0.00226884544827044, "learning_rate": 9.424085134640188e-07, "loss": 0.0046, "step": 194510 }, { "epoch": 1.6425239070317281, "grad_norm": 0.28805580735206604, "learning_rate": 9.419779775566106e-07, "loss": 0.007, "step": 194520 }, { "epoch": 1.6426083468788921, "grad_norm": 0.16721779108047485, "learning_rate": 9.41547529788609e-07, "loss": 0.0058, "step": 194530 }, { "epoch": 1.6426927867260561, "grad_norm": 0.11262203007936478, "learning_rate": 9.411171701693628e-07, "loss": 0.0039, "step": 194540 }, { "epoch": 1.64277722657322, "grad_norm": 0.5011718273162842, "learning_rate": 9.406868987082202e-07, "loss": 0.0049, "step": 194550 }, { "epoch": 1.6428616664203837, "grad_norm": 0.7723498940467834, "learning_rate": 9.402567154145259e-07, "loss": 0.0102, "step": 194560 }, { "epoch": 1.6429461062675477, "grad_norm": 0.37702324986457825, "learning_rate": 9.398266202976247e-07, "loss": 0.0062, "step": 194570 }, { "epoch": 1.6430305461147117, "grad_norm": 0.27995941042900085, "learning_rate": 9.393966133668569e-07, "loss": 0.0057, "step": 194580 }, { "epoch": 1.6431149859618754, "grad_norm": 0.06083783879876137, "learning_rate": 9.389666946315618e-07, "loss": 0.0035, "step": 194590 }, { "epoch": 1.6431994258090392, "grad_norm": 0.170707106590271, "learning_rate": 9.385368641010795e-07, "loss": 0.0086, "step": 194600 }, { "epoch": 1.6432838656562032, "grad_norm": 0.3742072284221649, "learning_rate": 9.381071217847437e-07, "loss": 0.0096, "step": 194610 }, { "epoch": 1.643368305503367, "grad_norm": 0.38107940554618835, "learning_rate": 9.376774676918887e-07, "loss": 0.009, "step": 194620 }, { "epoch": 1.643452745350531, "grad_norm": 0.08170074969530106, "learning_rate": 9.372479018318459e-07, "loss": 0.005, "step": 194630 }, { "epoch": 1.6435371851976948, "grad_norm": 0.2477681189775467, "learning_rate": 9.368184242139472e-07, "loss": 0.0059, "step": 194640 }, { "epoch": 1.6436216250448585, "grad_norm": 0.2409680187702179, "learning_rate": 9.363890348475185e-07, "loss": 0.0057, "step": 194650 }, { "epoch": 1.6437060648920225, "grad_norm": 0.09890948235988617, "learning_rate": 9.359597337418885e-07, "loss": 0.0064, "step": 194660 }, { "epoch": 1.6437905047391865, "grad_norm": 0.20079192519187927, "learning_rate": 9.355305209063787e-07, "loss": 0.0049, "step": 194670 }, { "epoch": 1.6438749445863503, "grad_norm": 0.13798479735851288, "learning_rate": 9.351013963503148e-07, "loss": 0.0095, "step": 194680 }, { "epoch": 1.643959384433514, "grad_norm": 0.32237377762794495, "learning_rate": 9.346723600830154e-07, "loss": 0.0103, "step": 194690 }, { "epoch": 1.644043824280678, "grad_norm": 0.08155956864356995, "learning_rate": 9.342434121137994e-07, "loss": 0.0052, "step": 194700 }, { "epoch": 1.644128264127842, "grad_norm": 0.2527036964893341, "learning_rate": 9.338145524519832e-07, "loss": 0.0076, "step": 194710 }, { "epoch": 1.6442127039750059, "grad_norm": 0.21360363066196442, "learning_rate": 9.333857811068802e-07, "loss": 0.0075, "step": 194720 }, { "epoch": 1.6442971438221696, "grad_norm": 0.019331498071551323, "learning_rate": 9.329570980878061e-07, "loss": 0.0027, "step": 194730 }, { "epoch": 1.6443815836693334, "grad_norm": 0.24801714718341827, "learning_rate": 9.325285034040693e-07, "loss": 0.0054, "step": 194740 }, { "epoch": 1.6444660235164974, "grad_norm": 0.12701433897018433, "learning_rate": 9.320999970649813e-07, "loss": 0.0026, "step": 194750 }, { "epoch": 1.6445504633636614, "grad_norm": 0.6156802773475647, "learning_rate": 9.316715790798464e-07, "loss": 0.0179, "step": 194760 }, { "epoch": 1.6446349032108252, "grad_norm": 0.0834287777543068, "learning_rate": 9.312432494579732e-07, "loss": 0.0047, "step": 194770 }, { "epoch": 1.644719343057989, "grad_norm": 0.142863929271698, "learning_rate": 9.308150082086609e-07, "loss": 0.005, "step": 194780 }, { "epoch": 1.644803782905153, "grad_norm": 0.18046702444553375, "learning_rate": 9.303868553412138e-07, "loss": 0.0035, "step": 194790 }, { "epoch": 1.644888222752317, "grad_norm": 0.07608969509601593, "learning_rate": 9.299587908649294e-07, "loss": 0.0019, "step": 194800 }, { "epoch": 1.6449726625994807, "grad_norm": 0.16161656379699707, "learning_rate": 9.295308147891069e-07, "loss": 0.005, "step": 194810 }, { "epoch": 1.6450571024466445, "grad_norm": 0.2052924484014511, "learning_rate": 9.291029271230406e-07, "loss": 0.0126, "step": 194820 }, { "epoch": 1.6451415422938085, "grad_norm": 0.24716657400131226, "learning_rate": 9.286751278760237e-07, "loss": 0.0049, "step": 194830 }, { "epoch": 1.6452259821409725, "grad_norm": 0.2317589521408081, "learning_rate": 9.282474170573502e-07, "loss": 0.0101, "step": 194840 }, { "epoch": 1.6453104219881363, "grad_norm": 0.07645902782678604, "learning_rate": 9.278197946763079e-07, "loss": 0.007, "step": 194850 }, { "epoch": 1.6453948618353, "grad_norm": 0.181022047996521, "learning_rate": 9.273922607421854e-07, "loss": 0.0074, "step": 194860 }, { "epoch": 1.6454793016824638, "grad_norm": 0.24220775067806244, "learning_rate": 9.269648152642674e-07, "loss": 0.0063, "step": 194870 }, { "epoch": 1.6455637415296278, "grad_norm": 0.20507165789604187, "learning_rate": 9.265374582518399e-07, "loss": 0.0069, "step": 194880 }, { "epoch": 1.6456481813767918, "grad_norm": 0.32108014822006226, "learning_rate": 9.261101897141833e-07, "loss": 0.0048, "step": 194890 }, { "epoch": 1.6457326212239556, "grad_norm": 0.20392972230911255, "learning_rate": 9.256830096605796e-07, "loss": 0.0047, "step": 194900 }, { "epoch": 1.6458170610711194, "grad_norm": 0.1611536294221878, "learning_rate": 9.252559181003046e-07, "loss": 0.0052, "step": 194910 }, { "epoch": 1.6459015009182834, "grad_norm": 0.19761987030506134, "learning_rate": 9.248289150426371e-07, "loss": 0.0085, "step": 194920 }, { "epoch": 1.6459859407654474, "grad_norm": 0.1811397224664688, "learning_rate": 9.244020004968507e-07, "loss": 0.0055, "step": 194930 }, { "epoch": 1.6460703806126111, "grad_norm": 0.27361783385276794, "learning_rate": 9.239751744722181e-07, "loss": 0.0046, "step": 194940 }, { "epoch": 1.646154820459775, "grad_norm": 0.16001975536346436, "learning_rate": 9.23548436978009e-07, "loss": 0.0065, "step": 194950 }, { "epoch": 1.6462392603069387, "grad_norm": 0.09972257167100906, "learning_rate": 9.231217880234917e-07, "loss": 0.0081, "step": 194960 }, { "epoch": 1.6463237001541027, "grad_norm": 0.032605379819869995, "learning_rate": 9.226952276179341e-07, "loss": 0.0042, "step": 194970 }, { "epoch": 1.6464081400012667, "grad_norm": 0.73846036195755, "learning_rate": 9.222687557706001e-07, "loss": 0.0125, "step": 194980 }, { "epoch": 1.6464925798484304, "grad_norm": 0.044739071279764175, "learning_rate": 9.218423724907543e-07, "loss": 0.0051, "step": 194990 }, { "epoch": 1.6465770196955942, "grad_norm": 0.5865917205810547, "learning_rate": 9.214160777876552e-07, "loss": 0.0113, "step": 195000 }, { "epoch": 1.6466614595427582, "grad_norm": 0.21218417584896088, "learning_rate": 9.20989871670564e-07, "loss": 0.0071, "step": 195010 }, { "epoch": 1.6467458993899222, "grad_norm": 0.5696257948875427, "learning_rate": 9.205637541487372e-07, "loss": 0.0037, "step": 195020 }, { "epoch": 1.646830339237086, "grad_norm": 0.13965252041816711, "learning_rate": 9.201377252314298e-07, "loss": 0.0032, "step": 195030 }, { "epoch": 1.6469147790842498, "grad_norm": 0.7252002954483032, "learning_rate": 9.197117849278936e-07, "loss": 0.0087, "step": 195040 }, { "epoch": 1.6469992189314138, "grad_norm": 0.2194851040840149, "learning_rate": 9.192859332473825e-07, "loss": 0.0092, "step": 195050 }, { "epoch": 1.6470836587785778, "grad_norm": 0.46058371663093567, "learning_rate": 9.188601701991451e-07, "loss": 0.0054, "step": 195060 }, { "epoch": 1.6471680986257415, "grad_norm": 0.37639158964157104, "learning_rate": 9.184344957924268e-07, "loss": 0.0119, "step": 195070 }, { "epoch": 1.6472525384729053, "grad_norm": 0.18207336962223053, "learning_rate": 9.180089100364764e-07, "loss": 0.0043, "step": 195080 }, { "epoch": 1.647336978320069, "grad_norm": 0.04874873533844948, "learning_rate": 9.175834129405358e-07, "loss": 0.0061, "step": 195090 }, { "epoch": 1.647421418167233, "grad_norm": 0.03775518387556076, "learning_rate": 9.171580045138467e-07, "loss": 0.0042, "step": 195100 }, { "epoch": 1.647505858014397, "grad_norm": 0.15776792168617249, "learning_rate": 9.167326847656482e-07, "loss": 0.0068, "step": 195110 }, { "epoch": 1.6475902978615609, "grad_norm": 0.7040033340454102, "learning_rate": 9.163074537051803e-07, "loss": 0.0099, "step": 195120 }, { "epoch": 1.6476747377087246, "grad_norm": 0.049893103539943695, "learning_rate": 9.158823113416759e-07, "loss": 0.0073, "step": 195130 }, { "epoch": 1.6477591775558886, "grad_norm": 0.22235411405563354, "learning_rate": 9.154572576843729e-07, "loss": 0.0045, "step": 195140 }, { "epoch": 1.6478436174030526, "grad_norm": 0.4737308919429779, "learning_rate": 9.150322927424993e-07, "loss": 0.0065, "step": 195150 }, { "epoch": 1.6479280572502164, "grad_norm": 0.06259096413850784, "learning_rate": 9.146074165252889e-07, "loss": 0.0068, "step": 195160 }, { "epoch": 1.6480124970973802, "grad_norm": 0.07600905001163483, "learning_rate": 9.141826290419681e-07, "loss": 0.0046, "step": 195170 }, { "epoch": 1.6480969369445442, "grad_norm": 0.2229984551668167, "learning_rate": 9.13757930301763e-07, "loss": 0.0067, "step": 195180 }, { "epoch": 1.648181376791708, "grad_norm": 0.2864888310432434, "learning_rate": 9.133333203138988e-07, "loss": 0.0063, "step": 195190 }, { "epoch": 1.648265816638872, "grad_norm": 0.0017483149422332644, "learning_rate": 9.129087990875956e-07, "loss": 0.0074, "step": 195200 }, { "epoch": 1.6483502564860357, "grad_norm": 0.08472375571727753, "learning_rate": 9.124843666320777e-07, "loss": 0.0042, "step": 195210 }, { "epoch": 1.6484346963331995, "grad_norm": 0.15605083107948303, "learning_rate": 9.120600229565602e-07, "loss": 0.0054, "step": 195220 }, { "epoch": 1.6485191361803635, "grad_norm": 0.312233567237854, "learning_rate": 9.116357680702626e-07, "loss": 0.0054, "step": 195230 }, { "epoch": 1.6486035760275275, "grad_norm": 0.3434205651283264, "learning_rate": 9.112116019823969e-07, "loss": 0.0081, "step": 195240 }, { "epoch": 1.6486880158746913, "grad_norm": 0.30247634649276733, "learning_rate": 9.107875247021785e-07, "loss": 0.0051, "step": 195250 }, { "epoch": 1.648772455721855, "grad_norm": 0.4724193811416626, "learning_rate": 9.103635362388174e-07, "loss": 0.0091, "step": 195260 }, { "epoch": 1.648856895569019, "grad_norm": 0.3137173056602478, "learning_rate": 9.099396366015223e-07, "loss": 0.0077, "step": 195270 }, { "epoch": 1.648941335416183, "grad_norm": 0.24021731317043304, "learning_rate": 9.095158257994985e-07, "loss": 0.0057, "step": 195280 }, { "epoch": 1.6490257752633468, "grad_norm": 0.03523092344403267, "learning_rate": 9.090921038419542e-07, "loss": 0.0088, "step": 195290 }, { "epoch": 1.6491102151105106, "grad_norm": 0.3347960114479065, "learning_rate": 9.086684707380917e-07, "loss": 0.0062, "step": 195300 }, { "epoch": 1.6491946549576744, "grad_norm": 0.13158366084098816, "learning_rate": 9.082449264971094e-07, "loss": 0.005, "step": 195310 }, { "epoch": 1.6492790948048384, "grad_norm": 0.2479240447282791, "learning_rate": 9.078214711282107e-07, "loss": 0.0093, "step": 195320 }, { "epoch": 1.6493635346520024, "grad_norm": 0.5287280678749084, "learning_rate": 9.073981046405894e-07, "loss": 0.0053, "step": 195330 }, { "epoch": 1.6494479744991661, "grad_norm": 0.18889907002449036, "learning_rate": 9.069748270434442e-07, "loss": 0.0121, "step": 195340 }, { "epoch": 1.64953241434633, "grad_norm": 0.5703122615814209, "learning_rate": 9.065516383459666e-07, "loss": 0.009, "step": 195350 }, { "epoch": 1.649616854193494, "grad_norm": 0.05133998766541481, "learning_rate": 9.061285385573492e-07, "loss": 0.0049, "step": 195360 }, { "epoch": 1.649701294040658, "grad_norm": 0.15035024285316467, "learning_rate": 9.057055276867793e-07, "loss": 0.0093, "step": 195370 }, { "epoch": 1.6497857338878217, "grad_norm": 0.022580290213227272, "learning_rate": 9.052826057434477e-07, "loss": 0.0098, "step": 195380 }, { "epoch": 1.6498701737349855, "grad_norm": 0.36374303698539734, "learning_rate": 9.048597727365377e-07, "loss": 0.0038, "step": 195390 }, { "epoch": 1.6499546135821495, "grad_norm": 0.16335001587867737, "learning_rate": 9.044370286752357e-07, "loss": 0.0076, "step": 195400 }, { "epoch": 1.6500390534293135, "grad_norm": 0.2341412752866745, "learning_rate": 9.040143735687218e-07, "loss": 0.0069, "step": 195410 }, { "epoch": 1.6501234932764772, "grad_norm": 0.42957255244255066, "learning_rate": 9.035918074261762e-07, "loss": 0.0066, "step": 195420 }, { "epoch": 1.650207933123641, "grad_norm": 0.4010217785835266, "learning_rate": 9.031693302567773e-07, "loss": 0.0112, "step": 195430 }, { "epoch": 1.6502923729708048, "grad_norm": 0.027954522520303726, "learning_rate": 9.027469420696999e-07, "loss": 0.0051, "step": 195440 }, { "epoch": 1.6503768128179688, "grad_norm": 0.21100156009197235, "learning_rate": 9.023246428741206e-07, "loss": 0.0044, "step": 195450 }, { "epoch": 1.6504612526651328, "grad_norm": 0.8131760954856873, "learning_rate": 9.019024326792092e-07, "loss": 0.0076, "step": 195460 }, { "epoch": 1.6505456925122965, "grad_norm": 0.11286018043756485, "learning_rate": 9.014803114941384e-07, "loss": 0.0063, "step": 195470 }, { "epoch": 1.6506301323594603, "grad_norm": 0.1586543768644333, "learning_rate": 9.010582793280736e-07, "loss": 0.0056, "step": 195480 }, { "epoch": 1.6507145722066243, "grad_norm": 0.022105131298303604, "learning_rate": 9.006363361901848e-07, "loss": 0.0073, "step": 195490 }, { "epoch": 1.6507990120537883, "grad_norm": 0.3014388084411621, "learning_rate": 9.002144820896347e-07, "loss": 0.0074, "step": 195500 }, { "epoch": 1.650883451900952, "grad_norm": 0.03796086832880974, "learning_rate": 8.997927170355858e-07, "loss": 0.003, "step": 195510 }, { "epoch": 1.6509678917481159, "grad_norm": 0.4864572584629059, "learning_rate": 8.993710410371976e-07, "loss": 0.0122, "step": 195520 }, { "epoch": 1.6510523315952799, "grad_norm": 0.4703270196914673, "learning_rate": 8.989494541036314e-07, "loss": 0.0071, "step": 195530 }, { "epoch": 1.6511367714424436, "grad_norm": 0.02376924641430378, "learning_rate": 8.985279562440424e-07, "loss": 0.0038, "step": 195540 }, { "epoch": 1.6512212112896076, "grad_norm": 0.08081616461277008, "learning_rate": 8.981065474675849e-07, "loss": 0.0071, "step": 195550 }, { "epoch": 1.6513056511367714, "grad_norm": 0.13319031894207, "learning_rate": 8.976852277834136e-07, "loss": 0.0046, "step": 195560 }, { "epoch": 1.6513900909839352, "grad_norm": 0.13654562830924988, "learning_rate": 8.972639972006775e-07, "loss": 0.0055, "step": 195570 }, { "epoch": 1.6514745308310992, "grad_norm": 0.2034773975610733, "learning_rate": 8.968428557285275e-07, "loss": 0.007, "step": 195580 }, { "epoch": 1.6515589706782632, "grad_norm": 0.37373676896095276, "learning_rate": 8.964218033761102e-07, "loss": 0.0042, "step": 195590 }, { "epoch": 1.651643410525427, "grad_norm": 0.49539896845817566, "learning_rate": 8.960008401525705e-07, "loss": 0.0053, "step": 195600 }, { "epoch": 1.6517278503725907, "grad_norm": 0.1668381243944168, "learning_rate": 8.9557996606705e-07, "loss": 0.0092, "step": 195610 }, { "epoch": 1.6518122902197547, "grad_norm": 0.052032720297575, "learning_rate": 8.951591811286931e-07, "loss": 0.0048, "step": 195620 }, { "epoch": 1.6518967300669187, "grad_norm": 0.1063334122300148, "learning_rate": 8.947384853466362e-07, "loss": 0.0058, "step": 195630 }, { "epoch": 1.6519811699140825, "grad_norm": 0.1271178126335144, "learning_rate": 8.943178787300194e-07, "loss": 0.0066, "step": 195640 }, { "epoch": 1.6520656097612463, "grad_norm": 0.0802723839879036, "learning_rate": 8.93897361287977e-07, "loss": 0.0042, "step": 195650 }, { "epoch": 1.65215004960841, "grad_norm": 0.23632961511611938, "learning_rate": 8.934769330296411e-07, "loss": 0.004, "step": 195660 }, { "epoch": 1.652234489455574, "grad_norm": 0.25005578994750977, "learning_rate": 8.930565939641473e-07, "loss": 0.0067, "step": 195670 }, { "epoch": 1.652318929302738, "grad_norm": 0.24504294991493225, "learning_rate": 8.926363441006203e-07, "loss": 0.0066, "step": 195680 }, { "epoch": 1.6524033691499018, "grad_norm": 0.20467987656593323, "learning_rate": 8.922161834481912e-07, "loss": 0.0049, "step": 195690 }, { "epoch": 1.6524878089970656, "grad_norm": 0.2839696705341339, "learning_rate": 8.91796112015984e-07, "loss": 0.0035, "step": 195700 }, { "epoch": 1.6525722488442296, "grad_norm": 0.02549591287970543, "learning_rate": 8.913761298131246e-07, "loss": 0.0057, "step": 195710 }, { "epoch": 1.6526566886913936, "grad_norm": 0.19022798538208008, "learning_rate": 8.909562368487323e-07, "loss": 0.0052, "step": 195720 }, { "epoch": 1.6527411285385574, "grad_norm": 0.4380040466785431, "learning_rate": 8.905364331319294e-07, "loss": 0.0083, "step": 195730 }, { "epoch": 1.6528255683857211, "grad_norm": 0.49043574929237366, "learning_rate": 8.901167186718334e-07, "loss": 0.0069, "step": 195740 }, { "epoch": 1.6529100082328851, "grad_norm": 0.6857151389122009, "learning_rate": 8.896970934775601e-07, "loss": 0.0073, "step": 195750 }, { "epoch": 1.6529944480800491, "grad_norm": 0.06199020519852638, "learning_rate": 8.892775575582225e-07, "loss": 0.0032, "step": 195760 }, { "epoch": 1.653078887927213, "grad_norm": 0.14741507172584534, "learning_rate": 8.888581109229355e-07, "loss": 0.0039, "step": 195770 }, { "epoch": 1.6531633277743767, "grad_norm": 0.23633569478988647, "learning_rate": 8.88438753580807e-07, "loss": 0.0119, "step": 195780 }, { "epoch": 1.6532477676215405, "grad_norm": 0.8996821641921997, "learning_rate": 8.880194855409457e-07, "loss": 0.0139, "step": 195790 }, { "epoch": 1.6533322074687045, "grad_norm": 0.09069561213254929, "learning_rate": 8.876003068124594e-07, "loss": 0.0068, "step": 195800 }, { "epoch": 1.6534166473158685, "grad_norm": 0.7828161716461182, "learning_rate": 8.871812174044508e-07, "loss": 0.0056, "step": 195810 }, { "epoch": 1.6535010871630322, "grad_norm": 0.0369236022233963, "learning_rate": 8.867622173260243e-07, "loss": 0.0059, "step": 195820 }, { "epoch": 1.653585527010196, "grad_norm": 0.19001826643943787, "learning_rate": 8.863433065862797e-07, "loss": 0.0046, "step": 195830 }, { "epoch": 1.65366996685736, "grad_norm": 0.17669546604156494, "learning_rate": 8.859244851943161e-07, "loss": 0.0089, "step": 195840 }, { "epoch": 1.653754406704524, "grad_norm": 0.20113889873027802, "learning_rate": 8.855057531592276e-07, "loss": 0.0131, "step": 195850 }, { "epoch": 1.6538388465516878, "grad_norm": 0.26570650935173035, "learning_rate": 8.850871104901127e-07, "loss": 0.0064, "step": 195860 }, { "epoch": 1.6539232863988516, "grad_norm": 0.12122024595737457, "learning_rate": 8.846685571960605e-07, "loss": 0.0101, "step": 195870 }, { "epoch": 1.6540077262460153, "grad_norm": 0.10883122682571411, "learning_rate": 8.842500932861658e-07, "loss": 0.0061, "step": 195880 }, { "epoch": 1.6540921660931793, "grad_norm": 0.1541273593902588, "learning_rate": 8.838317187695155e-07, "loss": 0.0046, "step": 195890 }, { "epoch": 1.6541766059403433, "grad_norm": 1.241358995437622, "learning_rate": 8.834134336551959e-07, "loss": 0.0164, "step": 195900 }, { "epoch": 1.654261045787507, "grad_norm": 0.07296356558799744, "learning_rate": 8.829952379522938e-07, "loss": 0.0063, "step": 195910 }, { "epoch": 1.6543454856346709, "grad_norm": 0.15013375878334045, "learning_rate": 8.825771316698917e-07, "loss": 0.0085, "step": 195920 }, { "epoch": 1.6544299254818349, "grad_norm": 0.5327796339988708, "learning_rate": 8.821591148170706e-07, "loss": 0.0133, "step": 195930 }, { "epoch": 1.6545143653289989, "grad_norm": 0.3573093116283417, "learning_rate": 8.81741187402908e-07, "loss": 0.0073, "step": 195940 }, { "epoch": 1.6545988051761626, "grad_norm": 0.4939744174480438, "learning_rate": 8.813233494364848e-07, "loss": 0.0066, "step": 195950 }, { "epoch": 1.6546832450233264, "grad_norm": 0.3175490200519562, "learning_rate": 8.809056009268729e-07, "loss": 0.0071, "step": 195960 }, { "epoch": 1.6547676848704904, "grad_norm": 0.15431785583496094, "learning_rate": 8.804879418831486e-07, "loss": 0.0053, "step": 195970 }, { "epoch": 1.6548521247176544, "grad_norm": 0.21213917434215546, "learning_rate": 8.800703723143806e-07, "loss": 0.0059, "step": 195980 }, { "epoch": 1.6549365645648182, "grad_norm": 0.9323314428329468, "learning_rate": 8.796528922296421e-07, "loss": 0.0077, "step": 195990 }, { "epoch": 1.655021004411982, "grad_norm": 0.11756506562232971, "learning_rate": 8.792355016379977e-07, "loss": 0.007, "step": 196000 }, { "epoch": 1.6551054442591457, "grad_norm": 0.2780652344226837, "learning_rate": 8.788182005485124e-07, "loss": 0.0078, "step": 196010 }, { "epoch": 1.6551898841063097, "grad_norm": 0.3339459002017975, "learning_rate": 8.784009889702528e-07, "loss": 0.0032, "step": 196020 }, { "epoch": 1.6552743239534737, "grad_norm": 0.3081122040748596, "learning_rate": 8.779838669122776e-07, "loss": 0.0077, "step": 196030 }, { "epoch": 1.6553587638006375, "grad_norm": 0.29554682970046997, "learning_rate": 8.775668343836491e-07, "loss": 0.0043, "step": 196040 }, { "epoch": 1.6554432036478013, "grad_norm": 0.06257573515176773, "learning_rate": 8.771498913934228e-07, "loss": 0.0076, "step": 196050 }, { "epoch": 1.6555276434949653, "grad_norm": 0.22430561482906342, "learning_rate": 8.767330379506578e-07, "loss": 0.0082, "step": 196060 }, { "epoch": 1.6556120833421293, "grad_norm": 0.2919488847255707, "learning_rate": 8.763162740644065e-07, "loss": 0.0064, "step": 196070 }, { "epoch": 1.655696523189293, "grad_norm": 0.028007669374346733, "learning_rate": 8.758995997437203e-07, "loss": 0.0068, "step": 196080 }, { "epoch": 1.6557809630364568, "grad_norm": 0.2382618635892868, "learning_rate": 8.754830149976485e-07, "loss": 0.0061, "step": 196090 }, { "epoch": 1.6558654028836208, "grad_norm": 0.09488687664270401, "learning_rate": 8.750665198352415e-07, "loss": 0.003, "step": 196100 }, { "epoch": 1.6559498427307846, "grad_norm": 0.21063430607318878, "learning_rate": 8.746501142655434e-07, "loss": 0.003, "step": 196110 }, { "epoch": 1.6560342825779486, "grad_norm": 0.21842794120311737, "learning_rate": 8.742337982976002e-07, "loss": 0.0081, "step": 196120 }, { "epoch": 1.6561187224251124, "grad_norm": 0.33172762393951416, "learning_rate": 8.73817571940454e-07, "loss": 0.0037, "step": 196130 }, { "epoch": 1.6562031622722762, "grad_norm": 0.1335752308368683, "learning_rate": 8.734014352031434e-07, "loss": 0.0078, "step": 196140 }, { "epoch": 1.6562876021194401, "grad_norm": 0.04627462103962898, "learning_rate": 8.729853880947086e-07, "loss": 0.0069, "step": 196150 }, { "epoch": 1.6563720419666041, "grad_norm": 0.13613612949848175, "learning_rate": 8.725694306241861e-07, "loss": 0.0107, "step": 196160 }, { "epoch": 1.656456481813768, "grad_norm": 0.1163378432393074, "learning_rate": 8.721535628006095e-07, "loss": 0.0095, "step": 196170 }, { "epoch": 1.6565409216609317, "grad_norm": 0.07769040018320084, "learning_rate": 8.717377846330105e-07, "loss": 0.0061, "step": 196180 }, { "epoch": 1.6566253615080957, "grad_norm": 0.41055983304977417, "learning_rate": 8.713220961304219e-07, "loss": 0.0039, "step": 196190 }, { "epoch": 1.6567098013552597, "grad_norm": 0.21960198879241943, "learning_rate": 8.709064973018705e-07, "loss": 0.006, "step": 196200 }, { "epoch": 1.6567942412024235, "grad_norm": 0.11963354796171188, "learning_rate": 8.70490988156385e-07, "loss": 0.0076, "step": 196210 }, { "epoch": 1.6568786810495872, "grad_norm": 0.25306111574172974, "learning_rate": 8.700755687029877e-07, "loss": 0.0047, "step": 196220 }, { "epoch": 1.656963120896751, "grad_norm": 0.23404806852340698, "learning_rate": 8.696602389507036e-07, "loss": 0.0062, "step": 196230 }, { "epoch": 1.657047560743915, "grad_norm": 0.23823872208595276, "learning_rate": 8.692449989085533e-07, "loss": 0.0055, "step": 196240 }, { "epoch": 1.657132000591079, "grad_norm": 0.5395019054412842, "learning_rate": 8.68829848585555e-07, "loss": 0.0048, "step": 196250 }, { "epoch": 1.6572164404382428, "grad_norm": 0.3945446014404297, "learning_rate": 8.684147879907256e-07, "loss": 0.0053, "step": 196260 }, { "epoch": 1.6573008802854066, "grad_norm": 0.1849353164434433, "learning_rate": 8.679998171330794e-07, "loss": 0.0052, "step": 196270 }, { "epoch": 1.6573853201325706, "grad_norm": 0.09449958056211472, "learning_rate": 8.675849360216315e-07, "loss": 0.0041, "step": 196280 }, { "epoch": 1.6574697599797346, "grad_norm": 0.151050865650177, "learning_rate": 8.671701446653907e-07, "loss": 0.0057, "step": 196290 }, { "epoch": 1.6575541998268983, "grad_norm": 0.012060267850756645, "learning_rate": 8.667554430733688e-07, "loss": 0.0069, "step": 196300 }, { "epoch": 1.657638639674062, "grad_norm": 0.426688551902771, "learning_rate": 8.663408312545718e-07, "loss": 0.0055, "step": 196310 }, { "epoch": 1.657723079521226, "grad_norm": 0.018644561991095543, "learning_rate": 8.659263092180042e-07, "loss": 0.0063, "step": 196320 }, { "epoch": 1.65780751936839, "grad_norm": 0.3020237386226654, "learning_rate": 8.655118769726694e-07, "loss": 0.0043, "step": 196330 }, { "epoch": 1.6578919592155539, "grad_norm": 0.08685821294784546, "learning_rate": 8.650975345275708e-07, "loss": 0.0038, "step": 196340 }, { "epoch": 1.6579763990627177, "grad_norm": 0.3454330861568451, "learning_rate": 8.64683281891705e-07, "loss": 0.0062, "step": 196350 }, { "epoch": 1.6580608389098814, "grad_norm": 0.11937130242586136, "learning_rate": 8.642691190740721e-07, "loss": 0.0037, "step": 196360 }, { "epoch": 1.6581452787570454, "grad_norm": 0.5126237869262695, "learning_rate": 8.638550460836664e-07, "loss": 0.0076, "step": 196370 }, { "epoch": 1.6582297186042094, "grad_norm": 0.27919697761535645, "learning_rate": 8.634410629294804e-07, "loss": 0.0101, "step": 196380 }, { "epoch": 1.6583141584513732, "grad_norm": 0.3367989659309387, "learning_rate": 8.630271696205078e-07, "loss": 0.0059, "step": 196390 }, { "epoch": 1.658398598298537, "grad_norm": 0.5275723934173584, "learning_rate": 8.626133661657377e-07, "loss": 0.0046, "step": 196400 }, { "epoch": 1.658483038145701, "grad_norm": 0.08532709628343582, "learning_rate": 8.621996525741571e-07, "loss": 0.005, "step": 196410 }, { "epoch": 1.658567477992865, "grad_norm": 0.44401052594184875, "learning_rate": 8.617860288547514e-07, "loss": 0.0055, "step": 196420 }, { "epoch": 1.6586519178400287, "grad_norm": 0.49396899342536926, "learning_rate": 8.613724950165059e-07, "loss": 0.0059, "step": 196430 }, { "epoch": 1.6587363576871925, "grad_norm": 0.002561424160376191, "learning_rate": 8.609590510684008e-07, "loss": 0.0079, "step": 196440 }, { "epoch": 1.6588207975343565, "grad_norm": 0.30225858092308044, "learning_rate": 8.605456970194176e-07, "loss": 0.0052, "step": 196450 }, { "epoch": 1.6589052373815203, "grad_norm": 0.33306458592414856, "learning_rate": 8.601324328785332e-07, "loss": 0.0067, "step": 196460 }, { "epoch": 1.6589896772286843, "grad_norm": 0.3473817706108093, "learning_rate": 8.597192586547248e-07, "loss": 0.0063, "step": 196470 }, { "epoch": 1.659074117075848, "grad_norm": 0.3165987730026245, "learning_rate": 8.593061743569659e-07, "loss": 0.0075, "step": 196480 }, { "epoch": 1.6591585569230118, "grad_norm": 0.16963021457195282, "learning_rate": 8.58893179994228e-07, "loss": 0.0048, "step": 196490 }, { "epoch": 1.6592429967701758, "grad_norm": 0.3868723511695862, "learning_rate": 8.584802755754817e-07, "loss": 0.0081, "step": 196500 }, { "epoch": 1.6593274366173398, "grad_norm": 0.06885997951030731, "learning_rate": 8.58067461109694e-07, "loss": 0.0049, "step": 196510 }, { "epoch": 1.6594118764645036, "grad_norm": 0.08014202862977982, "learning_rate": 8.576547366058335e-07, "loss": 0.0064, "step": 196520 }, { "epoch": 1.6594963163116674, "grad_norm": 0.060802675783634186, "learning_rate": 8.572421020728622e-07, "loss": 0.0053, "step": 196530 }, { "epoch": 1.6595807561588314, "grad_norm": 0.15170186758041382, "learning_rate": 8.56829557519745e-07, "loss": 0.0048, "step": 196540 }, { "epoch": 1.6596651960059954, "grad_norm": 0.549070417881012, "learning_rate": 8.564171029554391e-07, "loss": 0.0055, "step": 196550 }, { "epoch": 1.6597496358531592, "grad_norm": 0.45982521772384644, "learning_rate": 8.560047383889058e-07, "loss": 0.0022, "step": 196560 }, { "epoch": 1.659834075700323, "grad_norm": 0.18204084038734436, "learning_rate": 8.555924638291008e-07, "loss": 0.0072, "step": 196570 }, { "epoch": 1.6599185155474867, "grad_norm": 0.014800443314015865, "learning_rate": 8.551802792849778e-07, "loss": 0.0029, "step": 196580 }, { "epoch": 1.6600029553946507, "grad_norm": 0.011921476572751999, "learning_rate": 8.547681847654899e-07, "loss": 0.0028, "step": 196590 }, { "epoch": 1.6600873952418147, "grad_norm": 0.3374579846858978, "learning_rate": 8.543561802795869e-07, "loss": 0.004, "step": 196600 }, { "epoch": 1.6601718350889785, "grad_norm": 0.07425201684236526, "learning_rate": 8.539442658362185e-07, "loss": 0.0045, "step": 196610 }, { "epoch": 1.6602562749361423, "grad_norm": 0.19427669048309326, "learning_rate": 8.535324414443302e-07, "loss": 0.0069, "step": 196620 }, { "epoch": 1.6603407147833062, "grad_norm": 0.5093969106674194, "learning_rate": 8.531207071128689e-07, "loss": 0.0064, "step": 196630 }, { "epoch": 1.6604251546304702, "grad_norm": 0.5910844206809998, "learning_rate": 8.527090628507761e-07, "loss": 0.0081, "step": 196640 }, { "epoch": 1.660509594477634, "grad_norm": 0.035278256982564926, "learning_rate": 8.522975086669926e-07, "loss": 0.0083, "step": 196650 }, { "epoch": 1.6605940343247978, "grad_norm": 0.5876023769378662, "learning_rate": 8.518860445704558e-07, "loss": 0.009, "step": 196660 }, { "epoch": 1.6606784741719618, "grad_norm": 0.1818966269493103, "learning_rate": 8.514746705701055e-07, "loss": 0.0102, "step": 196670 }, { "epoch": 1.6607629140191258, "grad_norm": 0.5234346985816956, "learning_rate": 8.510633866748736e-07, "loss": 0.0077, "step": 196680 }, { "epoch": 1.6608473538662896, "grad_norm": 0.18052363395690918, "learning_rate": 8.506521928936967e-07, "loss": 0.0067, "step": 196690 }, { "epoch": 1.6609317937134533, "grad_norm": 0.16902375221252441, "learning_rate": 8.502410892355023e-07, "loss": 0.0063, "step": 196700 }, { "epoch": 1.6610162335606171, "grad_norm": 0.23937837779521942, "learning_rate": 8.49830075709222e-07, "loss": 0.0063, "step": 196710 }, { "epoch": 1.6611006734077811, "grad_norm": 0.16340646147727966, "learning_rate": 8.494191523237821e-07, "loss": 0.0051, "step": 196720 }, { "epoch": 1.6611851132549451, "grad_norm": 0.33435845375061035, "learning_rate": 8.490083190881082e-07, "loss": 0.0074, "step": 196730 }, { "epoch": 1.6612695531021089, "grad_norm": 0.27764034271240234, "learning_rate": 8.485975760111226e-07, "loss": 0.006, "step": 196740 }, { "epoch": 1.6613539929492727, "grad_norm": 0.0986834317445755, "learning_rate": 8.481869231017459e-07, "loss": 0.0041, "step": 196750 }, { "epoch": 1.6614384327964367, "grad_norm": 0.06734123080968857, "learning_rate": 8.477763603688993e-07, "loss": 0.009, "step": 196760 }, { "epoch": 1.6615228726436007, "grad_norm": 0.4623628258705139, "learning_rate": 8.473658878214986e-07, "loss": 0.0092, "step": 196770 }, { "epoch": 1.6616073124907644, "grad_norm": 1.0222938060760498, "learning_rate": 8.469555054684603e-07, "loss": 0.0095, "step": 196780 }, { "epoch": 1.6616917523379282, "grad_norm": 0.05457618832588196, "learning_rate": 8.465452133186969e-07, "loss": 0.0101, "step": 196790 }, { "epoch": 1.661776192185092, "grad_norm": 0.32585597038269043, "learning_rate": 8.461350113811212e-07, "loss": 0.0037, "step": 196800 }, { "epoch": 1.661860632032256, "grad_norm": 0.17262913286685944, "learning_rate": 8.457248996646422e-07, "loss": 0.0105, "step": 196810 }, { "epoch": 1.66194507187942, "grad_norm": 0.39057663083076477, "learning_rate": 8.453148781781667e-07, "loss": 0.0105, "step": 196820 }, { "epoch": 1.6620295117265838, "grad_norm": 0.35453128814697266, "learning_rate": 8.449049469306009e-07, "loss": 0.0038, "step": 196830 }, { "epoch": 1.6621139515737475, "grad_norm": 0.1983703076839447, "learning_rate": 8.444951059308465e-07, "loss": 0.005, "step": 196840 }, { "epoch": 1.6621983914209115, "grad_norm": 0.1960776150226593, "learning_rate": 8.440853551878086e-07, "loss": 0.0033, "step": 196850 }, { "epoch": 1.6622828312680755, "grad_norm": 0.03091922029852867, "learning_rate": 8.436756947103841e-07, "loss": 0.0045, "step": 196860 }, { "epoch": 1.6623672711152393, "grad_norm": 0.0882091298699379, "learning_rate": 8.432661245074724e-07, "loss": 0.0042, "step": 196870 }, { "epoch": 1.662451710962403, "grad_norm": 0.09181723743677139, "learning_rate": 8.428566445879677e-07, "loss": 0.0046, "step": 196880 }, { "epoch": 1.662536150809567, "grad_norm": 0.36941102147102356, "learning_rate": 8.424472549607676e-07, "loss": 0.0034, "step": 196890 }, { "epoch": 1.662620590656731, "grad_norm": 0.15716040134429932, "learning_rate": 8.420379556347585e-07, "loss": 0.0041, "step": 196900 }, { "epoch": 1.6627050305038948, "grad_norm": 0.5676669478416443, "learning_rate": 8.416287466188339e-07, "loss": 0.0088, "step": 196910 }, { "epoch": 1.6627894703510586, "grad_norm": 0.40057873725891113, "learning_rate": 8.412196279218799e-07, "loss": 0.0097, "step": 196920 }, { "epoch": 1.6628739101982224, "grad_norm": 0.35823148488998413, "learning_rate": 8.408105995527844e-07, "loss": 0.0033, "step": 196930 }, { "epoch": 1.6629583500453864, "grad_norm": 0.2649095356464386, "learning_rate": 8.404016615204291e-07, "loss": 0.0041, "step": 196940 }, { "epoch": 1.6630427898925504, "grad_norm": 0.40413013100624084, "learning_rate": 8.399928138336988e-07, "loss": 0.0075, "step": 196950 }, { "epoch": 1.6631272297397142, "grad_norm": 0.07610608637332916, "learning_rate": 8.395840565014718e-07, "loss": 0.0068, "step": 196960 }, { "epoch": 1.663211669586878, "grad_norm": 0.002029448514804244, "learning_rate": 8.391753895326272e-07, "loss": 0.0139, "step": 196970 }, { "epoch": 1.663296109434042, "grad_norm": 0.006434504874050617, "learning_rate": 8.387668129360399e-07, "loss": 0.0053, "step": 196980 }, { "epoch": 1.663380549281206, "grad_norm": 0.45974573493003845, "learning_rate": 8.383583267205835e-07, "loss": 0.0064, "step": 196990 }, { "epoch": 1.6634649891283697, "grad_norm": 0.23711653053760529, "learning_rate": 8.379499308951328e-07, "loss": 0.0089, "step": 197000 }, { "epoch": 1.6635494289755335, "grad_norm": 0.12298395484685898, "learning_rate": 8.375416254685554e-07, "loss": 0.0064, "step": 197010 }, { "epoch": 1.6636338688226975, "grad_norm": 0.024603845551609993, "learning_rate": 8.371334104497219e-07, "loss": 0.005, "step": 197020 }, { "epoch": 1.6637183086698613, "grad_norm": 0.38096436858177185, "learning_rate": 8.367252858474972e-07, "loss": 0.0069, "step": 197030 }, { "epoch": 1.6638027485170253, "grad_norm": 0.22609843313694, "learning_rate": 8.363172516707468e-07, "loss": 0.0056, "step": 197040 }, { "epoch": 1.663887188364189, "grad_norm": 0.24204692244529724, "learning_rate": 8.359093079283325e-07, "loss": 0.0065, "step": 197050 }, { "epoch": 1.6639716282113528, "grad_norm": 0.5349586606025696, "learning_rate": 8.355014546291152e-07, "loss": 0.0069, "step": 197060 }, { "epoch": 1.6640560680585168, "grad_norm": 0.2893389165401459, "learning_rate": 8.350936917819524e-07, "loss": 0.0119, "step": 197070 }, { "epoch": 1.6641405079056808, "grad_norm": 0.25524336099624634, "learning_rate": 8.346860193957002e-07, "loss": 0.0069, "step": 197080 }, { "epoch": 1.6642249477528446, "grad_norm": 0.4871387183666229, "learning_rate": 8.342784374792152e-07, "loss": 0.0108, "step": 197090 }, { "epoch": 1.6643093876000083, "grad_norm": 0.44260522723197937, "learning_rate": 8.338709460413474e-07, "loss": 0.0032, "step": 197100 }, { "epoch": 1.6643938274471723, "grad_norm": 0.12719163298606873, "learning_rate": 8.334635450909506e-07, "loss": 0.0059, "step": 197110 }, { "epoch": 1.6644782672943363, "grad_norm": 0.4015505909919739, "learning_rate": 8.330562346368704e-07, "loss": 0.004, "step": 197120 }, { "epoch": 1.6645627071415001, "grad_norm": 0.31636667251586914, "learning_rate": 8.326490146879562e-07, "loss": 0.0079, "step": 197130 }, { "epoch": 1.664647146988664, "grad_norm": 0.2808685302734375, "learning_rate": 8.322418852530511e-07, "loss": 0.0046, "step": 197140 }, { "epoch": 1.6647315868358277, "grad_norm": 0.08871496468782425, "learning_rate": 8.318348463409986e-07, "loss": 0.0029, "step": 197150 }, { "epoch": 1.6648160266829917, "grad_norm": 0.05270885303616524, "learning_rate": 8.314278979606377e-07, "loss": 0.0029, "step": 197160 }, { "epoch": 1.6649004665301557, "grad_norm": 0.27056989073753357, "learning_rate": 8.310210401208102e-07, "loss": 0.015, "step": 197170 }, { "epoch": 1.6649849063773194, "grad_norm": 0.010436422191560268, "learning_rate": 8.306142728303501e-07, "loss": 0.0053, "step": 197180 }, { "epoch": 1.6650693462244832, "grad_norm": 0.1068320944905281, "learning_rate": 8.302075960980943e-07, "loss": 0.0056, "step": 197190 }, { "epoch": 1.6651537860716472, "grad_norm": 0.17078326642513275, "learning_rate": 8.298010099328751e-07, "loss": 0.0085, "step": 197200 }, { "epoch": 1.6652382259188112, "grad_norm": 1.0315337181091309, "learning_rate": 8.293945143435239e-07, "loss": 0.006, "step": 197210 }, { "epoch": 1.665322665765975, "grad_norm": 0.13356788456439972, "learning_rate": 8.289881093388691e-07, "loss": 0.0088, "step": 197220 }, { "epoch": 1.6654071056131388, "grad_norm": 0.10139010846614838, "learning_rate": 8.285817949277364e-07, "loss": 0.0052, "step": 197230 }, { "epoch": 1.6654915454603028, "grad_norm": 0.02169545367360115, "learning_rate": 8.281755711189538e-07, "loss": 0.0054, "step": 197240 }, { "epoch": 1.6655759853074668, "grad_norm": 0.16756106913089752, "learning_rate": 8.277694379213414e-07, "loss": 0.005, "step": 197250 }, { "epoch": 1.6656604251546305, "grad_norm": 0.2404859960079193, "learning_rate": 8.273633953437232e-07, "loss": 0.0042, "step": 197260 }, { "epoch": 1.6657448650017943, "grad_norm": 0.11736541986465454, "learning_rate": 8.269574433949156e-07, "loss": 0.0046, "step": 197270 }, { "epoch": 1.665829304848958, "grad_norm": 0.27231544256210327, "learning_rate": 8.265515820837389e-07, "loss": 0.0086, "step": 197280 }, { "epoch": 1.665913744696122, "grad_norm": 0.01580238528549671, "learning_rate": 8.261458114190063e-07, "loss": 0.0045, "step": 197290 }, { "epoch": 1.665998184543286, "grad_norm": 0.2808888256549835, "learning_rate": 8.257401314095309e-07, "loss": 0.0062, "step": 197300 }, { "epoch": 1.6660826243904499, "grad_norm": 0.11158400774002075, "learning_rate": 8.253345420641252e-07, "loss": 0.0045, "step": 197310 }, { "epoch": 1.6661670642376136, "grad_norm": 0.6170568466186523, "learning_rate": 8.249290433915963e-07, "loss": 0.0058, "step": 197320 }, { "epoch": 1.6662515040847776, "grad_norm": 0.17244352400302887, "learning_rate": 8.245236354007541e-07, "loss": 0.0045, "step": 197330 }, { "epoch": 1.6663359439319416, "grad_norm": 0.20290160179138184, "learning_rate": 8.24118318100402e-07, "loss": 0.0086, "step": 197340 }, { "epoch": 1.6664203837791054, "grad_norm": 0.11813674867153168, "learning_rate": 8.237130914993452e-07, "loss": 0.0081, "step": 197350 }, { "epoch": 1.6665048236262692, "grad_norm": 0.22978879511356354, "learning_rate": 8.233079556063833e-07, "loss": 0.007, "step": 197360 }, { "epoch": 1.666589263473433, "grad_norm": 0.10348935425281525, "learning_rate": 8.229029104303177e-07, "loss": 0.0139, "step": 197370 }, { "epoch": 1.666673703320597, "grad_norm": 0.04321354255080223, "learning_rate": 8.224979559799451e-07, "loss": 0.0087, "step": 197380 }, { "epoch": 1.666758143167761, "grad_norm": 0.2667495012283325, "learning_rate": 8.220930922640608e-07, "loss": 0.0084, "step": 197390 }, { "epoch": 1.6668425830149247, "grad_norm": 0.25489866733551025, "learning_rate": 8.216883192914571e-07, "loss": 0.005, "step": 197400 }, { "epoch": 1.6669270228620885, "grad_norm": 0.26184961199760437, "learning_rate": 8.212836370709282e-07, "loss": 0.0072, "step": 197410 }, { "epoch": 1.6670114627092525, "grad_norm": 0.38303372263908386, "learning_rate": 8.20879045611262e-07, "loss": 0.0039, "step": 197420 }, { "epoch": 1.6670959025564165, "grad_norm": 0.395389586687088, "learning_rate": 8.204745449212459e-07, "loss": 0.0053, "step": 197430 }, { "epoch": 1.6671803424035803, "grad_norm": 0.13309429585933685, "learning_rate": 8.200701350096674e-07, "loss": 0.0089, "step": 197440 }, { "epoch": 1.667264782250744, "grad_norm": 0.0010505698155611753, "learning_rate": 8.196658158853071e-07, "loss": 0.0092, "step": 197450 }, { "epoch": 1.667349222097908, "grad_norm": 0.26214274764060974, "learning_rate": 8.192615875569505e-07, "loss": 0.0059, "step": 197460 }, { "epoch": 1.667433661945072, "grad_norm": 0.5047287940979004, "learning_rate": 8.188574500333757e-07, "loss": 0.0104, "step": 197470 }, { "epoch": 1.6675181017922358, "grad_norm": 0.07256511598825455, "learning_rate": 8.184534033233598e-07, "loss": 0.0049, "step": 197480 }, { "epoch": 1.6676025416393996, "grad_norm": 0.061416372656822205, "learning_rate": 8.180494474356781e-07, "loss": 0.0092, "step": 197490 }, { "epoch": 1.6676869814865634, "grad_norm": 0.3233337700366974, "learning_rate": 8.176455823791063e-07, "loss": 0.0054, "step": 197500 }, { "epoch": 1.6677714213337274, "grad_norm": 0.12100085616111755, "learning_rate": 8.172418081624145e-07, "loss": 0.01, "step": 197510 }, { "epoch": 1.6678558611808914, "grad_norm": 0.4842434525489807, "learning_rate": 8.168381247943752e-07, "loss": 0.0045, "step": 197520 }, { "epoch": 1.6679403010280551, "grad_norm": 0.07906617224216461, "learning_rate": 8.164345322837542e-07, "loss": 0.0055, "step": 197530 }, { "epoch": 1.668024740875219, "grad_norm": 0.14434872567653656, "learning_rate": 8.160310306393176e-07, "loss": 0.0039, "step": 197540 }, { "epoch": 1.668109180722383, "grad_norm": 0.3455774188041687, "learning_rate": 8.156276198698299e-07, "loss": 0.0032, "step": 197550 }, { "epoch": 1.668193620569547, "grad_norm": 0.19221071898937225, "learning_rate": 8.152242999840515e-07, "loss": 0.0073, "step": 197560 }, { "epoch": 1.6682780604167107, "grad_norm": 0.20858041942119598, "learning_rate": 8.14821070990745e-07, "loss": 0.0199, "step": 197570 }, { "epoch": 1.6683625002638744, "grad_norm": 0.7469038963317871, "learning_rate": 8.144179328986657e-07, "loss": 0.0056, "step": 197580 }, { "epoch": 1.6684469401110384, "grad_norm": 0.1882641762495041, "learning_rate": 8.140148857165731e-07, "loss": 0.0052, "step": 197590 }, { "epoch": 1.6685313799582022, "grad_norm": 0.056023988872766495, "learning_rate": 8.136119294532175e-07, "loss": 0.005, "step": 197600 }, { "epoch": 1.6686158198053662, "grad_norm": 0.2999013662338257, "learning_rate": 8.132090641173546e-07, "loss": 0.0057, "step": 197610 }, { "epoch": 1.66870025965253, "grad_norm": 0.5257301926612854, "learning_rate": 8.128062897177324e-07, "loss": 0.0114, "step": 197620 }, { "epoch": 1.6687846994996938, "grad_norm": 0.04854792356491089, "learning_rate": 8.124036062631002e-07, "loss": 0.0025, "step": 197630 }, { "epoch": 1.6688691393468578, "grad_norm": 0.4622918963432312, "learning_rate": 8.12001013762202e-07, "loss": 0.0063, "step": 197640 }, { "epoch": 1.6689535791940218, "grad_norm": 0.3430914282798767, "learning_rate": 8.115985122237846e-07, "loss": 0.0108, "step": 197650 }, { "epoch": 1.6690380190411855, "grad_norm": 0.026903541758656502, "learning_rate": 8.111961016565894e-07, "loss": 0.0084, "step": 197660 }, { "epoch": 1.6691224588883493, "grad_norm": 0.19218717515468597, "learning_rate": 8.107937820693556e-07, "loss": 0.0051, "step": 197670 }, { "epoch": 1.6692068987355133, "grad_norm": 0.12654384970664978, "learning_rate": 8.103915534708234e-07, "loss": 0.0056, "step": 197680 }, { "epoch": 1.6692913385826773, "grad_norm": 0.066302590072155, "learning_rate": 8.099894158697269e-07, "loss": 0.0043, "step": 197690 }, { "epoch": 1.669375778429841, "grad_norm": 0.0011417089262977242, "learning_rate": 8.095873692748024e-07, "loss": 0.0062, "step": 197700 }, { "epoch": 1.6694602182770049, "grad_norm": 0.23896580934524536, "learning_rate": 8.091854136947824e-07, "loss": 0.0068, "step": 197710 }, { "epoch": 1.6695446581241686, "grad_norm": 0.09926792234182358, "learning_rate": 8.087835491383961e-07, "loss": 0.0054, "step": 197720 }, { "epoch": 1.6696290979713326, "grad_norm": 0.2831880748271942, "learning_rate": 8.083817756143708e-07, "loss": 0.0086, "step": 197730 }, { "epoch": 1.6697135378184966, "grad_norm": 0.3283390998840332, "learning_rate": 8.079800931314357e-07, "loss": 0.007, "step": 197740 }, { "epoch": 1.6697979776656604, "grad_norm": 0.08919303864240646, "learning_rate": 8.075785016983129e-07, "loss": 0.0089, "step": 197750 }, { "epoch": 1.6698824175128242, "grad_norm": 0.16389796137809753, "learning_rate": 8.071770013237268e-07, "loss": 0.004, "step": 197760 }, { "epoch": 1.6699668573599882, "grad_norm": 0.037930238991975784, "learning_rate": 8.067755920163961e-07, "loss": 0.0041, "step": 197770 }, { "epoch": 1.6700512972071522, "grad_norm": 0.5194370150566101, "learning_rate": 8.063742737850417e-07, "loss": 0.0062, "step": 197780 }, { "epoch": 1.670135737054316, "grad_norm": 0.18185709416866302, "learning_rate": 8.059730466383798e-07, "loss": 0.005, "step": 197790 }, { "epoch": 1.6702201769014797, "grad_norm": 0.19447822868824005, "learning_rate": 8.055719105851212e-07, "loss": 0.0052, "step": 197800 }, { "epoch": 1.6703046167486437, "grad_norm": 0.2606687545776367, "learning_rate": 8.051708656339824e-07, "loss": 0.0053, "step": 197810 }, { "epoch": 1.6703890565958077, "grad_norm": 0.06022704020142555, "learning_rate": 8.047699117936719e-07, "loss": 0.005, "step": 197820 }, { "epoch": 1.6704734964429715, "grad_norm": 0.31858521699905396, "learning_rate": 8.043690490729e-07, "loss": 0.0061, "step": 197830 }, { "epoch": 1.6705579362901353, "grad_norm": 0.14861205220222473, "learning_rate": 8.039682774803714e-07, "loss": 0.0054, "step": 197840 }, { "epoch": 1.670642376137299, "grad_norm": 0.08790941536426544, "learning_rate": 8.035675970247936e-07, "loss": 0.0107, "step": 197850 }, { "epoch": 1.670726815984463, "grad_norm": 0.271201491355896, "learning_rate": 8.031670077148673e-07, "loss": 0.0087, "step": 197860 }, { "epoch": 1.670811255831627, "grad_norm": 0.20734137296676636, "learning_rate": 8.027665095592941e-07, "loss": 0.0075, "step": 197870 }, { "epoch": 1.6708956956787908, "grad_norm": 0.11287426203489304, "learning_rate": 8.023661025667706e-07, "loss": 0.0044, "step": 197880 }, { "epoch": 1.6709801355259546, "grad_norm": 0.292178213596344, "learning_rate": 8.019657867459962e-07, "loss": 0.0087, "step": 197890 }, { "epoch": 1.6710645753731186, "grad_norm": 0.20666082203388214, "learning_rate": 8.01565562105665e-07, "loss": 0.0113, "step": 197900 }, { "epoch": 1.6711490152202826, "grad_norm": 0.19862288236618042, "learning_rate": 8.011654286544685e-07, "loss": 0.0056, "step": 197910 }, { "epoch": 1.6712334550674464, "grad_norm": 0.3449951410293579, "learning_rate": 8.007653864010989e-07, "loss": 0.0069, "step": 197920 }, { "epoch": 1.6713178949146101, "grad_norm": 0.29354625940322876, "learning_rate": 8.00365435354244e-07, "loss": 0.0045, "step": 197930 }, { "epoch": 1.6714023347617741, "grad_norm": 0.2871437668800354, "learning_rate": 7.999655755225921e-07, "loss": 0.0079, "step": 197940 }, { "epoch": 1.671486774608938, "grad_norm": 0.36275118589401245, "learning_rate": 7.995658069148276e-07, "loss": 0.006, "step": 197950 }, { "epoch": 1.671571214456102, "grad_norm": 0.2778639793395996, "learning_rate": 7.99166129539633e-07, "loss": 0.0058, "step": 197960 }, { "epoch": 1.6716556543032657, "grad_norm": 0.21171429753303528, "learning_rate": 7.98766543405688e-07, "loss": 0.0039, "step": 197970 }, { "epoch": 1.6717400941504295, "grad_norm": 0.23116907477378845, "learning_rate": 7.983670485216733e-07, "loss": 0.0076, "step": 197980 }, { "epoch": 1.6718245339975935, "grad_norm": 0.4070948362350464, "learning_rate": 7.979676448962648e-07, "loss": 0.0055, "step": 197990 }, { "epoch": 1.6719089738447575, "grad_norm": 0.2016516774892807, "learning_rate": 7.975683325381389e-07, "loss": 0.0045, "step": 198000 }, { "epoch": 1.6719934136919212, "grad_norm": 0.15643349289894104, "learning_rate": 7.971691114559683e-07, "loss": 0.0061, "step": 198010 }, { "epoch": 1.672077853539085, "grad_norm": 0.3150442838668823, "learning_rate": 7.967699816584213e-07, "loss": 0.0055, "step": 198020 }, { "epoch": 1.672162293386249, "grad_norm": 0.10313758254051208, "learning_rate": 7.963709431541705e-07, "loss": 0.005, "step": 198030 }, { "epoch": 1.672246733233413, "grad_norm": 0.004037763923406601, "learning_rate": 7.959719959518814e-07, "loss": 0.0087, "step": 198040 }, { "epoch": 1.6723311730805768, "grad_norm": 0.03545515239238739, "learning_rate": 7.95573140060219e-07, "loss": 0.0042, "step": 198050 }, { "epoch": 1.6724156129277405, "grad_norm": 0.361585795879364, "learning_rate": 7.951743754878455e-07, "loss": 0.0054, "step": 198060 }, { "epoch": 1.6725000527749043, "grad_norm": 0.15943068265914917, "learning_rate": 7.947757022434238e-07, "loss": 0.0027, "step": 198070 }, { "epoch": 1.6725844926220683, "grad_norm": 0.30800390243530273, "learning_rate": 7.94377120335611e-07, "loss": 0.0033, "step": 198080 }, { "epoch": 1.6726689324692323, "grad_norm": 0.3988848626613617, "learning_rate": 7.939786297730668e-07, "loss": 0.012, "step": 198090 }, { "epoch": 1.672753372316396, "grad_norm": 0.21768809854984283, "learning_rate": 7.93580230564443e-07, "loss": 0.0028, "step": 198100 }, { "epoch": 1.6728378121635599, "grad_norm": 0.01917785033583641, "learning_rate": 7.931819227183974e-07, "loss": 0.0053, "step": 198110 }, { "epoch": 1.6729222520107239, "grad_norm": 0.131780207157135, "learning_rate": 7.927837062435761e-07, "loss": 0.015, "step": 198120 }, { "epoch": 1.6730066918578879, "grad_norm": 0.05698626860976219, "learning_rate": 7.92385581148632e-07, "loss": 0.0038, "step": 198130 }, { "epoch": 1.6730911317050516, "grad_norm": 0.46603095531463623, "learning_rate": 7.919875474422101e-07, "loss": 0.0049, "step": 198140 }, { "epoch": 1.6731755715522154, "grad_norm": 0.029787233099341393, "learning_rate": 7.915896051329558e-07, "loss": 0.0103, "step": 198150 }, { "epoch": 1.6732600113993794, "grad_norm": 0.09743566066026688, "learning_rate": 7.911917542295138e-07, "loss": 0.0071, "step": 198160 }, { "epoch": 1.6733444512465434, "grad_norm": 0.4210813641548157, "learning_rate": 7.907939947405236e-07, "loss": 0.0032, "step": 198170 }, { "epoch": 1.6734288910937072, "grad_norm": 0.03608599677681923, "learning_rate": 7.903963266746262e-07, "loss": 0.0068, "step": 198180 }, { "epoch": 1.673513330940871, "grad_norm": 0.12833577394485474, "learning_rate": 7.899987500404577e-07, "loss": 0.0062, "step": 198190 }, { "epoch": 1.6735977707880347, "grad_norm": 0.19769905507564545, "learning_rate": 7.896012648466539e-07, "loss": 0.0058, "step": 198200 }, { "epoch": 1.6736822106351987, "grad_norm": 0.15977853536605835, "learning_rate": 7.892038711018468e-07, "loss": 0.0079, "step": 198210 }, { "epoch": 1.6737666504823627, "grad_norm": 0.04598601907491684, "learning_rate": 7.888065688146696e-07, "loss": 0.015, "step": 198220 }, { "epoch": 1.6738510903295265, "grad_norm": 0.13641685247421265, "learning_rate": 7.884093579937496e-07, "loss": 0.0085, "step": 198230 }, { "epoch": 1.6739355301766903, "grad_norm": 0.42192649841308594, "learning_rate": 7.880122386477168e-07, "loss": 0.0087, "step": 198240 }, { "epoch": 1.6740199700238543, "grad_norm": 0.35471922159194946, "learning_rate": 7.876152107851953e-07, "loss": 0.0057, "step": 198250 }, { "epoch": 1.6741044098710183, "grad_norm": 0.11641980707645416, "learning_rate": 7.872182744148066e-07, "loss": 0.0032, "step": 198260 }, { "epoch": 1.674188849718182, "grad_norm": 0.36293837428092957, "learning_rate": 7.868214295451748e-07, "loss": 0.0063, "step": 198270 }, { "epoch": 1.6742732895653458, "grad_norm": 0.37125200033187866, "learning_rate": 7.864246761849181e-07, "loss": 0.0044, "step": 198280 }, { "epoch": 1.6743577294125096, "grad_norm": 0.28289714455604553, "learning_rate": 7.860280143426546e-07, "loss": 0.0028, "step": 198290 }, { "epoch": 1.6744421692596736, "grad_norm": 0.12509170174598694, "learning_rate": 7.856314440269975e-07, "loss": 0.005, "step": 198300 }, { "epoch": 1.6745266091068376, "grad_norm": 0.41754674911499023, "learning_rate": 7.852349652465629e-07, "loss": 0.0088, "step": 198310 }, { "epoch": 1.6746110489540014, "grad_norm": 0.594588577747345, "learning_rate": 7.848385780099599e-07, "loss": 0.0077, "step": 198320 }, { "epoch": 1.6746954888011651, "grad_norm": 0.05661500617861748, "learning_rate": 7.844422823258008e-07, "loss": 0.006, "step": 198330 }, { "epoch": 1.6747799286483291, "grad_norm": 0.05989556759595871, "learning_rate": 7.840460782026899e-07, "loss": 0.0124, "step": 198340 }, { "epoch": 1.6748643684954931, "grad_norm": 0.20385777950286865, "learning_rate": 7.836499656492358e-07, "loss": 0.0061, "step": 198350 }, { "epoch": 1.674948808342657, "grad_norm": 0.14203257858753204, "learning_rate": 7.832539446740405e-07, "loss": 0.0039, "step": 198360 }, { "epoch": 1.6750332481898207, "grad_norm": 0.25577661395072937, "learning_rate": 7.828580152857052e-07, "loss": 0.0073, "step": 198370 }, { "epoch": 1.6751176880369847, "grad_norm": 0.07262439280748367, "learning_rate": 7.824621774928299e-07, "loss": 0.0057, "step": 198380 }, { "epoch": 1.6752021278841487, "grad_norm": 0.19307570159435272, "learning_rate": 7.8206643130401e-07, "loss": 0.0082, "step": 198390 }, { "epoch": 1.6752865677313125, "grad_norm": 0.07645147293806076, "learning_rate": 7.816707767278448e-07, "loss": 0.0035, "step": 198400 }, { "epoch": 1.6753710075784762, "grad_norm": 0.1373286098241806, "learning_rate": 7.81275213772924e-07, "loss": 0.0045, "step": 198410 }, { "epoch": 1.67545544742564, "grad_norm": 0.44756102561950684, "learning_rate": 7.808797424478426e-07, "loss": 0.01, "step": 198420 }, { "epoch": 1.675539887272804, "grad_norm": 0.1805514395236969, "learning_rate": 7.804843627611891e-07, "loss": 0.0076, "step": 198430 }, { "epoch": 1.675624327119968, "grad_norm": 0.0586024634540081, "learning_rate": 7.8008907472155e-07, "loss": 0.0119, "step": 198440 }, { "epoch": 1.6757087669671318, "grad_norm": 0.41207456588745117, "learning_rate": 7.796938783375102e-07, "loss": 0.0026, "step": 198450 }, { "epoch": 1.6757932068142956, "grad_norm": 0.15396589040756226, "learning_rate": 7.79298773617656e-07, "loss": 0.0084, "step": 198460 }, { "epoch": 1.6758776466614596, "grad_norm": 0.05152503028512001, "learning_rate": 7.789037605705663e-07, "loss": 0.0056, "step": 198470 }, { "epoch": 1.6759620865086235, "grad_norm": 0.3537181317806244, "learning_rate": 7.785088392048229e-07, "loss": 0.0128, "step": 198480 }, { "epoch": 1.6760465263557873, "grad_norm": 0.4543892741203308, "learning_rate": 7.78114009529003e-07, "loss": 0.0065, "step": 198490 }, { "epoch": 1.676130966202951, "grad_norm": 0.0698976069688797, "learning_rate": 7.777192715516802e-07, "loss": 0.0046, "step": 198500 }, { "epoch": 1.676215406050115, "grad_norm": 0.20550453662872314, "learning_rate": 7.773246252814304e-07, "loss": 0.0049, "step": 198510 }, { "epoch": 1.6762998458972789, "grad_norm": 0.2796315550804138, "learning_rate": 7.769300707268252e-07, "loss": 0.0113, "step": 198520 }, { "epoch": 1.6763842857444429, "grad_norm": 0.5319380164146423, "learning_rate": 7.765356078964325e-07, "loss": 0.0076, "step": 198530 }, { "epoch": 1.6764687255916066, "grad_norm": 0.004305204376578331, "learning_rate": 7.761412367988202e-07, "loss": 0.0036, "step": 198540 }, { "epoch": 1.6765531654387704, "grad_norm": 0.13779160380363464, "learning_rate": 7.757469574425563e-07, "loss": 0.0047, "step": 198550 }, { "epoch": 1.6766376052859344, "grad_norm": 0.3140757381916046, "learning_rate": 7.753527698362012e-07, "loss": 0.0078, "step": 198560 }, { "epoch": 1.6767220451330984, "grad_norm": 0.48998206853866577, "learning_rate": 7.749586739883197e-07, "loss": 0.0043, "step": 198570 }, { "epoch": 1.6768064849802622, "grad_norm": 0.1371280699968338, "learning_rate": 7.745646699074682e-07, "loss": 0.0041, "step": 198580 }, { "epoch": 1.676890924827426, "grad_norm": 0.10099484771490097, "learning_rate": 7.741707576022079e-07, "loss": 0.0031, "step": 198590 }, { "epoch": 1.67697536467459, "grad_norm": 0.21750813722610474, "learning_rate": 7.737769370810927e-07, "loss": 0.0055, "step": 198600 }, { "epoch": 1.677059804521754, "grad_norm": 0.16670313477516174, "learning_rate": 7.733832083526765e-07, "loss": 0.005, "step": 198610 }, { "epoch": 1.6771442443689177, "grad_norm": 0.35270804166793823, "learning_rate": 7.729895714255109e-07, "loss": 0.0048, "step": 198620 }, { "epoch": 1.6772286842160815, "grad_norm": 0.5197751522064209, "learning_rate": 7.725960263081445e-07, "loss": 0.0071, "step": 198630 }, { "epoch": 1.6773131240632453, "grad_norm": 0.16612029075622559, "learning_rate": 7.72202573009127e-07, "loss": 0.0039, "step": 198640 }, { "epoch": 1.6773975639104093, "grad_norm": 0.136849507689476, "learning_rate": 7.718092115370019e-07, "loss": 0.0069, "step": 198650 }, { "epoch": 1.6774820037575733, "grad_norm": 0.1333499401807785, "learning_rate": 7.714159419003159e-07, "loss": 0.008, "step": 198660 }, { "epoch": 1.677566443604737, "grad_norm": 0.3566865622997284, "learning_rate": 7.710227641076079e-07, "loss": 0.006, "step": 198670 }, { "epoch": 1.6776508834519008, "grad_norm": 0.44564196467399597, "learning_rate": 7.706296781674199e-07, "loss": 0.004, "step": 198680 }, { "epoch": 1.6777353232990648, "grad_norm": 0.14594316482543945, "learning_rate": 7.702366840882885e-07, "loss": 0.0033, "step": 198690 }, { "epoch": 1.6778197631462288, "grad_norm": 0.012951108627021313, "learning_rate": 7.698437818787502e-07, "loss": 0.0068, "step": 198700 }, { "epoch": 1.6779042029933926, "grad_norm": 0.6851198673248291, "learning_rate": 7.694509715473364e-07, "loss": 0.0049, "step": 198710 }, { "epoch": 1.6779886428405564, "grad_norm": 0.18046817183494568, "learning_rate": 7.690582531025815e-07, "loss": 0.0053, "step": 198720 }, { "epoch": 1.6780730826877204, "grad_norm": 0.37042486667633057, "learning_rate": 7.686656265530146e-07, "loss": 0.0088, "step": 198730 }, { "epoch": 1.6781575225348844, "grad_norm": 0.01234622485935688, "learning_rate": 7.682730919071624e-07, "loss": 0.0066, "step": 198740 }, { "epoch": 1.6782419623820481, "grad_norm": 0.36973774433135986, "learning_rate": 7.678806491735524e-07, "loss": 0.0093, "step": 198750 }, { "epoch": 1.678326402229212, "grad_norm": 0.10969101637601852, "learning_rate": 7.674882983607074e-07, "loss": 0.01, "step": 198760 }, { "epoch": 1.6784108420763757, "grad_norm": 0.10728654265403748, "learning_rate": 7.670960394771493e-07, "loss": 0.0077, "step": 198770 }, { "epoch": 1.6784952819235397, "grad_norm": 0.19028697907924652, "learning_rate": 7.667038725313964e-07, "loss": 0.0052, "step": 198780 }, { "epoch": 1.6785797217707037, "grad_norm": 0.040810953825712204, "learning_rate": 7.663117975319695e-07, "loss": 0.0063, "step": 198790 }, { "epoch": 1.6786641616178675, "grad_norm": 0.18633706867694855, "learning_rate": 7.659198144873814e-07, "loss": 0.0023, "step": 198800 }, { "epoch": 1.6787486014650312, "grad_norm": 0.12314063310623169, "learning_rate": 7.65527923406148e-07, "loss": 0.0053, "step": 198810 }, { "epoch": 1.6788330413121952, "grad_norm": 0.057259202003479004, "learning_rate": 7.651361242967798e-07, "loss": 0.0028, "step": 198820 }, { "epoch": 1.6789174811593592, "grad_norm": 0.07491375505924225, "learning_rate": 7.647444171677882e-07, "loss": 0.0054, "step": 198830 }, { "epoch": 1.679001921006523, "grad_norm": 0.010332560166716576, "learning_rate": 7.643528020276802e-07, "loss": 0.0056, "step": 198840 }, { "epoch": 1.6790863608536868, "grad_norm": 0.9227121472358704, "learning_rate": 7.63961278884961e-07, "loss": 0.006, "step": 198850 }, { "epoch": 1.6791708007008508, "grad_norm": 0.000638508761767298, "learning_rate": 7.635698477481352e-07, "loss": 0.0044, "step": 198860 }, { "epoch": 1.6792552405480146, "grad_norm": 0.40141791105270386, "learning_rate": 7.631785086257026e-07, "loss": 0.0057, "step": 198870 }, { "epoch": 1.6793396803951786, "grad_norm": 0.272007554769516, "learning_rate": 7.627872615261655e-07, "loss": 0.0075, "step": 198880 }, { "epoch": 1.6794241202423423, "grad_norm": 0.08168049901723862, "learning_rate": 7.623961064580204e-07, "loss": 0.0035, "step": 198890 }, { "epoch": 1.679508560089506, "grad_norm": 0.48858878016471863, "learning_rate": 7.620050434297643e-07, "loss": 0.0037, "step": 198900 }, { "epoch": 1.67959299993667, "grad_norm": 1.4271873235702515, "learning_rate": 7.616140724498888e-07, "loss": 0.0075, "step": 198910 }, { "epoch": 1.679677439783834, "grad_norm": 0.459751695394516, "learning_rate": 7.612231935268882e-07, "loss": 0.0049, "step": 198920 }, { "epoch": 1.6797618796309979, "grad_norm": 0.06355112046003342, "learning_rate": 7.608324066692519e-07, "loss": 0.0064, "step": 198930 }, { "epoch": 1.6798463194781617, "grad_norm": 0.1410302072763443, "learning_rate": 7.604417118854668e-07, "loss": 0.0034, "step": 198940 }, { "epoch": 1.6799307593253257, "grad_norm": 0.2394833117723465, "learning_rate": 7.600511091840174e-07, "loss": 0.0048, "step": 198950 }, { "epoch": 1.6800151991724896, "grad_norm": 0.7755501866340637, "learning_rate": 7.596605985733907e-07, "loss": 0.0101, "step": 198960 }, { "epoch": 1.6800996390196534, "grad_norm": 0.3660961389541626, "learning_rate": 7.592701800620667e-07, "loss": 0.0087, "step": 198970 }, { "epoch": 1.6801840788668172, "grad_norm": 0.36831361055374146, "learning_rate": 7.58879853658524e-07, "loss": 0.007, "step": 198980 }, { "epoch": 1.680268518713981, "grad_norm": 0.4672396779060364, "learning_rate": 7.584896193712432e-07, "loss": 0.0101, "step": 198990 }, { "epoch": 1.680352958561145, "grad_norm": 0.5445511937141418, "learning_rate": 7.580994772086975e-07, "loss": 0.0071, "step": 199000 }, { "epoch": 1.680437398408309, "grad_norm": 0.3689594268798828, "learning_rate": 7.57709427179364e-07, "loss": 0.0096, "step": 199010 }, { "epoch": 1.6805218382554727, "grad_norm": 0.3374733626842499, "learning_rate": 7.573194692917101e-07, "loss": 0.0055, "step": 199020 }, { "epoch": 1.6806062781026365, "grad_norm": 0.15438027679920197, "learning_rate": 7.569296035542096e-07, "loss": 0.0065, "step": 199030 }, { "epoch": 1.6806907179498005, "grad_norm": 0.6078308820724487, "learning_rate": 7.565398299753268e-07, "loss": 0.0077, "step": 199040 }, { "epoch": 1.6807751577969645, "grad_norm": 0.22662757337093353, "learning_rate": 7.561501485635309e-07, "loss": 0.0055, "step": 199050 }, { "epoch": 1.6808595976441283, "grad_norm": 0.41582804918289185, "learning_rate": 7.557605593272832e-07, "loss": 0.0062, "step": 199060 }, { "epoch": 1.680944037491292, "grad_norm": 0.10346340388059616, "learning_rate": 7.553710622750471e-07, "loss": 0.0089, "step": 199070 }, { "epoch": 1.681028477338456, "grad_norm": 0.06636784970760345, "learning_rate": 7.549816574152819e-07, "loss": 0.0045, "step": 199080 }, { "epoch": 1.68111291718562, "grad_norm": 0.02611277811229229, "learning_rate": 7.545923447564451e-07, "loss": 0.0107, "step": 199090 }, { "epoch": 1.6811973570327838, "grad_norm": 0.7467963099479675, "learning_rate": 7.542031243069925e-07, "loss": 0.0053, "step": 199100 }, { "epoch": 1.6812817968799476, "grad_norm": 0.27880606055259705, "learning_rate": 7.538139960753771e-07, "loss": 0.0069, "step": 199110 }, { "epoch": 1.6813662367271114, "grad_norm": 0.04939423128962517, "learning_rate": 7.534249600700528e-07, "loss": 0.0089, "step": 199120 }, { "epoch": 1.6814506765742754, "grad_norm": 0.17116110026836395, "learning_rate": 7.530360162994671e-07, "loss": 0.0097, "step": 199130 }, { "epoch": 1.6815351164214394, "grad_norm": 0.7291713953018188, "learning_rate": 7.526471647720696e-07, "loss": 0.0056, "step": 199140 }, { "epoch": 1.6816195562686032, "grad_norm": 0.09485426545143127, "learning_rate": 7.522584054963044e-07, "loss": 0.0048, "step": 199150 }, { "epoch": 1.681703996115767, "grad_norm": 0.0719342976808548, "learning_rate": 7.518697384806173e-07, "loss": 0.0019, "step": 199160 }, { "epoch": 1.681788435962931, "grad_norm": 0.26926279067993164, "learning_rate": 7.514811637334485e-07, "loss": 0.0062, "step": 199170 }, { "epoch": 1.681872875810095, "grad_norm": 0.2530626058578491, "learning_rate": 7.510926812632391e-07, "loss": 0.0095, "step": 199180 }, { "epoch": 1.6819573156572587, "grad_norm": 0.42763999104499817, "learning_rate": 7.50704291078424e-07, "loss": 0.0077, "step": 199190 }, { "epoch": 1.6820417555044225, "grad_norm": 0.15807443857192993, "learning_rate": 7.503159931874426e-07, "loss": 0.0059, "step": 199200 }, { "epoch": 1.6821261953515863, "grad_norm": 0.1619488000869751, "learning_rate": 7.49927787598727e-07, "loss": 0.0047, "step": 199210 }, { "epoch": 1.6822106351987502, "grad_norm": 0.3409877121448517, "learning_rate": 7.495396743207073e-07, "loss": 0.0065, "step": 199220 }, { "epoch": 1.6822950750459142, "grad_norm": 0.21048317849636078, "learning_rate": 7.491516533618165e-07, "loss": 0.0027, "step": 199230 }, { "epoch": 1.682379514893078, "grad_norm": 0.09324347227811813, "learning_rate": 7.487637247304797e-07, "loss": 0.0047, "step": 199240 }, { "epoch": 1.6824639547402418, "grad_norm": 0.27193284034729004, "learning_rate": 7.483758884351244e-07, "loss": 0.005, "step": 199250 }, { "epoch": 1.6825483945874058, "grad_norm": 0.2062254548072815, "learning_rate": 7.479881444841735e-07, "loss": 0.004, "step": 199260 }, { "epoch": 1.6826328344345698, "grad_norm": 0.12475664913654327, "learning_rate": 7.476004928860492e-07, "loss": 0.0096, "step": 199270 }, { "epoch": 1.6827172742817336, "grad_norm": 0.5145549774169922, "learning_rate": 7.47212933649169e-07, "loss": 0.0086, "step": 199280 }, { "epoch": 1.6828017141288973, "grad_norm": 0.07471869885921478, "learning_rate": 7.468254667819541e-07, "loss": 0.0078, "step": 199290 }, { "epoch": 1.6828861539760613, "grad_norm": 0.6267348527908325, "learning_rate": 7.464380922928171e-07, "loss": 0.0067, "step": 199300 }, { "epoch": 1.6829705938232253, "grad_norm": 0.36992260813713074, "learning_rate": 7.460508101901743e-07, "loss": 0.0039, "step": 199310 }, { "epoch": 1.6830550336703891, "grad_norm": 0.22186581790447235, "learning_rate": 7.456636204824364e-07, "loss": 0.0037, "step": 199320 }, { "epoch": 1.6831394735175529, "grad_norm": 0.1712551712989807, "learning_rate": 7.452765231780124e-07, "loss": 0.005, "step": 199330 }, { "epoch": 1.6832239133647167, "grad_norm": 0.2910207211971283, "learning_rate": 7.448895182853111e-07, "loss": 0.0088, "step": 199340 }, { "epoch": 1.6833083532118807, "grad_norm": 0.4046761989593506, "learning_rate": 7.445026058127358e-07, "loss": 0.0087, "step": 199350 }, { "epoch": 1.6833927930590447, "grad_norm": 0.26653072237968445, "learning_rate": 7.441157857686932e-07, "loss": 0.0106, "step": 199360 }, { "epoch": 1.6834772329062084, "grad_norm": 0.17038202285766602, "learning_rate": 7.437290581615825e-07, "loss": 0.0054, "step": 199370 }, { "epoch": 1.6835616727533722, "grad_norm": 1.206546425819397, "learning_rate": 7.433424229998054e-07, "loss": 0.0088, "step": 199380 }, { "epoch": 1.6836461126005362, "grad_norm": 0.18363060057163239, "learning_rate": 7.42955880291758e-07, "loss": 0.0043, "step": 199390 }, { "epoch": 1.6837305524477002, "grad_norm": 0.15055115520954132, "learning_rate": 7.425694300458369e-07, "loss": 0.0033, "step": 199400 }, { "epoch": 1.683814992294864, "grad_norm": 0.0079448027536273, "learning_rate": 7.42183072270436e-07, "loss": 0.0038, "step": 199410 }, { "epoch": 1.6838994321420278, "grad_norm": 0.40858766436576843, "learning_rate": 7.417968069739462e-07, "loss": 0.0049, "step": 199420 }, { "epoch": 1.6839838719891917, "grad_norm": 0.39888906478881836, "learning_rate": 7.41410634164757e-07, "loss": 0.0093, "step": 199430 }, { "epoch": 1.6840683118363555, "grad_norm": 0.4523158371448517, "learning_rate": 7.410245538512551e-07, "loss": 0.0032, "step": 199440 }, { "epoch": 1.6841527516835195, "grad_norm": 0.41921159625053406, "learning_rate": 7.406385660418281e-07, "loss": 0.0101, "step": 199450 }, { "epoch": 1.6842371915306833, "grad_norm": 0.23848362267017365, "learning_rate": 7.402526707448577e-07, "loss": 0.0048, "step": 199460 }, { "epoch": 1.684321631377847, "grad_norm": 0.014161777682602406, "learning_rate": 7.398668679687271e-07, "loss": 0.0049, "step": 199470 }, { "epoch": 1.684406071225011, "grad_norm": 0.6161975860595703, "learning_rate": 7.394811577218142e-07, "loss": 0.0093, "step": 199480 }, { "epoch": 1.684490511072175, "grad_norm": 0.05825187638401985, "learning_rate": 7.390955400124989e-07, "loss": 0.0037, "step": 199490 }, { "epoch": 1.6845749509193388, "grad_norm": 0.2987583875656128, "learning_rate": 7.387100148491549e-07, "loss": 0.0044, "step": 199500 }, { "epoch": 1.6846593907665026, "grad_norm": 0.000228463439270854, "learning_rate": 7.383245822401564e-07, "loss": 0.003, "step": 199510 }, { "epoch": 1.6847438306136666, "grad_norm": 0.25703781843185425, "learning_rate": 7.379392421938736e-07, "loss": 0.0052, "step": 199520 }, { "epoch": 1.6848282704608306, "grad_norm": 0.15720584988594055, "learning_rate": 7.375539947186783e-07, "loss": 0.0063, "step": 199530 }, { "epoch": 1.6849127103079944, "grad_norm": 0.2300424724817276, "learning_rate": 7.371688398229354e-07, "loss": 0.0074, "step": 199540 }, { "epoch": 1.6849971501551582, "grad_norm": 0.16772547364234924, "learning_rate": 7.367837775150133e-07, "loss": 0.0086, "step": 199550 }, { "epoch": 1.685081590002322, "grad_norm": 0.3728487193584442, "learning_rate": 7.363988078032736e-07, "loss": 0.0049, "step": 199560 }, { "epoch": 1.685166029849486, "grad_norm": 0.5936920046806335, "learning_rate": 7.360139306960767e-07, "loss": 0.0072, "step": 199570 }, { "epoch": 1.68525046969665, "grad_norm": 0.38740459084510803, "learning_rate": 7.35629146201785e-07, "loss": 0.0085, "step": 199580 }, { "epoch": 1.6853349095438137, "grad_norm": 0.22325608134269714, "learning_rate": 7.352444543287545e-07, "loss": 0.0068, "step": 199590 }, { "epoch": 1.6854193493909775, "grad_norm": 0.2776568531990051, "learning_rate": 7.348598550853403e-07, "loss": 0.004, "step": 199600 }, { "epoch": 1.6855037892381415, "grad_norm": 0.3029499053955078, "learning_rate": 7.344753484798949e-07, "loss": 0.0051, "step": 199610 }, { "epoch": 1.6855882290853055, "grad_norm": 0.02483958750963211, "learning_rate": 7.34090934520772e-07, "loss": 0.0049, "step": 199620 }, { "epoch": 1.6856726689324693, "grad_norm": 0.10459232330322266, "learning_rate": 7.337066132163185e-07, "loss": 0.0074, "step": 199630 }, { "epoch": 1.685757108779633, "grad_norm": 0.11930502206087112, "learning_rate": 7.333223845748844e-07, "loss": 0.0064, "step": 199640 }, { "epoch": 1.685841548626797, "grad_norm": 0.2594504952430725, "learning_rate": 7.329382486048137e-07, "loss": 0.0052, "step": 199650 }, { "epoch": 1.685925988473961, "grad_norm": 0.7609896063804626, "learning_rate": 7.325542053144502e-07, "loss": 0.0158, "step": 199660 }, { "epoch": 1.6860104283211248, "grad_norm": 0.35107871890068054, "learning_rate": 7.321702547121345e-07, "loss": 0.0106, "step": 199670 }, { "epoch": 1.6860948681682886, "grad_norm": 0.3466162085533142, "learning_rate": 7.317863968062056e-07, "loss": 0.0072, "step": 199680 }, { "epoch": 1.6861793080154523, "grad_norm": 0.5276210904121399, "learning_rate": 7.314026316050021e-07, "loss": 0.0081, "step": 199690 }, { "epoch": 1.6862637478626163, "grad_norm": 0.04744169861078262, "learning_rate": 7.310189591168576e-07, "loss": 0.0065, "step": 199700 }, { "epoch": 1.6863481877097803, "grad_norm": 0.2169080674648285, "learning_rate": 7.306353793501081e-07, "loss": 0.0044, "step": 199710 }, { "epoch": 1.6864326275569441, "grad_norm": 0.14470891654491425, "learning_rate": 7.302518923130819e-07, "loss": 0.0047, "step": 199720 }, { "epoch": 1.686517067404108, "grad_norm": 0.5889421701431274, "learning_rate": 7.298684980141108e-07, "loss": 0.0086, "step": 199730 }, { "epoch": 1.686601507251272, "grad_norm": 0.08628473430871964, "learning_rate": 7.294851964615207e-07, "loss": 0.0039, "step": 199740 }, { "epoch": 1.686685947098436, "grad_norm": 0.01702028326690197, "learning_rate": 7.291019876636373e-07, "loss": 0.0024, "step": 199750 }, { "epoch": 1.6867703869455997, "grad_norm": 0.7175745964050293, "learning_rate": 7.287188716287824e-07, "loss": 0.0119, "step": 199760 }, { "epoch": 1.6868548267927634, "grad_norm": 0.047972168773412704, "learning_rate": 7.283358483652797e-07, "loss": 0.0041, "step": 199770 }, { "epoch": 1.6869392666399274, "grad_norm": 0.12328443676233292, "learning_rate": 7.279529178814454e-07, "loss": 0.0091, "step": 199780 }, { "epoch": 1.6870237064870912, "grad_norm": 0.23432105779647827, "learning_rate": 7.275700801855994e-07, "loss": 0.0056, "step": 199790 }, { "epoch": 1.6871081463342552, "grad_norm": 0.07760684937238693, "learning_rate": 7.271873352860565e-07, "loss": 0.0077, "step": 199800 }, { "epoch": 1.687192586181419, "grad_norm": 0.059614941477775574, "learning_rate": 7.268046831911274e-07, "loss": 0.0038, "step": 199810 }, { "epoch": 1.6872770260285828, "grad_norm": 0.2135496437549591, "learning_rate": 7.264221239091257e-07, "loss": 0.0081, "step": 199820 }, { "epoch": 1.6873614658757468, "grad_norm": 0.36143162846565247, "learning_rate": 7.260396574483602e-07, "loss": 0.0094, "step": 199830 }, { "epoch": 1.6874459057229108, "grad_norm": 0.1835263967514038, "learning_rate": 7.256572838171377e-07, "loss": 0.0056, "step": 199840 }, { "epoch": 1.6875303455700745, "grad_norm": 0.23934005200862885, "learning_rate": 7.252750030237615e-07, "loss": 0.0099, "step": 199850 }, { "epoch": 1.6876147854172383, "grad_norm": 0.10606858134269714, "learning_rate": 7.248928150765377e-07, "loss": 0.0073, "step": 199860 }, { "epoch": 1.6876992252644023, "grad_norm": 0.4380916655063629, "learning_rate": 7.245107199837647e-07, "loss": 0.0036, "step": 199870 }, { "epoch": 1.6877836651115663, "grad_norm": 0.2694410979747772, "learning_rate": 7.241287177537437e-07, "loss": 0.0038, "step": 199880 }, { "epoch": 1.68786810495873, "grad_norm": 0.15239782631397247, "learning_rate": 7.237468083947697e-07, "loss": 0.0031, "step": 199890 }, { "epoch": 1.6879525448058939, "grad_norm": 0.021248959004878998, "learning_rate": 7.233649919151414e-07, "loss": 0.0079, "step": 199900 }, { "epoch": 1.6880369846530576, "grad_norm": 0.18144115805625916, "learning_rate": 7.229832683231475e-07, "loss": 0.0085, "step": 199910 }, { "epoch": 1.6881214245002216, "grad_norm": 0.1841108351945877, "learning_rate": 7.226016376270794e-07, "loss": 0.0035, "step": 199920 }, { "epoch": 1.6882058643473856, "grad_norm": 0.1401318609714508, "learning_rate": 7.222200998352286e-07, "loss": 0.0111, "step": 199930 }, { "epoch": 1.6882903041945494, "grad_norm": 0.07812075316905975, "learning_rate": 7.218386549558793e-07, "loss": 0.009, "step": 199940 }, { "epoch": 1.6883747440417132, "grad_norm": 0.14114734530448914, "learning_rate": 7.214573029973193e-07, "loss": 0.0052, "step": 199950 }, { "epoch": 1.6884591838888772, "grad_norm": 0.10969382524490356, "learning_rate": 7.210760439678283e-07, "loss": 0.004, "step": 199960 }, { "epoch": 1.6885436237360412, "grad_norm": 0.056833308190107346, "learning_rate": 7.206948778756901e-07, "loss": 0.0053, "step": 199970 }, { "epoch": 1.688628063583205, "grad_norm": 0.09677676111459732, "learning_rate": 7.203138047291824e-07, "loss": 0.007, "step": 199980 }, { "epoch": 1.6887125034303687, "grad_norm": 0.0668778121471405, "learning_rate": 7.199328245365816e-07, "loss": 0.0087, "step": 199990 }, { "epoch": 1.6887969432775327, "grad_norm": 0.3416828513145447, "learning_rate": 7.195519373061615e-07, "loss": 0.0054, "step": 200000 }, { "epoch": 1.6888813831246965, "grad_norm": 0.2828131914138794, "learning_rate": 7.191711430461979e-07, "loss": 0.0079, "step": 200010 }, { "epoch": 1.6889658229718605, "grad_norm": 0.19890816509723663, "learning_rate": 7.187904417649594e-07, "loss": 0.0034, "step": 200020 }, { "epoch": 1.6890502628190243, "grad_norm": 0.7377771735191345, "learning_rate": 7.184098334707141e-07, "loss": 0.0089, "step": 200030 }, { "epoch": 1.689134702666188, "grad_norm": 1.0351024866104126, "learning_rate": 7.180293181717313e-07, "loss": 0.0122, "step": 200040 }, { "epoch": 1.689219142513352, "grad_norm": 0.31723836064338684, "learning_rate": 7.176488958762729e-07, "loss": 0.0047, "step": 200050 }, { "epoch": 1.689303582360516, "grad_norm": 0.31256625056266785, "learning_rate": 7.172685665926038e-07, "loss": 0.0057, "step": 200060 }, { "epoch": 1.6893880222076798, "grad_norm": 0.010580594651401043, "learning_rate": 7.168883303289842e-07, "loss": 0.0017, "step": 200070 }, { "epoch": 1.6894724620548436, "grad_norm": 0.3553716242313385, "learning_rate": 7.165081870936719e-07, "loss": 0.0054, "step": 200080 }, { "epoch": 1.6895569019020076, "grad_norm": 0.003742281114682555, "learning_rate": 7.161281368949236e-07, "loss": 0.0097, "step": 200090 }, { "epoch": 1.6896413417491716, "grad_norm": 0.43922537565231323, "learning_rate": 7.157481797409949e-07, "loss": 0.0045, "step": 200100 }, { "epoch": 1.6897257815963354, "grad_norm": 0.4362843632698059, "learning_rate": 7.153683156401365e-07, "loss": 0.006, "step": 200110 }, { "epoch": 1.6898102214434991, "grad_norm": 0.2516019344329834, "learning_rate": 7.149885446006021e-07, "loss": 0.0042, "step": 200120 }, { "epoch": 1.689894661290663, "grad_norm": 0.4711785614490509, "learning_rate": 7.146088666306367e-07, "loss": 0.0057, "step": 200130 }, { "epoch": 1.689979101137827, "grad_norm": 0.25293710827827454, "learning_rate": 7.142292817384899e-07, "loss": 0.0052, "step": 200140 }, { "epoch": 1.690063540984991, "grad_norm": 0.20864737033843994, "learning_rate": 7.138497899324048e-07, "loss": 0.0108, "step": 200150 }, { "epoch": 1.6901479808321547, "grad_norm": 0.3957558572292328, "learning_rate": 7.134703912206242e-07, "loss": 0.0089, "step": 200160 }, { "epoch": 1.6902324206793184, "grad_norm": 0.25778377056121826, "learning_rate": 7.130910856113882e-07, "loss": 0.0065, "step": 200170 }, { "epoch": 1.6903168605264824, "grad_norm": 0.07259788364171982, "learning_rate": 7.12711873112934e-07, "loss": 0.0063, "step": 200180 }, { "epoch": 1.6904013003736464, "grad_norm": 0.23414631187915802, "learning_rate": 7.123327537335006e-07, "loss": 0.0047, "step": 200190 }, { "epoch": 1.6904857402208102, "grad_norm": 0.39398491382598877, "learning_rate": 7.119537274813204e-07, "loss": 0.0065, "step": 200200 }, { "epoch": 1.690570180067974, "grad_norm": 0.3738270401954651, "learning_rate": 7.115747943646273e-07, "loss": 0.0085, "step": 200210 }, { "epoch": 1.690654619915138, "grad_norm": 0.0263950377702713, "learning_rate": 7.111959543916508e-07, "loss": 0.0053, "step": 200220 }, { "epoch": 1.690739059762302, "grad_norm": 0.09305639564990997, "learning_rate": 7.108172075706193e-07, "loss": 0.0061, "step": 200230 }, { "epoch": 1.6908234996094658, "grad_norm": 0.015490281395614147, "learning_rate": 7.104385539097586e-07, "loss": 0.011, "step": 200240 }, { "epoch": 1.6909079394566295, "grad_norm": 0.18065400421619415, "learning_rate": 7.100599934172942e-07, "loss": 0.0073, "step": 200250 }, { "epoch": 1.6909923793037933, "grad_norm": 0.5796464681625366, "learning_rate": 7.096815261014473e-07, "loss": 0.0042, "step": 200260 }, { "epoch": 1.6910768191509573, "grad_norm": 0.19719961285591125, "learning_rate": 7.093031519704374e-07, "loss": 0.0031, "step": 200270 }, { "epoch": 1.6911612589981213, "grad_norm": 0.1652771383523941, "learning_rate": 7.089248710324853e-07, "loss": 0.0047, "step": 200280 }, { "epoch": 1.691245698845285, "grad_norm": 0.37030354142189026, "learning_rate": 7.085466832958044e-07, "loss": 0.0056, "step": 200290 }, { "epoch": 1.6913301386924489, "grad_norm": 0.21181361377239227, "learning_rate": 7.081685887686113e-07, "loss": 0.0043, "step": 200300 }, { "epoch": 1.6914145785396129, "grad_norm": 0.12764504551887512, "learning_rate": 7.077905874591173e-07, "loss": 0.0039, "step": 200310 }, { "epoch": 1.6914990183867769, "grad_norm": 0.1476736068725586, "learning_rate": 7.074126793755315e-07, "loss": 0.0044, "step": 200320 }, { "epoch": 1.6915834582339406, "grad_norm": 0.01944250613451004, "learning_rate": 7.070348645260622e-07, "loss": 0.0049, "step": 200330 }, { "epoch": 1.6916678980811044, "grad_norm": 0.1420675367116928, "learning_rate": 7.066571429189167e-07, "loss": 0.0053, "step": 200340 }, { "epoch": 1.6917523379282684, "grad_norm": 0.03462759405374527, "learning_rate": 7.062795145622969e-07, "loss": 0.0053, "step": 200350 }, { "epoch": 1.6918367777754322, "grad_norm": 0.16081395745277405, "learning_rate": 7.059019794644079e-07, "loss": 0.0051, "step": 200360 }, { "epoch": 1.6919212176225962, "grad_norm": 0.45098191499710083, "learning_rate": 7.055245376334463e-07, "loss": 0.0041, "step": 200370 }, { "epoch": 1.69200565746976, "grad_norm": 0.04666632413864136, "learning_rate": 7.051471890776135e-07, "loss": 0.0067, "step": 200380 }, { "epoch": 1.6920900973169237, "grad_norm": 0.4283400774002075, "learning_rate": 7.047699338051028e-07, "loss": 0.0078, "step": 200390 }, { "epoch": 1.6921745371640877, "grad_norm": 0.18455615639686584, "learning_rate": 7.043927718241095e-07, "loss": 0.0049, "step": 200400 }, { "epoch": 1.6922589770112517, "grad_norm": 0.40979957580566406, "learning_rate": 7.040157031428252e-07, "loss": 0.0069, "step": 200410 }, { "epoch": 1.6923434168584155, "grad_norm": 0.36459991335868835, "learning_rate": 7.036387277694379e-07, "loss": 0.0073, "step": 200420 }, { "epoch": 1.6924278567055793, "grad_norm": 0.2217129021883011, "learning_rate": 7.032618457121382e-07, "loss": 0.004, "step": 200430 }, { "epoch": 1.6925122965527433, "grad_norm": 0.03563535958528519, "learning_rate": 7.028850569791096e-07, "loss": 0.0099, "step": 200440 }, { "epoch": 1.6925967363999073, "grad_norm": 1.197526216506958, "learning_rate": 7.025083615785383e-07, "loss": 0.0066, "step": 200450 }, { "epoch": 1.692681176247071, "grad_norm": 0.07553798705339432, "learning_rate": 7.021317595186034e-07, "loss": 0.0038, "step": 200460 }, { "epoch": 1.6927656160942348, "grad_norm": 0.32049310207366943, "learning_rate": 7.017552508074871e-07, "loss": 0.0037, "step": 200470 }, { "epoch": 1.6928500559413986, "grad_norm": 0.004499239381402731, "learning_rate": 7.013788354533663e-07, "loss": 0.0063, "step": 200480 }, { "epoch": 1.6929344957885626, "grad_norm": 0.21874436736106873, "learning_rate": 7.01002513464416e-07, "loss": 0.0096, "step": 200490 }, { "epoch": 1.6930189356357266, "grad_norm": 0.24262994527816772, "learning_rate": 7.006262848488099e-07, "loss": 0.0054, "step": 200500 }, { "epoch": 1.6931033754828904, "grad_norm": 0.2020850032567978, "learning_rate": 7.00250149614719e-07, "loss": 0.0073, "step": 200510 }, { "epoch": 1.6931878153300541, "grad_norm": 0.22824923694133759, "learning_rate": 6.998741077703153e-07, "loss": 0.0031, "step": 200520 }, { "epoch": 1.6932722551772181, "grad_norm": 0.18647727370262146, "learning_rate": 6.994981593237632e-07, "loss": 0.0069, "step": 200530 }, { "epoch": 1.6933566950243821, "grad_norm": 0.5151616334915161, "learning_rate": 6.991223042832312e-07, "loss": 0.0114, "step": 200540 }, { "epoch": 1.693441134871546, "grad_norm": 0.20201103389263153, "learning_rate": 6.987465426568813e-07, "loss": 0.0094, "step": 200550 }, { "epoch": 1.6935255747187097, "grad_norm": 0.5350152254104614, "learning_rate": 6.98370874452875e-07, "loss": 0.0064, "step": 200560 }, { "epoch": 1.6936100145658737, "grad_norm": 0.27135199308395386, "learning_rate": 6.97995299679371e-07, "loss": 0.0059, "step": 200570 }, { "epoch": 1.6936944544130377, "grad_norm": 0.20396688580513, "learning_rate": 6.976198183445282e-07, "loss": 0.008, "step": 200580 }, { "epoch": 1.6937788942602015, "grad_norm": 0.2527354657649994, "learning_rate": 6.972444304565007e-07, "loss": 0.0053, "step": 200590 }, { "epoch": 1.6938633341073652, "grad_norm": 0.23266535997390747, "learning_rate": 6.968691360234431e-07, "loss": 0.0065, "step": 200600 }, { "epoch": 1.693947773954529, "grad_norm": 0.4044002592563629, "learning_rate": 6.964939350535061e-07, "loss": 0.0054, "step": 200610 }, { "epoch": 1.694032213801693, "grad_norm": 0.37036606669425964, "learning_rate": 6.96118827554838e-07, "loss": 0.0082, "step": 200620 }, { "epoch": 1.694116653648857, "grad_norm": 0.29893529415130615, "learning_rate": 6.957438135355882e-07, "loss": 0.0043, "step": 200630 }, { "epoch": 1.6942010934960208, "grad_norm": 0.07533468306064606, "learning_rate": 6.953688930039004e-07, "loss": 0.0046, "step": 200640 }, { "epoch": 1.6942855333431845, "grad_norm": 0.11536595970392227, "learning_rate": 6.949940659679188e-07, "loss": 0.0046, "step": 200650 }, { "epoch": 1.6943699731903485, "grad_norm": 0.06715279817581177, "learning_rate": 6.946193324357825e-07, "loss": 0.0047, "step": 200660 }, { "epoch": 1.6944544130375125, "grad_norm": 0.37160375714302063, "learning_rate": 6.942446924156327e-07, "loss": 0.0042, "step": 200670 }, { "epoch": 1.6945388528846763, "grad_norm": 0.32666558027267456, "learning_rate": 6.93870145915605e-07, "loss": 0.0073, "step": 200680 }, { "epoch": 1.69462329273184, "grad_norm": 0.5926395058631897, "learning_rate": 6.934956929438369e-07, "loss": 0.0054, "step": 200690 }, { "epoch": 1.6947077325790039, "grad_norm": 0.016083551570773125, "learning_rate": 6.93121333508458e-07, "loss": 0.0076, "step": 200700 }, { "epoch": 1.6947921724261679, "grad_norm": 0.34350764751434326, "learning_rate": 6.927470676176029e-07, "loss": 0.0061, "step": 200710 }, { "epoch": 1.6948766122733319, "grad_norm": 0.23530139029026031, "learning_rate": 6.923728952793984e-07, "loss": 0.0083, "step": 200720 }, { "epoch": 1.6949610521204956, "grad_norm": 0.1649843454360962, "learning_rate": 6.919988165019725e-07, "loss": 0.0039, "step": 200730 }, { "epoch": 1.6950454919676594, "grad_norm": 0.049673695117235184, "learning_rate": 6.916248312934487e-07, "loss": 0.0109, "step": 200740 }, { "epoch": 1.6951299318148234, "grad_norm": 0.2511315941810608, "learning_rate": 6.912509396619505e-07, "loss": 0.006, "step": 200750 }, { "epoch": 1.6952143716619874, "grad_norm": 0.21584072709083557, "learning_rate": 6.908771416155991e-07, "loss": 0.003, "step": 200760 }, { "epoch": 1.6952988115091512, "grad_norm": 0.3279682993888855, "learning_rate": 6.905034371625125e-07, "loss": 0.0083, "step": 200770 }, { "epoch": 1.695383251356315, "grad_norm": 0.13977183401584625, "learning_rate": 6.901298263108092e-07, "loss": 0.0083, "step": 200780 }, { "epoch": 1.695467691203479, "grad_norm": 0.500801682472229, "learning_rate": 6.897563090686021e-07, "loss": 0.0057, "step": 200790 }, { "epoch": 1.695552131050643, "grad_norm": 0.18285296857357025, "learning_rate": 6.893828854440066e-07, "loss": 0.0067, "step": 200800 }, { "epoch": 1.6956365708978067, "grad_norm": 0.1846114993095398, "learning_rate": 6.890095554451293e-07, "loss": 0.0133, "step": 200810 }, { "epoch": 1.6957210107449705, "grad_norm": 0.06462684273719788, "learning_rate": 6.886363190800821e-07, "loss": 0.0093, "step": 200820 }, { "epoch": 1.6958054505921343, "grad_norm": 0.33875128626823425, "learning_rate": 6.882631763569697e-07, "loss": 0.0044, "step": 200830 }, { "epoch": 1.6958898904392983, "grad_norm": 0.3173229694366455, "learning_rate": 6.87890127283899e-07, "loss": 0.006, "step": 200840 }, { "epoch": 1.6959743302864623, "grad_norm": 0.4455939531326294, "learning_rate": 6.875171718689705e-07, "loss": 0.0071, "step": 200850 }, { "epoch": 1.696058770133626, "grad_norm": 0.07875692844390869, "learning_rate": 6.87144310120284e-07, "loss": 0.0065, "step": 200860 }, { "epoch": 1.6961432099807898, "grad_norm": 0.14448870718479156, "learning_rate": 6.867715420459409e-07, "loss": 0.0068, "step": 200870 }, { "epoch": 1.6962276498279538, "grad_norm": 0.3681449890136719, "learning_rate": 6.863988676540362e-07, "loss": 0.0047, "step": 200880 }, { "epoch": 1.6963120896751178, "grad_norm": 0.12888358533382416, "learning_rate": 6.860262869526635e-07, "loss": 0.0091, "step": 200890 }, { "epoch": 1.6963965295222816, "grad_norm": 0.4409758150577545, "learning_rate": 6.856537999499152e-07, "loss": 0.0073, "step": 200900 }, { "epoch": 1.6964809693694454, "grad_norm": 0.5834875106811523, "learning_rate": 6.852814066538832e-07, "loss": 0.0067, "step": 200910 }, { "epoch": 1.6965654092166094, "grad_norm": 0.2345791608095169, "learning_rate": 6.849091070726538e-07, "loss": 0.0067, "step": 200920 }, { "epoch": 1.6966498490637731, "grad_norm": 0.41242390871047974, "learning_rate": 6.845369012143155e-07, "loss": 0.0074, "step": 200930 }, { "epoch": 1.6967342889109371, "grad_norm": 0.41632747650146484, "learning_rate": 6.841647890869502e-07, "loss": 0.008, "step": 200940 }, { "epoch": 1.696818728758101, "grad_norm": 0.018811963498592377, "learning_rate": 6.837927706986425e-07, "loss": 0.0032, "step": 200950 }, { "epoch": 1.6969031686052647, "grad_norm": 0.35298603773117065, "learning_rate": 6.834208460574715e-07, "loss": 0.0074, "step": 200960 }, { "epoch": 1.6969876084524287, "grad_norm": 0.04956527799367905, "learning_rate": 6.830490151715146e-07, "loss": 0.0058, "step": 200970 }, { "epoch": 1.6970720482995927, "grad_norm": 0.13235802948474884, "learning_rate": 6.826772780488489e-07, "loss": 0.0064, "step": 200980 }, { "epoch": 1.6971564881467565, "grad_norm": 0.039627864956855774, "learning_rate": 6.823056346975471e-07, "loss": 0.0079, "step": 200990 }, { "epoch": 1.6972409279939202, "grad_norm": 0.2382020354270935, "learning_rate": 6.819340851256828e-07, "loss": 0.0082, "step": 201000 }, { "epoch": 1.6973253678410842, "grad_norm": 0.5339088439941406, "learning_rate": 6.815626293413241e-07, "loss": 0.0062, "step": 201010 }, { "epoch": 1.6974098076882482, "grad_norm": 0.38264748454093933, "learning_rate": 6.811912673525417e-07, "loss": 0.0072, "step": 201020 }, { "epoch": 1.697494247535412, "grad_norm": 0.2976997196674347, "learning_rate": 6.808199991673986e-07, "loss": 0.0057, "step": 201030 }, { "epoch": 1.6975786873825758, "grad_norm": 0.8539022207260132, "learning_rate": 6.804488247939617e-07, "loss": 0.0075, "step": 201040 }, { "epoch": 1.6976631272297396, "grad_norm": 0.3806805908679962, "learning_rate": 6.800777442402907e-07, "loss": 0.0038, "step": 201050 }, { "epoch": 1.6977475670769036, "grad_norm": 0.2927439510822296, "learning_rate": 6.797067575144455e-07, "loss": 0.0053, "step": 201060 }, { "epoch": 1.6978320069240675, "grad_norm": 0.36130642890930176, "learning_rate": 6.793358646244835e-07, "loss": 0.0059, "step": 201070 }, { "epoch": 1.6979164467712313, "grad_norm": 0.33206063508987427, "learning_rate": 6.789650655784624e-07, "loss": 0.0045, "step": 201080 }, { "epoch": 1.698000886618395, "grad_norm": 0.07807387411594391, "learning_rate": 6.785943603844342e-07, "loss": 0.0029, "step": 201090 }, { "epoch": 1.698085326465559, "grad_norm": 0.12051831185817719, "learning_rate": 6.782237490504501e-07, "loss": 0.0034, "step": 201100 }, { "epoch": 1.698169766312723, "grad_norm": 0.020420072600245476, "learning_rate": 6.778532315845615e-07, "loss": 0.0037, "step": 201110 }, { "epoch": 1.6982542061598869, "grad_norm": 0.1038014218211174, "learning_rate": 6.774828079948154e-07, "loss": 0.0093, "step": 201120 }, { "epoch": 1.6983386460070506, "grad_norm": 0.14796660840511322, "learning_rate": 6.771124782892569e-07, "loss": 0.0047, "step": 201130 }, { "epoch": 1.6984230858542146, "grad_norm": 0.0027017646934837103, "learning_rate": 6.76742242475928e-07, "loss": 0.004, "step": 201140 }, { "epoch": 1.6985075257013786, "grad_norm": 0.26470038294792175, "learning_rate": 6.763721005628732e-07, "loss": 0.0117, "step": 201150 }, { "epoch": 1.6985919655485424, "grad_norm": 0.027678437530994415, "learning_rate": 6.76002052558129e-07, "loss": 0.007, "step": 201160 }, { "epoch": 1.6986764053957062, "grad_norm": 0.23687468469142914, "learning_rate": 6.75632098469735e-07, "loss": 0.0065, "step": 201170 }, { "epoch": 1.69876084524287, "grad_norm": 0.26778262853622437, "learning_rate": 6.75262238305725e-07, "loss": 0.005, "step": 201180 }, { "epoch": 1.698845285090034, "grad_norm": 0.1393168419599533, "learning_rate": 6.74892472074134e-07, "loss": 0.0088, "step": 201190 }, { "epoch": 1.698929724937198, "grad_norm": 0.11356525868177414, "learning_rate": 6.745227997829923e-07, "loss": 0.0043, "step": 201200 }, { "epoch": 1.6990141647843617, "grad_norm": 0.23816849291324615, "learning_rate": 6.74153221440329e-07, "loss": 0.0064, "step": 201210 }, { "epoch": 1.6990986046315255, "grad_norm": 0.20939849317073822, "learning_rate": 6.737837370541711e-07, "loss": 0.0062, "step": 201220 }, { "epoch": 1.6991830444786895, "grad_norm": 0.3201127350330353, "learning_rate": 6.734143466325426e-07, "loss": 0.0054, "step": 201230 }, { "epoch": 1.6992674843258535, "grad_norm": 0.004435877315700054, "learning_rate": 6.730450501834696e-07, "loss": 0.0071, "step": 201240 }, { "epoch": 1.6993519241730173, "grad_norm": 0.3094900846481323, "learning_rate": 6.726758477149697e-07, "loss": 0.0068, "step": 201250 }, { "epoch": 1.699436364020181, "grad_norm": 0.4945911169052124, "learning_rate": 6.723067392350652e-07, "loss": 0.0071, "step": 201260 }, { "epoch": 1.699520803867345, "grad_norm": 0.10742135345935822, "learning_rate": 6.7193772475177e-07, "loss": 0.0056, "step": 201270 }, { "epoch": 1.6996052437145088, "grad_norm": 0.04937974736094475, "learning_rate": 6.715688042731022e-07, "loss": 0.0046, "step": 201280 }, { "epoch": 1.6996896835616728, "grad_norm": 0.23743239045143127, "learning_rate": 6.711999778070727e-07, "loss": 0.0045, "step": 201290 }, { "epoch": 1.6997741234088366, "grad_norm": 0.12250056117773056, "learning_rate": 6.708312453616922e-07, "loss": 0.0078, "step": 201300 }, { "epoch": 1.6998585632560004, "grad_norm": 0.11135664582252502, "learning_rate": 6.704626069449693e-07, "loss": 0.0035, "step": 201310 }, { "epoch": 1.6999430031031644, "grad_norm": 0.1555575430393219, "learning_rate": 6.700940625649121e-07, "loss": 0.0042, "step": 201320 }, { "epoch": 1.7000274429503284, "grad_norm": 0.14396023750305176, "learning_rate": 6.697256122295248e-07, "loss": 0.0084, "step": 201330 }, { "epoch": 1.7001118827974921, "grad_norm": 0.48089951276779175, "learning_rate": 6.693572559468092e-07, "loss": 0.0078, "step": 201340 }, { "epoch": 1.700196322644656, "grad_norm": 0.15571801364421844, "learning_rate": 6.689889937247668e-07, "loss": 0.0054, "step": 201350 }, { "epoch": 1.70028076249182, "grad_norm": 0.2141646444797516, "learning_rate": 6.686208255713955e-07, "loss": 0.0075, "step": 201360 }, { "epoch": 1.700365202338984, "grad_norm": 0.44665008783340454, "learning_rate": 6.682527514946929e-07, "loss": 0.0057, "step": 201370 }, { "epoch": 1.7004496421861477, "grad_norm": 0.3205092251300812, "learning_rate": 6.678847715026532e-07, "loss": 0.0083, "step": 201380 }, { "epoch": 1.7005340820333115, "grad_norm": 0.0515463761985302, "learning_rate": 6.675168856032687e-07, "loss": 0.0074, "step": 201390 }, { "epoch": 1.7006185218804752, "grad_norm": 0.0764535516500473, "learning_rate": 6.671490938045283e-07, "loss": 0.0088, "step": 201400 }, { "epoch": 1.7007029617276392, "grad_norm": 0.21717554330825806, "learning_rate": 6.667813961144226e-07, "loss": 0.0066, "step": 201410 }, { "epoch": 1.7007874015748032, "grad_norm": 0.12562575936317444, "learning_rate": 6.664137925409365e-07, "loss": 0.0049, "step": 201420 }, { "epoch": 1.700871841421967, "grad_norm": 0.10981259495019913, "learning_rate": 6.660462830920556e-07, "loss": 0.0034, "step": 201430 }, { "epoch": 1.7009562812691308, "grad_norm": 0.37333589792251587, "learning_rate": 6.656788677757609e-07, "loss": 0.0076, "step": 201440 }, { "epoch": 1.7010407211162948, "grad_norm": 0.16510136425495148, "learning_rate": 6.653115466000331e-07, "loss": 0.0059, "step": 201450 }, { "epoch": 1.7011251609634588, "grad_norm": 0.4285390377044678, "learning_rate": 6.649443195728505e-07, "loss": 0.0058, "step": 201460 }, { "epoch": 1.7012096008106226, "grad_norm": 0.22232942283153534, "learning_rate": 6.645771867021883e-07, "loss": 0.0032, "step": 201470 }, { "epoch": 1.7012940406577863, "grad_norm": 0.17979925870895386, "learning_rate": 6.642101479960217e-07, "loss": 0.0049, "step": 201480 }, { "epoch": 1.7013784805049503, "grad_norm": 0.2460166960954666, "learning_rate": 6.63843203462321e-07, "loss": 0.0061, "step": 201490 }, { "epoch": 1.7014629203521143, "grad_norm": 0.006082736887037754, "learning_rate": 6.634763531090587e-07, "loss": 0.0051, "step": 201500 }, { "epoch": 1.701547360199278, "grad_norm": 0.2095392495393753, "learning_rate": 6.631095969442003e-07, "loss": 0.0051, "step": 201510 }, { "epoch": 1.7016318000464419, "grad_norm": 0.3424622714519501, "learning_rate": 6.627429349757136e-07, "loss": 0.0054, "step": 201520 }, { "epoch": 1.7017162398936057, "grad_norm": 0.3953072726726532, "learning_rate": 6.623763672115619e-07, "loss": 0.0046, "step": 201530 }, { "epoch": 1.7018006797407697, "grad_norm": 0.055197861045598984, "learning_rate": 6.620098936597064e-07, "loss": 0.0023, "step": 201540 }, { "epoch": 1.7018851195879336, "grad_norm": 0.35603490471839905, "learning_rate": 6.616435143281064e-07, "loss": 0.0061, "step": 201550 }, { "epoch": 1.7019695594350974, "grad_norm": 0.25112876296043396, "learning_rate": 6.61277229224721e-07, "loss": 0.0084, "step": 201560 }, { "epoch": 1.7020539992822612, "grad_norm": 0.4025475084781647, "learning_rate": 6.609110383575051e-07, "loss": 0.0051, "step": 201570 }, { "epoch": 1.7021384391294252, "grad_norm": 0.13645030558109283, "learning_rate": 6.605449417344112e-07, "loss": 0.0078, "step": 201580 }, { "epoch": 1.7022228789765892, "grad_norm": 0.3603900372982025, "learning_rate": 6.601789393633928e-07, "loss": 0.0051, "step": 201590 }, { "epoch": 1.702307318823753, "grad_norm": 0.16497762501239777, "learning_rate": 6.598130312523976e-07, "loss": 0.006, "step": 201600 }, { "epoch": 1.7023917586709167, "grad_norm": 0.45893529057502747, "learning_rate": 6.594472174093752e-07, "loss": 0.0059, "step": 201610 }, { "epoch": 1.7024761985180805, "grad_norm": 0.18634505569934845, "learning_rate": 6.5908149784227e-07, "loss": 0.0034, "step": 201620 }, { "epoch": 1.7025606383652445, "grad_norm": 0.18380041420459747, "learning_rate": 6.587158725590243e-07, "loss": 0.0083, "step": 201630 }, { "epoch": 1.7026450782124085, "grad_norm": 0.14719714224338531, "learning_rate": 6.583503415675796e-07, "loss": 0.0077, "step": 201640 }, { "epoch": 1.7027295180595723, "grad_norm": 0.24313995242118835, "learning_rate": 6.579849048758775e-07, "loss": 0.01, "step": 201650 }, { "epoch": 1.702813957906736, "grad_norm": 0.7245133519172668, "learning_rate": 6.576195624918514e-07, "loss": 0.0067, "step": 201660 }, { "epoch": 1.7028983977539, "grad_norm": 0.26801633834838867, "learning_rate": 6.5725431442344e-07, "loss": 0.0061, "step": 201670 }, { "epoch": 1.702982837601064, "grad_norm": 0.20505094528198242, "learning_rate": 6.568891606785754e-07, "loss": 0.002, "step": 201680 }, { "epoch": 1.7030672774482278, "grad_norm": 0.08004441112279892, "learning_rate": 6.565241012651868e-07, "loss": 0.0019, "step": 201690 }, { "epoch": 1.7031517172953916, "grad_norm": 0.330239862203598, "learning_rate": 6.561591361912067e-07, "loss": 0.005, "step": 201700 }, { "epoch": 1.7032361571425556, "grad_norm": 0.42352068424224854, "learning_rate": 6.557942654645583e-07, "loss": 0.0097, "step": 201710 }, { "epoch": 1.7033205969897196, "grad_norm": 0.16170112788677216, "learning_rate": 6.554294890931689e-07, "loss": 0.0051, "step": 201720 }, { "epoch": 1.7034050368368834, "grad_norm": 0.3049619495868683, "learning_rate": 6.550648070849596e-07, "loss": 0.0054, "step": 201730 }, { "epoch": 1.7034894766840472, "grad_norm": 0.300167441368103, "learning_rate": 6.547002194478535e-07, "loss": 0.0079, "step": 201740 }, { "epoch": 1.703573916531211, "grad_norm": 0.41332703828811646, "learning_rate": 6.543357261897676e-07, "loss": 0.0064, "step": 201750 }, { "epoch": 1.703658356378375, "grad_norm": 0.10343296080827713, "learning_rate": 6.539713273186199e-07, "loss": 0.0058, "step": 201760 }, { "epoch": 1.703742796225539, "grad_norm": 0.15548360347747803, "learning_rate": 6.536070228423241e-07, "loss": 0.004, "step": 201770 }, { "epoch": 1.7038272360727027, "grad_norm": 0.08722671121358871, "learning_rate": 6.532428127687934e-07, "loss": 0.0054, "step": 201780 }, { "epoch": 1.7039116759198665, "grad_norm": 0.45746538043022156, "learning_rate": 6.52878697105937e-07, "loss": 0.0048, "step": 201790 }, { "epoch": 1.7039961157670305, "grad_norm": 0.1317010223865509, "learning_rate": 6.525146758616657e-07, "loss": 0.0034, "step": 201800 }, { "epoch": 1.7040805556141945, "grad_norm": 0.36021459102630615, "learning_rate": 6.521507490438844e-07, "loss": 0.0079, "step": 201810 }, { "epoch": 1.7041649954613582, "grad_norm": 0.3267810642719269, "learning_rate": 6.517869166604974e-07, "loss": 0.0053, "step": 201820 }, { "epoch": 1.704249435308522, "grad_norm": 0.005746874492615461, "learning_rate": 6.514231787194081e-07, "loss": 0.0032, "step": 201830 }, { "epoch": 1.704333875155686, "grad_norm": 0.16112826764583588, "learning_rate": 6.510595352285154e-07, "loss": 0.0052, "step": 201840 }, { "epoch": 1.7044183150028498, "grad_norm": 0.23298296332359314, "learning_rate": 6.506959861957191e-07, "loss": 0.003, "step": 201850 }, { "epoch": 1.7045027548500138, "grad_norm": 0.4292927384376526, "learning_rate": 6.50332531628915e-07, "loss": 0.0047, "step": 201860 }, { "epoch": 1.7045871946971776, "grad_norm": 0.03683247044682503, "learning_rate": 6.499691715359968e-07, "loss": 0.0051, "step": 201870 }, { "epoch": 1.7046716345443413, "grad_norm": 0.17096351087093353, "learning_rate": 6.49605905924856e-07, "loss": 0.0049, "step": 201880 }, { "epoch": 1.7047560743915053, "grad_norm": 0.28620854020118713, "learning_rate": 6.492427348033848e-07, "loss": 0.0061, "step": 201890 }, { "epoch": 1.7048405142386693, "grad_norm": 0.05991678312420845, "learning_rate": 6.488796581794682e-07, "loss": 0.0065, "step": 201900 }, { "epoch": 1.704924954085833, "grad_norm": 0.003133449237793684, "learning_rate": 6.485166760609951e-07, "loss": 0.0061, "step": 201910 }, { "epoch": 1.7050093939329969, "grad_norm": 0.3025497794151306, "learning_rate": 6.481537884558481e-07, "loss": 0.0072, "step": 201920 }, { "epoch": 1.7050938337801609, "grad_norm": 0.4699555039405823, "learning_rate": 6.477909953719075e-07, "loss": 0.0095, "step": 201930 }, { "epoch": 1.7051782736273249, "grad_norm": 0.0004067405534442514, "learning_rate": 6.474282968170564e-07, "loss": 0.0038, "step": 201940 }, { "epoch": 1.7052627134744887, "grad_norm": 0.22411984205245972, "learning_rate": 6.470656927991703e-07, "loss": 0.006, "step": 201950 }, { "epoch": 1.7053471533216524, "grad_norm": 0.27301958203315735, "learning_rate": 6.467031833261256e-07, "loss": 0.0077, "step": 201960 }, { "epoch": 1.7054315931688162, "grad_norm": 0.13330812752246857, "learning_rate": 6.463407684057943e-07, "loss": 0.006, "step": 201970 }, { "epoch": 1.7055160330159802, "grad_norm": 0.22400711476802826, "learning_rate": 6.459784480460507e-07, "loss": 0.0043, "step": 201980 }, { "epoch": 1.7056004728631442, "grad_norm": 0.29893654584884644, "learning_rate": 6.456162222547619e-07, "loss": 0.0088, "step": 201990 }, { "epoch": 1.705684912710308, "grad_norm": 0.32181626558303833, "learning_rate": 6.452540910397975e-07, "loss": 0.0096, "step": 202000 }, { "epoch": 1.7057693525574718, "grad_norm": 0.26463577151298523, "learning_rate": 6.448920544090209e-07, "loss": 0.0045, "step": 202010 }, { "epoch": 1.7058537924046357, "grad_norm": 0.013528996147215366, "learning_rate": 6.445301123702985e-07, "loss": 0.007, "step": 202020 }, { "epoch": 1.7059382322517997, "grad_norm": 0.5434668064117432, "learning_rate": 6.441682649314885e-07, "loss": 0.0068, "step": 202030 }, { "epoch": 1.7060226720989635, "grad_norm": 0.3327663540840149, "learning_rate": 6.4380651210045e-07, "loss": 0.0046, "step": 202040 }, { "epoch": 1.7061071119461273, "grad_norm": 0.28096091747283936, "learning_rate": 6.434448538850424e-07, "loss": 0.0067, "step": 202050 }, { "epoch": 1.7061915517932913, "grad_norm": 0.28074586391448975, "learning_rate": 6.430832902931189e-07, "loss": 0.0071, "step": 202060 }, { "epoch": 1.7062759916404553, "grad_norm": 0.3674762547016144, "learning_rate": 6.427218213325348e-07, "loss": 0.0038, "step": 202070 }, { "epoch": 1.706360431487619, "grad_norm": 0.29069700837135315, "learning_rate": 6.423604470111383e-07, "loss": 0.0074, "step": 202080 }, { "epoch": 1.7064448713347828, "grad_norm": 0.45986366271972656, "learning_rate": 6.419991673367815e-07, "loss": 0.005, "step": 202090 }, { "epoch": 1.7065293111819466, "grad_norm": 0.35969093441963196, "learning_rate": 6.416379823173091e-07, "loss": 0.0084, "step": 202100 }, { "epoch": 1.7066137510291106, "grad_norm": 0.9183733463287354, "learning_rate": 6.412768919605672e-07, "loss": 0.0068, "step": 202110 }, { "epoch": 1.7066981908762746, "grad_norm": 0.2777779996395111, "learning_rate": 6.409158962743967e-07, "loss": 0.008, "step": 202120 }, { "epoch": 1.7067826307234384, "grad_norm": 0.2322624772787094, "learning_rate": 6.405549952666406e-07, "loss": 0.0047, "step": 202130 }, { "epoch": 1.7068670705706022, "grad_norm": 0.1643291711807251, "learning_rate": 6.401941889451363e-07, "loss": 0.0102, "step": 202140 }, { "epoch": 1.7069515104177662, "grad_norm": 0.14121106266975403, "learning_rate": 6.398334773177212e-07, "loss": 0.0092, "step": 202150 }, { "epoch": 1.7070359502649302, "grad_norm": 0.010880934074521065, "learning_rate": 6.394728603922295e-07, "loss": 0.0076, "step": 202160 }, { "epoch": 1.707120390112094, "grad_norm": 0.15007485449314117, "learning_rate": 6.39112338176493e-07, "loss": 0.0048, "step": 202170 }, { "epoch": 1.7072048299592577, "grad_norm": 0.18773657083511353, "learning_rate": 6.387519106783435e-07, "loss": 0.0073, "step": 202180 }, { "epoch": 1.7072892698064217, "grad_norm": 0.122561514377594, "learning_rate": 6.383915779056093e-07, "loss": 0.0066, "step": 202190 }, { "epoch": 1.7073737096535855, "grad_norm": 0.6723592281341553, "learning_rate": 6.380313398661159e-07, "loss": 0.0067, "step": 202200 }, { "epoch": 1.7074581495007495, "grad_norm": 0.017788942903280258, "learning_rate": 6.376711965676868e-07, "loss": 0.0047, "step": 202210 }, { "epoch": 1.7075425893479133, "grad_norm": 0.47212642431259155, "learning_rate": 6.373111480181471e-07, "loss": 0.0051, "step": 202220 }, { "epoch": 1.707627029195077, "grad_norm": 0.4647305905818939, "learning_rate": 6.369511942253137e-07, "loss": 0.0061, "step": 202230 }, { "epoch": 1.707711469042241, "grad_norm": 0.15854085981845856, "learning_rate": 6.365913351970071e-07, "loss": 0.0063, "step": 202240 }, { "epoch": 1.707795908889405, "grad_norm": 0.1441379338502884, "learning_rate": 6.362315709410416e-07, "loss": 0.0064, "step": 202250 }, { "epoch": 1.7078803487365688, "grad_norm": 0.7773351669311523, "learning_rate": 6.358719014652337e-07, "loss": 0.0046, "step": 202260 }, { "epoch": 1.7079647885837326, "grad_norm": 0.278422474861145, "learning_rate": 6.355123267773933e-07, "loss": 0.008, "step": 202270 }, { "epoch": 1.7080492284308966, "grad_norm": 0.28143423795700073, "learning_rate": 6.351528468853314e-07, "loss": 0.0041, "step": 202280 }, { "epoch": 1.7081336682780606, "grad_norm": 0.2076808512210846, "learning_rate": 6.347934617968543e-07, "loss": 0.0055, "step": 202290 }, { "epoch": 1.7082181081252243, "grad_norm": 0.4122810959815979, "learning_rate": 6.344341715197683e-07, "loss": 0.0056, "step": 202300 }, { "epoch": 1.7083025479723881, "grad_norm": 0.01830058917403221, "learning_rate": 6.340749760618786e-07, "loss": 0.011, "step": 202310 }, { "epoch": 1.708386987819552, "grad_norm": 0.43962064385414124, "learning_rate": 6.337158754309847e-07, "loss": 0.0093, "step": 202320 }, { "epoch": 1.708471427666716, "grad_norm": 0.5320491194725037, "learning_rate": 6.33356869634888e-07, "loss": 0.0108, "step": 202330 }, { "epoch": 1.7085558675138799, "grad_norm": 0.3281700313091278, "learning_rate": 6.32997958681385e-07, "loss": 0.0067, "step": 202340 }, { "epoch": 1.7086403073610437, "grad_norm": 0.13611753284931183, "learning_rate": 6.326391425782725e-07, "loss": 0.0029, "step": 202350 }, { "epoch": 1.7087247472082074, "grad_norm": 0.23538941144943237, "learning_rate": 6.322804213333406e-07, "loss": 0.0069, "step": 202360 }, { "epoch": 1.7088091870553714, "grad_norm": 0.03803178668022156, "learning_rate": 6.319217949543849e-07, "loss": 0.0081, "step": 202370 }, { "epoch": 1.7088936269025354, "grad_norm": 0.07549908012151718, "learning_rate": 6.315632634491908e-07, "loss": 0.0081, "step": 202380 }, { "epoch": 1.7089780667496992, "grad_norm": 0.09710264950990677, "learning_rate": 6.312048268255494e-07, "loss": 0.0054, "step": 202390 }, { "epoch": 1.709062506596863, "grad_norm": 0.13887596130371094, "learning_rate": 6.308464850912433e-07, "loss": 0.0063, "step": 202400 }, { "epoch": 1.709146946444027, "grad_norm": 0.23818407952785492, "learning_rate": 6.304882382540556e-07, "loss": 0.0042, "step": 202410 }, { "epoch": 1.709231386291191, "grad_norm": 0.0536685548722744, "learning_rate": 6.301300863217685e-07, "loss": 0.0069, "step": 202420 }, { "epoch": 1.7093158261383548, "grad_norm": 0.3868841826915741, "learning_rate": 6.297720293021609e-07, "loss": 0.0067, "step": 202430 }, { "epoch": 1.7094002659855185, "grad_norm": 0.09976523369550705, "learning_rate": 6.294140672030091e-07, "loss": 0.0052, "step": 202440 }, { "epoch": 1.7094847058326823, "grad_norm": 0.31916606426239014, "learning_rate": 6.290562000320871e-07, "loss": 0.0106, "step": 202450 }, { "epoch": 1.7095691456798463, "grad_norm": 0.5538168549537659, "learning_rate": 6.286984277971697e-07, "loss": 0.0084, "step": 202460 }, { "epoch": 1.7096535855270103, "grad_norm": 0.50728440284729, "learning_rate": 6.283407505060257e-07, "loss": 0.0109, "step": 202470 }, { "epoch": 1.709738025374174, "grad_norm": 0.38317441940307617, "learning_rate": 6.279831681664261e-07, "loss": 0.0041, "step": 202480 }, { "epoch": 1.7098224652213379, "grad_norm": 0.201816126704216, "learning_rate": 6.276256807861347e-07, "loss": 0.0065, "step": 202490 }, { "epoch": 1.7099069050685018, "grad_norm": 0.2304094284772873, "learning_rate": 6.272682883729186e-07, "loss": 0.005, "step": 202500 }, { "epoch": 1.7099913449156658, "grad_norm": 0.5021151900291443, "learning_rate": 6.269109909345394e-07, "loss": 0.0075, "step": 202510 }, { "epoch": 1.7100757847628296, "grad_norm": 0.2417002171278, "learning_rate": 6.265537884787576e-07, "loss": 0.0084, "step": 202520 }, { "epoch": 1.7101602246099934, "grad_norm": 0.2360183745622635, "learning_rate": 6.261966810133308e-07, "loss": 0.0081, "step": 202530 }, { "epoch": 1.7102446644571572, "grad_norm": 0.22402451932430267, "learning_rate": 6.258396685460149e-07, "loss": 0.0093, "step": 202540 }, { "epoch": 1.7103291043043212, "grad_norm": 0.5906317830085754, "learning_rate": 6.254827510845663e-07, "loss": 0.009, "step": 202550 }, { "epoch": 1.7104135441514852, "grad_norm": 0.43618831038475037, "learning_rate": 6.251259286367351e-07, "loss": 0.0087, "step": 202560 }, { "epoch": 1.710497983998649, "grad_norm": 0.20991544425487518, "learning_rate": 6.24769201210273e-07, "loss": 0.0082, "step": 202570 }, { "epoch": 1.7105824238458127, "grad_norm": 0.44110962748527527, "learning_rate": 6.24412568812926e-07, "loss": 0.0045, "step": 202580 }, { "epoch": 1.7106668636929767, "grad_norm": 0.05905953422188759, "learning_rate": 6.240560314524424e-07, "loss": 0.0061, "step": 202590 }, { "epoch": 1.7107513035401407, "grad_norm": 0.24157209694385529, "learning_rate": 6.236995891365655e-07, "loss": 0.005, "step": 202600 }, { "epoch": 1.7108357433873045, "grad_norm": 0.3230695128440857, "learning_rate": 6.233432418730362e-07, "loss": 0.0047, "step": 202610 }, { "epoch": 1.7109201832344683, "grad_norm": 0.056289058178663254, "learning_rate": 6.22986989669595e-07, "loss": 0.0112, "step": 202620 }, { "epoch": 1.7110046230816323, "grad_norm": 0.25774338841438293, "learning_rate": 6.226308325339786e-07, "loss": 0.0043, "step": 202630 }, { "epoch": 1.7110890629287963, "grad_norm": 0.13647328317165375, "learning_rate": 6.222747704739246e-07, "loss": 0.0029, "step": 202640 }, { "epoch": 1.71117350277596, "grad_norm": 0.22287431359291077, "learning_rate": 6.219188034971646e-07, "loss": 0.0069, "step": 202650 }, { "epoch": 1.7112579426231238, "grad_norm": 0.1645364910364151, "learning_rate": 6.215629316114314e-07, "loss": 0.0071, "step": 202660 }, { "epoch": 1.7113423824702876, "grad_norm": 0.5972925424575806, "learning_rate": 6.212071548244547e-07, "loss": 0.0123, "step": 202670 }, { "epoch": 1.7114268223174516, "grad_norm": 0.20551882684230804, "learning_rate": 6.208514731439613e-07, "loss": 0.0072, "step": 202680 }, { "epoch": 1.7115112621646156, "grad_norm": 0.4565887153148651, "learning_rate": 6.20495886577675e-07, "loss": 0.0088, "step": 202690 }, { "epoch": 1.7115957020117794, "grad_norm": 0.03316107019782066, "learning_rate": 6.201403951333218e-07, "loss": 0.0076, "step": 202700 }, { "epoch": 1.7116801418589431, "grad_norm": 0.3229402005672455, "learning_rate": 6.197849988186211e-07, "loss": 0.0065, "step": 202710 }, { "epoch": 1.7117645817061071, "grad_norm": 0.16285686194896698, "learning_rate": 6.194296976412933e-07, "loss": 0.005, "step": 202720 }, { "epoch": 1.7118490215532711, "grad_norm": 0.11460782587528229, "learning_rate": 6.190744916090541e-07, "loss": 0.0052, "step": 202730 }, { "epoch": 1.711933461400435, "grad_norm": 0.3795543611049652, "learning_rate": 6.187193807296199e-07, "loss": 0.0072, "step": 202740 }, { "epoch": 1.7120179012475987, "grad_norm": 0.5195199251174927, "learning_rate": 6.18364365010703e-07, "loss": 0.0039, "step": 202750 }, { "epoch": 1.7121023410947627, "grad_norm": 0.21348142623901367, "learning_rate": 6.180094444600143e-07, "loss": 0.004, "step": 202760 }, { "epoch": 1.7121867809419264, "grad_norm": 0.5470805168151855, "learning_rate": 6.176546190852623e-07, "loss": 0.0053, "step": 202770 }, { "epoch": 1.7122712207890904, "grad_norm": 0.40987923741340637, "learning_rate": 6.172998888941528e-07, "loss": 0.0064, "step": 202780 }, { "epoch": 1.7123556606362542, "grad_norm": 0.08675134927034378, "learning_rate": 6.169452538943932e-07, "loss": 0.0105, "step": 202790 }, { "epoch": 1.712440100483418, "grad_norm": 0.08217944949865341, "learning_rate": 6.165907140936834e-07, "loss": 0.0029, "step": 202800 }, { "epoch": 1.712524540330582, "grad_norm": 0.2836213707923889, "learning_rate": 6.162362694997254e-07, "loss": 0.0054, "step": 202810 }, { "epoch": 1.712608980177746, "grad_norm": 0.17691102623939514, "learning_rate": 6.158819201202171e-07, "loss": 0.0056, "step": 202820 }, { "epoch": 1.7126934200249098, "grad_norm": 0.5804842114448547, "learning_rate": 6.155276659628557e-07, "loss": 0.0079, "step": 202830 }, { "epoch": 1.7127778598720735, "grad_norm": 0.23883973062038422, "learning_rate": 6.151735070353348e-07, "loss": 0.0064, "step": 202840 }, { "epoch": 1.7128622997192375, "grad_norm": 0.2803304195404053, "learning_rate": 6.148194433453469e-07, "loss": 0.0083, "step": 202850 }, { "epoch": 1.7129467395664015, "grad_norm": 0.1322200745344162, "learning_rate": 6.144654749005824e-07, "loss": 0.0055, "step": 202860 }, { "epoch": 1.7130311794135653, "grad_norm": 0.431428462266922, "learning_rate": 6.141116017087278e-07, "loss": 0.0114, "step": 202870 }, { "epoch": 1.713115619260729, "grad_norm": 0.19321884214878082, "learning_rate": 6.137578237774716e-07, "loss": 0.0028, "step": 202880 }, { "epoch": 1.7132000591078929, "grad_norm": 0.12412809580564499, "learning_rate": 6.134041411144954e-07, "loss": 0.0066, "step": 202890 }, { "epoch": 1.7132844989550569, "grad_norm": 0.35655492544174194, "learning_rate": 6.130505537274833e-07, "loss": 0.0051, "step": 202900 }, { "epoch": 1.7133689388022209, "grad_norm": 0.0857539251446724, "learning_rate": 6.126970616241135e-07, "loss": 0.0051, "step": 202910 }, { "epoch": 1.7134533786493846, "grad_norm": 0.11689229309558868, "learning_rate": 6.123436648120667e-07, "loss": 0.0057, "step": 202920 }, { "epoch": 1.7135378184965484, "grad_norm": 0.3503459095954895, "learning_rate": 6.119903632990138e-07, "loss": 0.0039, "step": 202930 }, { "epoch": 1.7136222583437124, "grad_norm": 0.20730245113372803, "learning_rate": 6.116371570926327e-07, "loss": 0.004, "step": 202940 }, { "epoch": 1.7137066981908764, "grad_norm": 0.10519173741340637, "learning_rate": 6.112840462005915e-07, "loss": 0.0096, "step": 202950 }, { "epoch": 1.7137911380380402, "grad_norm": 0.21968315541744232, "learning_rate": 6.109310306305633e-07, "loss": 0.0048, "step": 202960 }, { "epoch": 1.713875577885204, "grad_norm": 0.4987393915653229, "learning_rate": 6.10578110390212e-07, "loss": 0.0084, "step": 202970 }, { "epoch": 1.713960017732368, "grad_norm": 1.3027795553207397, "learning_rate": 6.10225285487206e-07, "loss": 0.0094, "step": 202980 }, { "epoch": 1.714044457579532, "grad_norm": 0.40871188044548035, "learning_rate": 6.098725559292074e-07, "loss": 0.0051, "step": 202990 }, { "epoch": 1.7141288974266957, "grad_norm": 0.262453556060791, "learning_rate": 6.095199217238768e-07, "loss": 0.0034, "step": 203000 }, { "epoch": 1.7142133372738595, "grad_norm": 0.19309917092323303, "learning_rate": 6.091673828788742e-07, "loss": 0.0055, "step": 203010 }, { "epoch": 1.7142977771210233, "grad_norm": 0.5983437895774841, "learning_rate": 6.08814939401855e-07, "loss": 0.0069, "step": 203020 }, { "epoch": 1.7143822169681873, "grad_norm": 0.19808092713356018, "learning_rate": 6.084625913004766e-07, "loss": 0.0046, "step": 203030 }, { "epoch": 1.7144666568153513, "grad_norm": 0.028847070410847664, "learning_rate": 6.0811033858239e-07, "loss": 0.0058, "step": 203040 }, { "epoch": 1.714551096662515, "grad_norm": 0.15504255890846252, "learning_rate": 6.077581812552474e-07, "loss": 0.0033, "step": 203050 }, { "epoch": 1.7146355365096788, "grad_norm": 0.2423662543296814, "learning_rate": 6.074061193266961e-07, "loss": 0.0055, "step": 203060 }, { "epoch": 1.7147199763568428, "grad_norm": 0.18621191382408142, "learning_rate": 6.070541528043855e-07, "loss": 0.0043, "step": 203070 }, { "epoch": 1.7148044162040068, "grad_norm": 0.1110483929514885, "learning_rate": 6.067022816959578e-07, "loss": 0.0067, "step": 203080 }, { "epoch": 1.7148888560511706, "grad_norm": 0.3106522560119629, "learning_rate": 6.063505060090563e-07, "loss": 0.0068, "step": 203090 }, { "epoch": 1.7149732958983344, "grad_norm": 0.31930333375930786, "learning_rate": 6.059988257513216e-07, "loss": 0.0055, "step": 203100 }, { "epoch": 1.7150577357454981, "grad_norm": 0.19466553628444672, "learning_rate": 6.056472409303915e-07, "loss": 0.0096, "step": 203110 }, { "epoch": 1.7151421755926621, "grad_norm": 0.19991867244243622, "learning_rate": 6.052957515539032e-07, "loss": 0.0043, "step": 203120 }, { "epoch": 1.7152266154398261, "grad_norm": 0.31970956921577454, "learning_rate": 6.049443576294895e-07, "loss": 0.0036, "step": 203130 }, { "epoch": 1.71531105528699, "grad_norm": 0.21669790148735046, "learning_rate": 6.045930591647853e-07, "loss": 0.0063, "step": 203140 }, { "epoch": 1.7153954951341537, "grad_norm": 0.29679012298583984, "learning_rate": 6.042418561674179e-07, "loss": 0.0126, "step": 203150 }, { "epoch": 1.7154799349813177, "grad_norm": 0.1210331916809082, "learning_rate": 6.038907486450174e-07, "loss": 0.0085, "step": 203160 }, { "epoch": 1.7155643748284817, "grad_norm": 1.452939510345459, "learning_rate": 6.035397366052092e-07, "loss": 0.0092, "step": 203170 }, { "epoch": 1.7156488146756455, "grad_norm": 0.7595951557159424, "learning_rate": 6.031888200556168e-07, "loss": 0.0128, "step": 203180 }, { "epoch": 1.7157332545228092, "grad_norm": 0.09550270438194275, "learning_rate": 6.028379990038613e-07, "loss": 0.0107, "step": 203190 }, { "epoch": 1.7158176943699732, "grad_norm": 0.38985538482666016, "learning_rate": 6.024872734575643e-07, "loss": 0.0094, "step": 203200 }, { "epoch": 1.7159021342171372, "grad_norm": 0.07712186127901077, "learning_rate": 6.02136643424343e-07, "loss": 0.0052, "step": 203210 }, { "epoch": 1.715986574064301, "grad_norm": 0.2938324511051178, "learning_rate": 6.017861089118116e-07, "loss": 0.0087, "step": 203220 }, { "epoch": 1.7160710139114648, "grad_norm": 0.4260913133621216, "learning_rate": 6.014356699275853e-07, "loss": 0.0041, "step": 203230 }, { "epoch": 1.7161554537586285, "grad_norm": 0.04475310817360878, "learning_rate": 6.010853264792748e-07, "loss": 0.0026, "step": 203240 }, { "epoch": 1.7162398936057925, "grad_norm": 0.41711634397506714, "learning_rate": 6.007350785744897e-07, "loss": 0.0108, "step": 203250 }, { "epoch": 1.7163243334529565, "grad_norm": 0.33352789282798767, "learning_rate": 6.003849262208367e-07, "loss": 0.0033, "step": 203260 }, { "epoch": 1.7164087733001203, "grad_norm": 0.3846430778503418, "learning_rate": 6.000348694259217e-07, "loss": 0.0041, "step": 203270 }, { "epoch": 1.716493213147284, "grad_norm": 0.38295668363571167, "learning_rate": 5.996849081973472e-07, "loss": 0.0055, "step": 203280 }, { "epoch": 1.716577652994448, "grad_norm": 0.1532876044511795, "learning_rate": 5.993350425427158e-07, "loss": 0.0053, "step": 203290 }, { "epoch": 1.716662092841612, "grad_norm": 0.1664898693561554, "learning_rate": 5.989852724696243e-07, "loss": 0.0071, "step": 203300 }, { "epoch": 1.7167465326887759, "grad_norm": 0.11072289943695068, "learning_rate": 5.986355979856717e-07, "loss": 0.0059, "step": 203310 }, { "epoch": 1.7168309725359396, "grad_norm": 0.2694834768772125, "learning_rate": 5.982860190984524e-07, "loss": 0.0068, "step": 203320 }, { "epoch": 1.7169154123831036, "grad_norm": 0.42027896642684937, "learning_rate": 5.979365358155587e-07, "loss": 0.0057, "step": 203330 }, { "epoch": 1.7169998522302674, "grad_norm": 0.08707426488399506, "learning_rate": 5.975871481445811e-07, "loss": 0.0052, "step": 203340 }, { "epoch": 1.7170842920774314, "grad_norm": 0.836212694644928, "learning_rate": 5.972378560931074e-07, "loss": 0.0072, "step": 203350 }, { "epoch": 1.7171687319245952, "grad_norm": 0.30330967903137207, "learning_rate": 5.968886596687268e-07, "loss": 0.0036, "step": 203360 }, { "epoch": 1.717253171771759, "grad_norm": 0.2524656057357788, "learning_rate": 5.965395588790207e-07, "loss": 0.0167, "step": 203370 }, { "epoch": 1.717337611618923, "grad_norm": 0.6110755205154419, "learning_rate": 5.961905537315744e-07, "loss": 0.0158, "step": 203380 }, { "epoch": 1.717422051466087, "grad_norm": 0.33041825890541077, "learning_rate": 5.958416442339654e-07, "loss": 0.0037, "step": 203390 }, { "epoch": 1.7175064913132507, "grad_norm": 0.34360364079475403, "learning_rate": 5.954928303937751e-07, "loss": 0.0084, "step": 203400 }, { "epoch": 1.7175909311604145, "grad_norm": 0.026903387159109116, "learning_rate": 5.951441122185774e-07, "loss": 0.0068, "step": 203410 }, { "epoch": 1.7176753710075785, "grad_norm": 0.43299633264541626, "learning_rate": 5.947954897159469e-07, "loss": 0.0049, "step": 203420 }, { "epoch": 1.7177598108547425, "grad_norm": 0.20526568591594696, "learning_rate": 5.944469628934552e-07, "loss": 0.0058, "step": 203430 }, { "epoch": 1.7178442507019063, "grad_norm": 0.21647845208644867, "learning_rate": 5.940985317586734e-07, "loss": 0.005, "step": 203440 }, { "epoch": 1.71792869054907, "grad_norm": 0.12066876888275146, "learning_rate": 5.937501963191683e-07, "loss": 0.0048, "step": 203450 }, { "epoch": 1.7180131303962338, "grad_norm": 0.36757194995880127, "learning_rate": 5.934019565825055e-07, "loss": 0.0061, "step": 203460 }, { "epoch": 1.7180975702433978, "grad_norm": 0.11353367567062378, "learning_rate": 5.930538125562497e-07, "loss": 0.0065, "step": 203470 }, { "epoch": 1.7181820100905618, "grad_norm": 0.2970181703567505, "learning_rate": 5.927057642479611e-07, "loss": 0.0065, "step": 203480 }, { "epoch": 1.7182664499377256, "grad_norm": 0.2937351167201996, "learning_rate": 5.923578116652018e-07, "loss": 0.0058, "step": 203490 }, { "epoch": 1.7183508897848894, "grad_norm": 0.3365839123725891, "learning_rate": 5.920099548155267e-07, "loss": 0.0074, "step": 203500 }, { "epoch": 1.7184353296320534, "grad_norm": 0.2166600078344345, "learning_rate": 5.916621937064926e-07, "loss": 0.0036, "step": 203510 }, { "epoch": 1.7185197694792174, "grad_norm": 0.08350540697574615, "learning_rate": 5.913145283456517e-07, "loss": 0.0073, "step": 203520 }, { "epoch": 1.7186042093263811, "grad_norm": 0.21519556641578674, "learning_rate": 5.909669587405564e-07, "loss": 0.0028, "step": 203530 }, { "epoch": 1.718688649173545, "grad_norm": 0.1304427683353424, "learning_rate": 5.90619484898754e-07, "loss": 0.0043, "step": 203540 }, { "epoch": 1.718773089020709, "grad_norm": 0.04085157439112663, "learning_rate": 5.902721068277939e-07, "loss": 0.0076, "step": 203550 }, { "epoch": 1.718857528867873, "grad_norm": 0.5392792820930481, "learning_rate": 5.899248245352201e-07, "loss": 0.0041, "step": 203560 }, { "epoch": 1.7189419687150367, "grad_norm": 0.10632631927728653, "learning_rate": 5.895776380285756e-07, "loss": 0.0077, "step": 203570 }, { "epoch": 1.7190264085622005, "grad_norm": 0.19518150389194489, "learning_rate": 5.892305473154008e-07, "loss": 0.0083, "step": 203580 }, { "epoch": 1.7191108484093642, "grad_norm": 0.05994636192917824, "learning_rate": 5.888835524032338e-07, "loss": 0.006, "step": 203590 }, { "epoch": 1.7191952882565282, "grad_norm": 0.12097887694835663, "learning_rate": 5.885366532996134e-07, "loss": 0.0084, "step": 203600 }, { "epoch": 1.7192797281036922, "grad_norm": 0.019122235476970673, "learning_rate": 5.881898500120715e-07, "loss": 0.0073, "step": 203610 }, { "epoch": 1.719364167950856, "grad_norm": 0.017972107976675034, "learning_rate": 5.878431425481429e-07, "loss": 0.003, "step": 203620 }, { "epoch": 1.7194486077980198, "grad_norm": 0.4763021469116211, "learning_rate": 5.874965309153563e-07, "loss": 0.0071, "step": 203630 }, { "epoch": 1.7195330476451838, "grad_norm": 0.0919172614812851, "learning_rate": 5.871500151212417e-07, "loss": 0.0087, "step": 203640 }, { "epoch": 1.7196174874923478, "grad_norm": 0.31151899695396423, "learning_rate": 5.868035951733247e-07, "loss": 0.0043, "step": 203650 }, { "epoch": 1.7197019273395115, "grad_norm": 0.7237499356269836, "learning_rate": 5.864572710791289e-07, "loss": 0.0056, "step": 203660 }, { "epoch": 1.7197863671866753, "grad_norm": 0.1424119770526886, "learning_rate": 5.861110428461764e-07, "loss": 0.0049, "step": 203670 }, { "epoch": 1.7198708070338393, "grad_norm": 0.009148101322352886, "learning_rate": 5.85764910481988e-07, "loss": 0.0026, "step": 203680 }, { "epoch": 1.719955246881003, "grad_norm": 0.349148690700531, "learning_rate": 5.854188739940814e-07, "loss": 0.0102, "step": 203690 }, { "epoch": 1.720039686728167, "grad_norm": 0.3751187026500702, "learning_rate": 5.850729333899713e-07, "loss": 0.0084, "step": 203700 }, { "epoch": 1.7201241265753309, "grad_norm": 0.0021535875275731087, "learning_rate": 5.847270886771733e-07, "loss": 0.0054, "step": 203710 }, { "epoch": 1.7202085664224946, "grad_norm": 0.087384894490242, "learning_rate": 5.84381339863197e-07, "loss": 0.0051, "step": 203720 }, { "epoch": 1.7202930062696586, "grad_norm": 0.3653983175754547, "learning_rate": 5.840356869555542e-07, "loss": 0.0055, "step": 203730 }, { "epoch": 1.7203774461168226, "grad_norm": 0.5358380079269409, "learning_rate": 5.836901299617515e-07, "loss": 0.0057, "step": 203740 }, { "epoch": 1.7204618859639864, "grad_norm": 0.2849360406398773, "learning_rate": 5.833446688892941e-07, "loss": 0.0042, "step": 203750 }, { "epoch": 1.7205463258111502, "grad_norm": 0.4027312695980072, "learning_rate": 5.829993037456844e-07, "loss": 0.0047, "step": 203760 }, { "epoch": 1.7206307656583142, "grad_norm": 0.2531573474407196, "learning_rate": 5.826540345384258e-07, "loss": 0.0029, "step": 203770 }, { "epoch": 1.7207152055054782, "grad_norm": 0.10895035415887833, "learning_rate": 5.82308861275015e-07, "loss": 0.003, "step": 203780 }, { "epoch": 1.720799645352642, "grad_norm": 0.4622771143913269, "learning_rate": 5.819637839629522e-07, "loss": 0.0072, "step": 203790 }, { "epoch": 1.7208840851998057, "grad_norm": 0.27069908380508423, "learning_rate": 5.816188026097291e-07, "loss": 0.0065, "step": 203800 }, { "epoch": 1.7209685250469695, "grad_norm": 0.22952602803707123, "learning_rate": 5.81273917222841e-07, "loss": 0.0073, "step": 203810 }, { "epoch": 1.7210529648941335, "grad_norm": 0.5679394006729126, "learning_rate": 5.809291278097795e-07, "loss": 0.0072, "step": 203820 }, { "epoch": 1.7211374047412975, "grad_norm": 0.30309733748435974, "learning_rate": 5.805844343780298e-07, "loss": 0.0088, "step": 203830 }, { "epoch": 1.7212218445884613, "grad_norm": 0.05991014838218689, "learning_rate": 5.802398369350809e-07, "loss": 0.0069, "step": 203840 }, { "epoch": 1.721306284435625, "grad_norm": 0.4518982768058777, "learning_rate": 5.798953354884168e-07, "loss": 0.0062, "step": 203850 }, { "epoch": 1.721390724282789, "grad_norm": 0.48139485716819763, "learning_rate": 5.795509300455209e-07, "loss": 0.007, "step": 203860 }, { "epoch": 1.721475164129953, "grad_norm": 0.3024638891220093, "learning_rate": 5.792066206138719e-07, "loss": 0.01, "step": 203870 }, { "epoch": 1.7215596039771168, "grad_norm": 0.19733496010303497, "learning_rate": 5.788624072009502e-07, "loss": 0.0036, "step": 203880 }, { "epoch": 1.7216440438242806, "grad_norm": 0.17534954845905304, "learning_rate": 5.785182898142311e-07, "loss": 0.0067, "step": 203890 }, { "epoch": 1.7217284836714446, "grad_norm": 0.4137745201587677, "learning_rate": 5.781742684611885e-07, "loss": 0.0044, "step": 203900 }, { "epoch": 1.7218129235186086, "grad_norm": 0.08906836807727814, "learning_rate": 5.778303431492943e-07, "loss": 0.0033, "step": 203910 }, { "epoch": 1.7218973633657724, "grad_norm": 0.24499671161174774, "learning_rate": 5.774865138860192e-07, "loss": 0.0053, "step": 203920 }, { "epoch": 1.7219818032129361, "grad_norm": 0.12801818549633026, "learning_rate": 5.77142780678831e-07, "loss": 0.0035, "step": 203930 }, { "epoch": 1.7220662430601, "grad_norm": 0.16233401000499725, "learning_rate": 5.767991435351944e-07, "loss": 0.0038, "step": 203940 }, { "epoch": 1.722150682907264, "grad_norm": 0.2531115412712097, "learning_rate": 5.764556024625751e-07, "loss": 0.0038, "step": 203950 }, { "epoch": 1.722235122754428, "grad_norm": 0.2464779168367386, "learning_rate": 5.76112157468432e-07, "loss": 0.0036, "step": 203960 }, { "epoch": 1.7223195626015917, "grad_norm": 0.13201546669006348, "learning_rate": 5.757688085602281e-07, "loss": 0.0138, "step": 203970 }, { "epoch": 1.7224040024487555, "grad_norm": 0.07359634339809418, "learning_rate": 5.754255557454186e-07, "loss": 0.0037, "step": 203980 }, { "epoch": 1.7224884422959195, "grad_norm": 0.0005484460270963609, "learning_rate": 5.750823990314591e-07, "loss": 0.0021, "step": 203990 }, { "epoch": 1.7225728821430835, "grad_norm": 0.21987178921699524, "learning_rate": 5.747393384258027e-07, "loss": 0.0041, "step": 204000 }, { "epoch": 1.7226573219902472, "grad_norm": 0.2631534934043884, "learning_rate": 5.743963739359016e-07, "loss": 0.0043, "step": 204010 }, { "epoch": 1.722741761837411, "grad_norm": 0.003574164118617773, "learning_rate": 5.740535055692036e-07, "loss": 0.0039, "step": 204020 }, { "epoch": 1.7228262016845748, "grad_norm": 0.3609747588634491, "learning_rate": 5.737107333331576e-07, "loss": 0.0038, "step": 204030 }, { "epoch": 1.7229106415317388, "grad_norm": 0.038062017410993576, "learning_rate": 5.733680572352073e-07, "loss": 0.0063, "step": 204040 }, { "epoch": 1.7229950813789028, "grad_norm": 0.24703222513198853, "learning_rate": 5.730254772827948e-07, "loss": 0.0065, "step": 204050 }, { "epoch": 1.7230795212260666, "grad_norm": 0.22752371430397034, "learning_rate": 5.726829934833627e-07, "loss": 0.0063, "step": 204060 }, { "epoch": 1.7231639610732303, "grad_norm": 0.11863359808921814, "learning_rate": 5.723406058443487e-07, "loss": 0.0064, "step": 204070 }, { "epoch": 1.7232484009203943, "grad_norm": 0.37617015838623047, "learning_rate": 5.719983143731894e-07, "loss": 0.0085, "step": 204080 }, { "epoch": 1.7233328407675583, "grad_norm": 0.12911711633205414, "learning_rate": 5.716561190773185e-07, "loss": 0.0065, "step": 204090 }, { "epoch": 1.723417280614722, "grad_norm": 0.25543835759162903, "learning_rate": 5.713140199641698e-07, "loss": 0.0045, "step": 204100 }, { "epoch": 1.7235017204618859, "grad_norm": 0.027767637744545937, "learning_rate": 5.709720170411725e-07, "loss": 0.0024, "step": 204110 }, { "epoch": 1.7235861603090499, "grad_norm": 0.6863728165626526, "learning_rate": 5.706301103157564e-07, "loss": 0.007, "step": 204120 }, { "epoch": 1.7236706001562139, "grad_norm": 0.1945432871580124, "learning_rate": 5.702882997953452e-07, "loss": 0.0104, "step": 204130 }, { "epoch": 1.7237550400033776, "grad_norm": 0.2362293303012848, "learning_rate": 5.69946585487367e-07, "loss": 0.0084, "step": 204140 }, { "epoch": 1.7238394798505414, "grad_norm": 0.08100870251655579, "learning_rate": 5.696049673992387e-07, "loss": 0.0128, "step": 204150 }, { "epoch": 1.7239239196977052, "grad_norm": 0.2842606008052826, "learning_rate": 5.692634455383838e-07, "loss": 0.008, "step": 204160 }, { "epoch": 1.7240083595448692, "grad_norm": 0.26006075739860535, "learning_rate": 5.689220199122192e-07, "loss": 0.0082, "step": 204170 }, { "epoch": 1.7240927993920332, "grad_norm": 0.24354827404022217, "learning_rate": 5.685806905281588e-07, "loss": 0.0034, "step": 204180 }, { "epoch": 1.724177239239197, "grad_norm": 0.036133140325546265, "learning_rate": 5.68239457393619e-07, "loss": 0.0058, "step": 204190 }, { "epoch": 1.7242616790863607, "grad_norm": 0.8597552180290222, "learning_rate": 5.678983205160088e-07, "loss": 0.0041, "step": 204200 }, { "epoch": 1.7243461189335247, "grad_norm": 0.19744279980659485, "learning_rate": 5.675572799027401e-07, "loss": 0.0065, "step": 204210 }, { "epoch": 1.7244305587806887, "grad_norm": 0.2085847109556198, "learning_rate": 5.672163355612193e-07, "loss": 0.0047, "step": 204220 }, { "epoch": 1.7245149986278525, "grad_norm": 0.2555058002471924, "learning_rate": 5.668754874988508e-07, "loss": 0.0034, "step": 204230 }, { "epoch": 1.7245994384750163, "grad_norm": 0.2145465761423111, "learning_rate": 5.665347357230372e-07, "loss": 0.0036, "step": 204240 }, { "epoch": 1.7246838783221803, "grad_norm": 0.30117934942245483, "learning_rate": 5.661940802411819e-07, "loss": 0.0066, "step": 204250 }, { "epoch": 1.724768318169344, "grad_norm": 0.454629123210907, "learning_rate": 5.658535210606813e-07, "loss": 0.01, "step": 204260 }, { "epoch": 1.724852758016508, "grad_norm": 0.09248178452253342, "learning_rate": 5.655130581889351e-07, "loss": 0.0025, "step": 204270 }, { "epoch": 1.7249371978636718, "grad_norm": 0.42101210355758667, "learning_rate": 5.651726916333361e-07, "loss": 0.0047, "step": 204280 }, { "epoch": 1.7250216377108356, "grad_norm": 0.0012502525933086872, "learning_rate": 5.648324214012768e-07, "loss": 0.0069, "step": 204290 }, { "epoch": 1.7251060775579996, "grad_norm": 0.09326779097318649, "learning_rate": 5.644922475001497e-07, "loss": 0.0084, "step": 204300 }, { "epoch": 1.7251905174051636, "grad_norm": 0.3458123505115509, "learning_rate": 5.641521699373414e-07, "loss": 0.01, "step": 204310 }, { "epoch": 1.7252749572523274, "grad_norm": 0.296207457780838, "learning_rate": 5.638121887202392e-07, "loss": 0.0041, "step": 204320 }, { "epoch": 1.7253593970994912, "grad_norm": 0.1666547805070877, "learning_rate": 5.634723038562262e-07, "loss": 0.0104, "step": 204330 }, { "epoch": 1.7254438369466552, "grad_norm": 0.1256430298089981, "learning_rate": 5.631325153526868e-07, "loss": 0.0066, "step": 204340 }, { "epoch": 1.7255282767938191, "grad_norm": 0.5636076331138611, "learning_rate": 5.627928232169988e-07, "loss": 0.0046, "step": 204350 }, { "epoch": 1.725612716640983, "grad_norm": 0.30513662099838257, "learning_rate": 5.624532274565425e-07, "loss": 0.0052, "step": 204360 }, { "epoch": 1.7256971564881467, "grad_norm": 0.04119737818837166, "learning_rate": 5.62113728078692e-07, "loss": 0.0055, "step": 204370 }, { "epoch": 1.7257815963353105, "grad_norm": 0.10062414407730103, "learning_rate": 5.617743250908226e-07, "loss": 0.0047, "step": 204380 }, { "epoch": 1.7258660361824745, "grad_norm": 0.08213609457015991, "learning_rate": 5.614350185003053e-07, "loss": 0.0105, "step": 204390 }, { "epoch": 1.7259504760296385, "grad_norm": 0.2166948765516281, "learning_rate": 5.610958083145102e-07, "loss": 0.0076, "step": 204400 }, { "epoch": 1.7260349158768022, "grad_norm": 0.2201981395483017, "learning_rate": 5.607566945408039e-07, "loss": 0.007, "step": 204410 }, { "epoch": 1.726119355723966, "grad_norm": 0.34147176146507263, "learning_rate": 5.604176771865522e-07, "loss": 0.0073, "step": 204420 }, { "epoch": 1.72620379557113, "grad_norm": 0.2361835092306137, "learning_rate": 5.600787562591192e-07, "loss": 0.0063, "step": 204430 }, { "epoch": 1.726288235418294, "grad_norm": 0.41730937361717224, "learning_rate": 5.597399317658653e-07, "loss": 0.0065, "step": 204440 }, { "epoch": 1.7263726752654578, "grad_norm": 0.27315470576286316, "learning_rate": 5.594012037141511e-07, "loss": 0.0054, "step": 204450 }, { "epoch": 1.7264571151126216, "grad_norm": 0.06726057827472687, "learning_rate": 5.590625721113325e-07, "loss": 0.0049, "step": 204460 }, { "epoch": 1.7265415549597856, "grad_norm": 0.4257272779941559, "learning_rate": 5.587240369647651e-07, "loss": 0.0039, "step": 204470 }, { "epoch": 1.7266259948069496, "grad_norm": 0.24630610644817352, "learning_rate": 5.583855982818004e-07, "loss": 0.0047, "step": 204480 }, { "epoch": 1.7267104346541133, "grad_norm": 0.04158070310950279, "learning_rate": 5.580472560697909e-07, "loss": 0.0066, "step": 204490 }, { "epoch": 1.726794874501277, "grad_norm": 0.13147714734077454, "learning_rate": 5.577090103360844e-07, "loss": 0.0077, "step": 204500 }, { "epoch": 1.7268793143484409, "grad_norm": 0.3975057005882263, "learning_rate": 5.573708610880286e-07, "loss": 0.0031, "step": 204510 }, { "epoch": 1.7269637541956049, "grad_norm": 0.5330538153648376, "learning_rate": 5.570328083329674e-07, "loss": 0.0055, "step": 204520 }, { "epoch": 1.7270481940427689, "grad_norm": 0.32047295570373535, "learning_rate": 5.566948520782423e-07, "loss": 0.0076, "step": 204530 }, { "epoch": 1.7271326338899327, "grad_norm": 0.3322320580482483, "learning_rate": 5.563569923311951e-07, "loss": 0.004, "step": 204540 }, { "epoch": 1.7272170737370964, "grad_norm": 0.11927733570337296, "learning_rate": 5.560192290991633e-07, "loss": 0.0098, "step": 204550 }, { "epoch": 1.7273015135842604, "grad_norm": 0.17492733895778656, "learning_rate": 5.55681562389484e-07, "loss": 0.0044, "step": 204560 }, { "epoch": 1.7273859534314244, "grad_norm": 0.40548175573349, "learning_rate": 5.553439922094888e-07, "loss": 0.0052, "step": 204570 }, { "epoch": 1.7274703932785882, "grad_norm": 0.008317412808537483, "learning_rate": 5.550065185665127e-07, "loss": 0.0033, "step": 204580 }, { "epoch": 1.727554833125752, "grad_norm": 0.15728355944156647, "learning_rate": 5.546691414678828e-07, "loss": 0.0061, "step": 204590 }, { "epoch": 1.727639272972916, "grad_norm": 0.21449507772922516, "learning_rate": 5.543318609209291e-07, "loss": 0.0059, "step": 204600 }, { "epoch": 1.7277237128200797, "grad_norm": 0.36216652393341064, "learning_rate": 5.539946769329757e-07, "loss": 0.0069, "step": 204610 }, { "epoch": 1.7278081526672437, "grad_norm": 0.4342421293258667, "learning_rate": 5.536575895113477e-07, "loss": 0.0081, "step": 204620 }, { "epoch": 1.7278925925144075, "grad_norm": 0.25760239362716675, "learning_rate": 5.533205986633655e-07, "loss": 0.005, "step": 204630 }, { "epoch": 1.7279770323615713, "grad_norm": 0.048501431941986084, "learning_rate": 5.529837043963493e-07, "loss": 0.0085, "step": 204640 }, { "epoch": 1.7280614722087353, "grad_norm": 0.1635485142469406, "learning_rate": 5.526469067176154e-07, "loss": 0.0056, "step": 204650 }, { "epoch": 1.7281459120558993, "grad_norm": 0.3419742286205292, "learning_rate": 5.523102056344781e-07, "loss": 0.0074, "step": 204660 }, { "epoch": 1.728230351903063, "grad_norm": 0.24343453347682953, "learning_rate": 5.519736011542531e-07, "loss": 0.007, "step": 204670 }, { "epoch": 1.7283147917502268, "grad_norm": 0.4753543436527252, "learning_rate": 5.51637093284249e-07, "loss": 0.0105, "step": 204680 }, { "epoch": 1.7283992315973908, "grad_norm": 0.31319567561149597, "learning_rate": 5.513006820317762e-07, "loss": 0.0047, "step": 204690 }, { "epoch": 1.7284836714445548, "grad_norm": 0.11280389130115509, "learning_rate": 5.509643674041404e-07, "loss": 0.0035, "step": 204700 }, { "epoch": 1.7285681112917186, "grad_norm": 0.0697961077094078, "learning_rate": 5.50628149408648e-07, "loss": 0.0079, "step": 204710 }, { "epoch": 1.7286525511388824, "grad_norm": 0.5361480712890625, "learning_rate": 5.502920280526003e-07, "loss": 0.0072, "step": 204720 }, { "epoch": 1.7287369909860462, "grad_norm": 0.42887356877326965, "learning_rate": 5.499560033432977e-07, "loss": 0.0108, "step": 204730 }, { "epoch": 1.7288214308332102, "grad_norm": 0.41708865761756897, "learning_rate": 5.496200752880377e-07, "loss": 0.0075, "step": 204740 }, { "epoch": 1.7289058706803742, "grad_norm": 0.09285682439804077, "learning_rate": 5.492842438941188e-07, "loss": 0.0051, "step": 204750 }, { "epoch": 1.728990310527538, "grad_norm": 0.5468642115592957, "learning_rate": 5.48948509168834e-07, "loss": 0.004, "step": 204760 }, { "epoch": 1.7290747503747017, "grad_norm": 0.26643437147140503, "learning_rate": 5.486128711194749e-07, "loss": 0.0107, "step": 204770 }, { "epoch": 1.7291591902218657, "grad_norm": 0.003456325037404895, "learning_rate": 5.482773297533328e-07, "loss": 0.0045, "step": 204780 }, { "epoch": 1.7292436300690297, "grad_norm": 0.0969933345913887, "learning_rate": 5.479418850776946e-07, "loss": 0.0026, "step": 204790 }, { "epoch": 1.7293280699161935, "grad_norm": 0.36824578046798706, "learning_rate": 5.476065370998468e-07, "loss": 0.0048, "step": 204800 }, { "epoch": 1.7294125097633573, "grad_norm": 0.3755458891391754, "learning_rate": 5.472712858270718e-07, "loss": 0.0086, "step": 204810 }, { "epoch": 1.7294969496105213, "grad_norm": 0.47580331563949585, "learning_rate": 5.469361312666521e-07, "loss": 0.0042, "step": 204820 }, { "epoch": 1.7295813894576852, "grad_norm": 0.2840750217437744, "learning_rate": 5.46601073425867e-07, "loss": 0.0046, "step": 204830 }, { "epoch": 1.729665829304849, "grad_norm": 0.18517239391803741, "learning_rate": 5.462661123119945e-07, "loss": 0.0053, "step": 204840 }, { "epoch": 1.7297502691520128, "grad_norm": 0.09099981188774109, "learning_rate": 5.459312479323087e-07, "loss": 0.0035, "step": 204850 }, { "epoch": 1.7298347089991766, "grad_norm": 0.16491740942001343, "learning_rate": 5.455964802940844e-07, "loss": 0.0122, "step": 204860 }, { "epoch": 1.7299191488463406, "grad_norm": 0.07926394790410995, "learning_rate": 5.452618094045914e-07, "loss": 0.0041, "step": 204870 }, { "epoch": 1.7300035886935046, "grad_norm": 0.00898733176290989, "learning_rate": 5.449272352710993e-07, "loss": 0.0062, "step": 204880 }, { "epoch": 1.7300880285406683, "grad_norm": 0.564253032207489, "learning_rate": 5.445927579008747e-07, "loss": 0.004, "step": 204890 }, { "epoch": 1.7301724683878321, "grad_norm": 0.20101147890090942, "learning_rate": 5.442583773011816e-07, "loss": 0.0066, "step": 204900 }, { "epoch": 1.7302569082349961, "grad_norm": 0.39929065108299255, "learning_rate": 5.439240934792844e-07, "loss": 0.0078, "step": 204910 }, { "epoch": 1.7303413480821601, "grad_norm": 0.23538251221179962, "learning_rate": 5.435899064424416e-07, "loss": 0.0024, "step": 204920 }, { "epoch": 1.7304257879293239, "grad_norm": 0.011652501299977303, "learning_rate": 5.43255816197914e-07, "loss": 0.006, "step": 204930 }, { "epoch": 1.7305102277764877, "grad_norm": 0.5342100858688354, "learning_rate": 5.429218227529553e-07, "loss": 0.0072, "step": 204940 }, { "epoch": 1.7305946676236514, "grad_norm": 0.3954441547393799, "learning_rate": 5.425879261148232e-07, "loss": 0.0084, "step": 204950 }, { "epoch": 1.7306791074708154, "grad_norm": 0.020585665479302406, "learning_rate": 5.422541262907672e-07, "loss": 0.006, "step": 204960 }, { "epoch": 1.7307635473179794, "grad_norm": 0.07315046340227127, "learning_rate": 5.419204232880381e-07, "loss": 0.0105, "step": 204970 }, { "epoch": 1.7308479871651432, "grad_norm": 0.005394382867962122, "learning_rate": 5.415868171138833e-07, "loss": 0.0037, "step": 204980 }, { "epoch": 1.730932427012307, "grad_norm": 0.2902320623397827, "learning_rate": 5.4125330777555e-07, "loss": 0.0082, "step": 204990 }, { "epoch": 1.731016866859471, "grad_norm": 0.05989059805870056, "learning_rate": 5.409198952802808e-07, "loss": 0.004, "step": 205000 }, { "epoch": 1.731101306706635, "grad_norm": 0.0792713314294815, "learning_rate": 5.405865796353171e-07, "loss": 0.0084, "step": 205010 }, { "epoch": 1.7311857465537988, "grad_norm": 0.21174216270446777, "learning_rate": 5.402533608478999e-07, "loss": 0.0043, "step": 205020 }, { "epoch": 1.7312701864009625, "grad_norm": 0.1868610829114914, "learning_rate": 5.399202389252651e-07, "loss": 0.0047, "step": 205030 }, { "epoch": 1.7313546262481265, "grad_norm": 0.462200790643692, "learning_rate": 5.395872138746505e-07, "loss": 0.0105, "step": 205040 }, { "epoch": 1.7314390660952905, "grad_norm": 0.2707598805427551, "learning_rate": 5.392542857032857e-07, "loss": 0.0057, "step": 205050 }, { "epoch": 1.7315235059424543, "grad_norm": 0.02248859964311123, "learning_rate": 5.389214544184046e-07, "loss": 0.006, "step": 205060 }, { "epoch": 1.731607945789618, "grad_norm": 0.18699905276298523, "learning_rate": 5.385887200272349e-07, "loss": 0.007, "step": 205070 }, { "epoch": 1.7316923856367819, "grad_norm": 0.5921980738639832, "learning_rate": 5.382560825370042e-07, "loss": 0.0086, "step": 205080 }, { "epoch": 1.7317768254839458, "grad_norm": 0.10257245600223541, "learning_rate": 5.379235419549367e-07, "loss": 0.0066, "step": 205090 }, { "epoch": 1.7318612653311098, "grad_norm": 0.6155782341957092, "learning_rate": 5.37591098288256e-07, "loss": 0.0052, "step": 205100 }, { "epoch": 1.7319457051782736, "grad_norm": 0.14011608064174652, "learning_rate": 5.372587515441824e-07, "loss": 0.0051, "step": 205110 }, { "epoch": 1.7320301450254374, "grad_norm": 0.11145371943712234, "learning_rate": 5.369265017299341e-07, "loss": 0.0047, "step": 205120 }, { "epoch": 1.7321145848726014, "grad_norm": 0.2028665691614151, "learning_rate": 5.365943488527281e-07, "loss": 0.0077, "step": 205130 }, { "epoch": 1.7321990247197654, "grad_norm": 0.13623002171516418, "learning_rate": 5.362622929197769e-07, "loss": 0.0052, "step": 205140 }, { "epoch": 1.7322834645669292, "grad_norm": 0.3785041868686676, "learning_rate": 5.359303339382949e-07, "loss": 0.0077, "step": 205150 }, { "epoch": 1.732367904414093, "grad_norm": 0.15544554591178894, "learning_rate": 5.3559847191549e-07, "loss": 0.0059, "step": 205160 }, { "epoch": 1.732452344261257, "grad_norm": 0.1383344829082489, "learning_rate": 5.352667068585726e-07, "loss": 0.004, "step": 205170 }, { "epoch": 1.7325367841084207, "grad_norm": 0.40088951587677, "learning_rate": 5.34935038774747e-07, "loss": 0.0078, "step": 205180 }, { "epoch": 1.7326212239555847, "grad_norm": 0.3715192675590515, "learning_rate": 5.34603467671218e-07, "loss": 0.0065, "step": 205190 }, { "epoch": 1.7327056638027485, "grad_norm": 0.29350030422210693, "learning_rate": 5.342719935551865e-07, "loss": 0.0112, "step": 205200 }, { "epoch": 1.7327901036499123, "grad_norm": 0.17087788879871368, "learning_rate": 5.339406164338523e-07, "loss": 0.0036, "step": 205210 }, { "epoch": 1.7328745434970763, "grad_norm": 0.2090778797864914, "learning_rate": 5.336093363144123e-07, "loss": 0.0063, "step": 205220 }, { "epoch": 1.7329589833442403, "grad_norm": 0.4135516285896301, "learning_rate": 5.332781532040621e-07, "loss": 0.0064, "step": 205230 }, { "epoch": 1.733043423191404, "grad_norm": 0.20384109020233154, "learning_rate": 5.329470671099957e-07, "loss": 0.0042, "step": 205240 }, { "epoch": 1.7331278630385678, "grad_norm": 0.10273367166519165, "learning_rate": 5.326160780394024e-07, "loss": 0.0029, "step": 205250 }, { "epoch": 1.7332123028857318, "grad_norm": 0.27639538049697876, "learning_rate": 5.322851859994738e-07, "loss": 0.0095, "step": 205260 }, { "epoch": 1.7332967427328958, "grad_norm": 0.21477459371089935, "learning_rate": 5.31954390997394e-07, "loss": 0.005, "step": 205270 }, { "epoch": 1.7333811825800596, "grad_norm": 0.09056713432073593, "learning_rate": 5.316236930403506e-07, "loss": 0.0113, "step": 205280 }, { "epoch": 1.7334656224272234, "grad_norm": 0.27187836170196533, "learning_rate": 5.312930921355247e-07, "loss": 0.0052, "step": 205290 }, { "epoch": 1.7335500622743871, "grad_norm": 0.24322177469730377, "learning_rate": 5.309625882900976e-07, "loss": 0.008, "step": 205300 }, { "epoch": 1.7336345021215511, "grad_norm": 0.2283494770526886, "learning_rate": 5.306321815112458e-07, "loss": 0.0057, "step": 205310 }, { "epoch": 1.7337189419687151, "grad_norm": 0.23463571071624756, "learning_rate": 5.303018718061481e-07, "loss": 0.0043, "step": 205320 }, { "epoch": 1.733803381815879, "grad_norm": 0.46500205993652344, "learning_rate": 5.29971659181977e-07, "loss": 0.0039, "step": 205330 }, { "epoch": 1.7338878216630427, "grad_norm": 0.2795797288417816, "learning_rate": 5.296415436459063e-07, "loss": 0.0053, "step": 205340 }, { "epoch": 1.7339722615102067, "grad_norm": 0.04692850634455681, "learning_rate": 5.293115252051057e-07, "loss": 0.004, "step": 205350 }, { "epoch": 1.7340567013573707, "grad_norm": 0.11713121831417084, "learning_rate": 5.289816038667422e-07, "loss": 0.0025, "step": 205360 }, { "epoch": 1.7341411412045344, "grad_norm": 1.4900295734405518, "learning_rate": 5.286517796379819e-07, "loss": 0.006, "step": 205370 }, { "epoch": 1.7342255810516982, "grad_norm": 0.35897859930992126, "learning_rate": 5.28322052525988e-07, "loss": 0.0059, "step": 205380 }, { "epoch": 1.7343100208988622, "grad_norm": 0.13217788934707642, "learning_rate": 5.27992422537924e-07, "loss": 0.0063, "step": 205390 }, { "epoch": 1.7343944607460262, "grad_norm": 0.22415019571781158, "learning_rate": 5.276628896809471e-07, "loss": 0.0063, "step": 205400 }, { "epoch": 1.73447890059319, "grad_norm": 0.12837573885917664, "learning_rate": 5.273334539622165e-07, "loss": 0.006, "step": 205410 }, { "epoch": 1.7345633404403538, "grad_norm": 0.581489086151123, "learning_rate": 5.270041153888855e-07, "loss": 0.0091, "step": 205420 }, { "epoch": 1.7346477802875175, "grad_norm": 0.2210930585861206, "learning_rate": 5.266748739681099e-07, "loss": 0.0072, "step": 205430 }, { "epoch": 1.7347322201346815, "grad_norm": 0.14828045666217804, "learning_rate": 5.263457297070396e-07, "loss": 0.006, "step": 205440 }, { "epoch": 1.7348166599818455, "grad_norm": 0.471072793006897, "learning_rate": 5.260166826128227e-07, "loss": 0.007, "step": 205450 }, { "epoch": 1.7349010998290093, "grad_norm": 1.107719898223877, "learning_rate": 5.256877326926074e-07, "loss": 0.0105, "step": 205460 }, { "epoch": 1.734985539676173, "grad_norm": 0.021222930401563644, "learning_rate": 5.253588799535358e-07, "loss": 0.0075, "step": 205470 }, { "epoch": 1.735069979523337, "grad_norm": 0.013968459330499172, "learning_rate": 5.25030124402754e-07, "loss": 0.0031, "step": 205480 }, { "epoch": 1.735154419370501, "grad_norm": 0.16904816031455994, "learning_rate": 5.247014660473998e-07, "loss": 0.0028, "step": 205490 }, { "epoch": 1.7352388592176649, "grad_norm": 0.23489409685134888, "learning_rate": 5.243729048946133e-07, "loss": 0.006, "step": 205500 }, { "epoch": 1.7353232990648286, "grad_norm": 0.023010673001408577, "learning_rate": 5.2404444095153e-07, "loss": 0.0046, "step": 205510 }, { "epoch": 1.7354077389119924, "grad_norm": 0.2377074956893921, "learning_rate": 5.237160742252844e-07, "loss": 0.0081, "step": 205520 }, { "epoch": 1.7354921787591564, "grad_norm": 0.37512022256851196, "learning_rate": 5.233878047230084e-07, "loss": 0.0072, "step": 205530 }, { "epoch": 1.7355766186063204, "grad_norm": 0.33253225684165955, "learning_rate": 5.230596324518328e-07, "loss": 0.0071, "step": 205540 }, { "epoch": 1.7356610584534842, "grad_norm": 0.050185397267341614, "learning_rate": 5.227315574188829e-07, "loss": 0.0033, "step": 205550 }, { "epoch": 1.735745498300648, "grad_norm": 0.191918283700943, "learning_rate": 5.224035796312871e-07, "loss": 0.008, "step": 205560 }, { "epoch": 1.735829938147812, "grad_norm": 0.16758038103580475, "learning_rate": 5.220756990961667e-07, "loss": 0.0052, "step": 205570 }, { "epoch": 1.735914377994976, "grad_norm": 0.29606732726097107, "learning_rate": 5.217479158206463e-07, "loss": 0.0049, "step": 205580 }, { "epoch": 1.7359988178421397, "grad_norm": 0.21417604386806488, "learning_rate": 5.214202298118432e-07, "loss": 0.0081, "step": 205590 }, { "epoch": 1.7360832576893035, "grad_norm": 0.27981096506118774, "learning_rate": 5.210926410768735e-07, "loss": 0.0057, "step": 205600 }, { "epoch": 1.7361676975364675, "grad_norm": 0.37364909052848816, "learning_rate": 5.207651496228555e-07, "loss": 0.0064, "step": 205610 }, { "epoch": 1.7362521373836315, "grad_norm": 0.36578914523124695, "learning_rate": 5.204377554569001e-07, "loss": 0.0123, "step": 205620 }, { "epoch": 1.7363365772307953, "grad_norm": 0.215984508395195, "learning_rate": 5.201104585861189e-07, "loss": 0.0053, "step": 205630 }, { "epoch": 1.736421017077959, "grad_norm": 0.4050317704677582, "learning_rate": 5.197832590176194e-07, "loss": 0.0067, "step": 205640 }, { "epoch": 1.7365054569251228, "grad_norm": 0.4903091788291931, "learning_rate": 5.194561567585104e-07, "loss": 0.0116, "step": 205650 }, { "epoch": 1.7365898967722868, "grad_norm": 0.24310052394866943, "learning_rate": 5.191291518158948e-07, "loss": 0.0042, "step": 205660 }, { "epoch": 1.7366743366194508, "grad_norm": 0.19948409497737885, "learning_rate": 5.188022441968765e-07, "loss": 0.0057, "step": 205670 }, { "epoch": 1.7367587764666146, "grad_norm": 0.5035216212272644, "learning_rate": 5.184754339085552e-07, "loss": 0.0082, "step": 205680 }, { "epoch": 1.7368432163137784, "grad_norm": 0.09443892538547516, "learning_rate": 5.181487209580294e-07, "loss": 0.0068, "step": 205690 }, { "epoch": 1.7369276561609424, "grad_norm": 0.1788904219865799, "learning_rate": 5.178221053523946e-07, "loss": 0.0058, "step": 205700 }, { "epoch": 1.7370120960081064, "grad_norm": 0.5393658876419067, "learning_rate": 5.174955870987441e-07, "loss": 0.0086, "step": 205710 }, { "epoch": 1.7370965358552701, "grad_norm": 0.5268831253051758, "learning_rate": 5.171691662041723e-07, "loss": 0.011, "step": 205720 }, { "epoch": 1.737180975702434, "grad_norm": 0.21503472328186035, "learning_rate": 5.168428426757665e-07, "loss": 0.003, "step": 205730 }, { "epoch": 1.737265415549598, "grad_norm": 0.2465408742427826, "learning_rate": 5.165166165206165e-07, "loss": 0.0053, "step": 205740 }, { "epoch": 1.7373498553967617, "grad_norm": 0.16379697620868683, "learning_rate": 5.161904877458057e-07, "loss": 0.0038, "step": 205750 }, { "epoch": 1.7374342952439257, "grad_norm": 0.07656274735927582, "learning_rate": 5.158644563584198e-07, "loss": 0.0064, "step": 205760 }, { "epoch": 1.7375187350910895, "grad_norm": 0.13881975412368774, "learning_rate": 5.15538522365539e-07, "loss": 0.0036, "step": 205770 }, { "epoch": 1.7376031749382532, "grad_norm": 0.13337057828903198, "learning_rate": 5.152126857742423e-07, "loss": 0.0055, "step": 205780 }, { "epoch": 1.7376876147854172, "grad_norm": 0.20568899810314178, "learning_rate": 5.148869465916063e-07, "loss": 0.0079, "step": 205790 }, { "epoch": 1.7377720546325812, "grad_norm": 0.03921296074986458, "learning_rate": 5.145613048247077e-07, "loss": 0.0044, "step": 205800 }, { "epoch": 1.737856494479745, "grad_norm": 0.18546487390995026, "learning_rate": 5.142357604806175e-07, "loss": 0.004, "step": 205810 }, { "epoch": 1.7379409343269088, "grad_norm": 0.5084309577941895, "learning_rate": 5.139103135664081e-07, "loss": 0.0076, "step": 205820 }, { "epoch": 1.7380253741740728, "grad_norm": 0.18379732966423035, "learning_rate": 5.135849640891477e-07, "loss": 0.0049, "step": 205830 }, { "epoch": 1.7381098140212368, "grad_norm": 0.10361236333847046, "learning_rate": 5.132597120559013e-07, "loss": 0.0035, "step": 205840 }, { "epoch": 1.7381942538684005, "grad_norm": 0.0776888057589531, "learning_rate": 5.129345574737355e-07, "loss": 0.0051, "step": 205850 }, { "epoch": 1.7382786937155643, "grad_norm": 0.34960976243019104, "learning_rate": 5.126095003497117e-07, "loss": 0.0065, "step": 205860 }, { "epoch": 1.738363133562728, "grad_norm": 0.39150699973106384, "learning_rate": 5.122845406908899e-07, "loss": 0.0043, "step": 205870 }, { "epoch": 1.738447573409892, "grad_norm": 0.20337742567062378, "learning_rate": 5.119596785043274e-07, "loss": 0.0045, "step": 205880 }, { "epoch": 1.738532013257056, "grad_norm": 0.35991406440734863, "learning_rate": 5.116349137970822e-07, "loss": 0.006, "step": 205890 }, { "epoch": 1.7386164531042199, "grad_norm": 0.12557768821716309, "learning_rate": 5.113102465762055e-07, "loss": 0.0044, "step": 205900 }, { "epoch": 1.7387008929513836, "grad_norm": 0.10450237989425659, "learning_rate": 5.10985676848752e-07, "loss": 0.0075, "step": 205910 }, { "epoch": 1.7387853327985476, "grad_norm": 0.3237680196762085, "learning_rate": 5.106612046217679e-07, "loss": 0.0069, "step": 205920 }, { "epoch": 1.7388697726457116, "grad_norm": 0.395554780960083, "learning_rate": 5.103368299023048e-07, "loss": 0.0038, "step": 205930 }, { "epoch": 1.7389542124928754, "grad_norm": 0.11569875478744507, "learning_rate": 5.100125526974053e-07, "loss": 0.0055, "step": 205940 }, { "epoch": 1.7390386523400392, "grad_norm": 0.2549099624156952, "learning_rate": 5.096883730141117e-07, "loss": 0.0082, "step": 205950 }, { "epoch": 1.7391230921872032, "grad_norm": 0.2345075160264969, "learning_rate": 5.093642908594671e-07, "loss": 0.0054, "step": 205960 }, { "epoch": 1.7392075320343672, "grad_norm": 0.40800341963768005, "learning_rate": 5.090403062405092e-07, "loss": 0.0063, "step": 205970 }, { "epoch": 1.739291971881531, "grad_norm": 0.0580623485147953, "learning_rate": 5.087164191642768e-07, "loss": 0.0057, "step": 205980 }, { "epoch": 1.7393764117286947, "grad_norm": 0.22455070912837982, "learning_rate": 5.083926296378023e-07, "loss": 0.0076, "step": 205990 }, { "epoch": 1.7394608515758585, "grad_norm": 0.3742291033267975, "learning_rate": 5.080689376681202e-07, "loss": 0.007, "step": 206000 }, { "epoch": 1.7395452914230225, "grad_norm": 0.3733861446380615, "learning_rate": 5.077453432622603e-07, "loss": 0.007, "step": 206010 }, { "epoch": 1.7396297312701865, "grad_norm": 0.1978204846382141, "learning_rate": 5.074218464272512e-07, "loss": 0.0045, "step": 206020 }, { "epoch": 1.7397141711173503, "grad_norm": 0.26535412669181824, "learning_rate": 5.07098447170118e-07, "loss": 0.0062, "step": 206030 }, { "epoch": 1.739798610964514, "grad_norm": 0.9711024165153503, "learning_rate": 5.067751454978864e-07, "loss": 0.0119, "step": 206040 }, { "epoch": 1.739883050811678, "grad_norm": 0.4490130841732025, "learning_rate": 5.064519414175783e-07, "loss": 0.0078, "step": 206050 }, { "epoch": 1.739967490658842, "grad_norm": 0.03760529309511185, "learning_rate": 5.061288349362126e-07, "loss": 0.0088, "step": 206060 }, { "epoch": 1.7400519305060058, "grad_norm": 0.00363972969353199, "learning_rate": 5.058058260608078e-07, "loss": 0.0032, "step": 206070 }, { "epoch": 1.7401363703531696, "grad_norm": 0.03777451813220978, "learning_rate": 5.054829147983792e-07, "loss": 0.0057, "step": 206080 }, { "epoch": 1.7402208102003336, "grad_norm": 0.5626773238182068, "learning_rate": 5.051601011559415e-07, "loss": 0.0049, "step": 206090 }, { "epoch": 1.7403052500474974, "grad_norm": 0.06224822252988815, "learning_rate": 5.048373851405053e-07, "loss": 0.0077, "step": 206100 }, { "epoch": 1.7403896898946614, "grad_norm": 0.3795340955257416, "learning_rate": 5.0451476675908e-07, "loss": 0.0064, "step": 206110 }, { "epoch": 1.7404741297418251, "grad_norm": 0.2050059735774994, "learning_rate": 5.041922460186716e-07, "loss": 0.007, "step": 206120 }, { "epoch": 1.740558569588989, "grad_norm": 0.35306429862976074, "learning_rate": 5.038698229262872e-07, "loss": 0.0037, "step": 206130 }, { "epoch": 1.740643009436153, "grad_norm": 0.5294801592826843, "learning_rate": 5.035474974889281e-07, "loss": 0.0094, "step": 206140 }, { "epoch": 1.740727449283317, "grad_norm": 0.15257377922534943, "learning_rate": 5.03225269713597e-07, "loss": 0.0046, "step": 206150 }, { "epoch": 1.7408118891304807, "grad_norm": 0.3524959087371826, "learning_rate": 5.029031396072909e-07, "loss": 0.0059, "step": 206160 }, { "epoch": 1.7408963289776445, "grad_norm": 0.16665518283843994, "learning_rate": 5.025811071770076e-07, "loss": 0.0046, "step": 206170 }, { "epoch": 1.7409807688248085, "grad_norm": 0.45016366243362427, "learning_rate": 5.02259172429741e-07, "loss": 0.0117, "step": 206180 }, { "epoch": 1.7410652086719725, "grad_norm": 0.2765675485134125, "learning_rate": 5.019373353724838e-07, "loss": 0.0071, "step": 206190 }, { "epoch": 1.7411496485191362, "grad_norm": 0.22156475484371185, "learning_rate": 5.01615596012226e-07, "loss": 0.0036, "step": 206200 }, { "epoch": 1.7412340883663, "grad_norm": 0.24823372066020966, "learning_rate": 5.012939543559542e-07, "loss": 0.0103, "step": 206210 }, { "epoch": 1.7413185282134638, "grad_norm": 0.07085438072681427, "learning_rate": 5.009724104106573e-07, "loss": 0.0044, "step": 206220 }, { "epoch": 1.7414029680606278, "grad_norm": 0.23898260295391083, "learning_rate": 5.006509641833163e-07, "loss": 0.0066, "step": 206230 }, { "epoch": 1.7414874079077918, "grad_norm": 0.23758366703987122, "learning_rate": 5.003296156809157e-07, "loss": 0.0113, "step": 206240 }, { "epoch": 1.7415718477549555, "grad_norm": 0.10127775371074677, "learning_rate": 5.000083649104337e-07, "loss": 0.0046, "step": 206250 }, { "epoch": 1.7416562876021193, "grad_norm": 0.15875138342380524, "learning_rate": 4.996872118788477e-07, "loss": 0.0094, "step": 206260 }, { "epoch": 1.7417407274492833, "grad_norm": 0.029188893735408783, "learning_rate": 4.993661565931324e-07, "loss": 0.0028, "step": 206270 }, { "epoch": 1.7418251672964473, "grad_norm": 0.26933690905570984, "learning_rate": 4.990451990602624e-07, "loss": 0.0051, "step": 206280 }, { "epoch": 1.741909607143611, "grad_norm": 0.06908519566059113, "learning_rate": 4.987243392872087e-07, "loss": 0.0068, "step": 206290 }, { "epoch": 1.7419940469907749, "grad_norm": 0.44612371921539307, "learning_rate": 4.984035772809393e-07, "loss": 0.0077, "step": 206300 }, { "epoch": 1.7420784868379389, "grad_norm": 0.18255478143692017, "learning_rate": 4.980829130484228e-07, "loss": 0.0075, "step": 206310 }, { "epoch": 1.7421629266851029, "grad_norm": 0.3214050829410553, "learning_rate": 4.977623465966213e-07, "loss": 0.008, "step": 206320 }, { "epoch": 1.7422473665322666, "grad_norm": 0.29969656467437744, "learning_rate": 4.974418779325002e-07, "loss": 0.0032, "step": 206330 }, { "epoch": 1.7423318063794304, "grad_norm": 0.002793380059301853, "learning_rate": 4.971215070630192e-07, "loss": 0.0058, "step": 206340 }, { "epoch": 1.7424162462265942, "grad_norm": 0.21441040933132172, "learning_rate": 4.968012339951361e-07, "loss": 0.0071, "step": 206350 }, { "epoch": 1.7425006860737582, "grad_norm": 0.48293161392211914, "learning_rate": 4.964810587358065e-07, "loss": 0.0103, "step": 206360 }, { "epoch": 1.7425851259209222, "grad_norm": 0.000590445997659117, "learning_rate": 4.961609812919866e-07, "loss": 0.0062, "step": 206370 }, { "epoch": 1.742669565768086, "grad_norm": 0.17410700023174286, "learning_rate": 4.958410016706256e-07, "loss": 0.0023, "step": 206380 }, { "epoch": 1.7427540056152497, "grad_norm": 0.2875536382198334, "learning_rate": 4.955211198786763e-07, "loss": 0.0049, "step": 206390 }, { "epoch": 1.7428384454624137, "grad_norm": 0.0009756861254572868, "learning_rate": 4.952013359230846e-07, "loss": 0.0053, "step": 206400 }, { "epoch": 1.7429228853095777, "grad_norm": 0.0026094040367752314, "learning_rate": 4.948816498107978e-07, "loss": 0.004, "step": 206410 }, { "epoch": 1.7430073251567415, "grad_norm": 0.3041199743747711, "learning_rate": 4.945620615487578e-07, "loss": 0.0068, "step": 206420 }, { "epoch": 1.7430917650039053, "grad_norm": 0.13211014866828918, "learning_rate": 4.942425711439069e-07, "loss": 0.01, "step": 206430 }, { "epoch": 1.743176204851069, "grad_norm": 0.08397512137889862, "learning_rate": 4.939231786031845e-07, "loss": 0.0106, "step": 206440 }, { "epoch": 1.743260644698233, "grad_norm": 0.14515385031700134, "learning_rate": 4.936038839335255e-07, "loss": 0.0052, "step": 206450 }, { "epoch": 1.743345084545397, "grad_norm": 0.17118504643440247, "learning_rate": 4.932846871418678e-07, "loss": 0.0051, "step": 206460 }, { "epoch": 1.7434295243925608, "grad_norm": 0.23103442788124084, "learning_rate": 4.929655882351425e-07, "loss": 0.0031, "step": 206470 }, { "epoch": 1.7435139642397246, "grad_norm": 0.71855229139328, "learning_rate": 4.926465872202823e-07, "loss": 0.0038, "step": 206480 }, { "epoch": 1.7435984040868886, "grad_norm": 0.23442025482654572, "learning_rate": 4.92327684104213e-07, "loss": 0.0049, "step": 206490 }, { "epoch": 1.7436828439340526, "grad_norm": 0.822557806968689, "learning_rate": 4.920088788938643e-07, "loss": 0.0078, "step": 206500 }, { "epoch": 1.7437672837812164, "grad_norm": 0.31091731786727905, "learning_rate": 4.916901715961581e-07, "loss": 0.0062, "step": 206510 }, { "epoch": 1.7438517236283801, "grad_norm": 0.11378229409456253, "learning_rate": 4.913715622180182e-07, "loss": 0.0135, "step": 206520 }, { "epoch": 1.7439361634755441, "grad_norm": 0.15296566486358643, "learning_rate": 4.910530507663641e-07, "loss": 0.0058, "step": 206530 }, { "epoch": 1.7440206033227081, "grad_norm": 0.22672544419765472, "learning_rate": 4.907346372481126e-07, "loss": 0.0114, "step": 206540 }, { "epoch": 1.744105043169872, "grad_norm": 0.4902113080024719, "learning_rate": 4.904163216701813e-07, "loss": 0.006, "step": 206550 }, { "epoch": 1.7441894830170357, "grad_norm": 0.33545058965682983, "learning_rate": 4.900981040394831e-07, "loss": 0.0082, "step": 206560 }, { "epoch": 1.7442739228641995, "grad_norm": 0.46532028913497925, "learning_rate": 4.897799843629303e-07, "loss": 0.0059, "step": 206570 }, { "epoch": 1.7443583627113635, "grad_norm": 0.22394172847270966, "learning_rate": 4.894619626474323e-07, "loss": 0.0044, "step": 206580 }, { "epoch": 1.7444428025585275, "grad_norm": 0.20125249028205872, "learning_rate": 4.891440388998964e-07, "loss": 0.0066, "step": 206590 }, { "epoch": 1.7445272424056912, "grad_norm": 0.26620516180992126, "learning_rate": 4.888262131272265e-07, "loss": 0.0045, "step": 206600 }, { "epoch": 1.744611682252855, "grad_norm": 0.34650516510009766, "learning_rate": 4.885084853363276e-07, "loss": 0.0044, "step": 206610 }, { "epoch": 1.744696122100019, "grad_norm": 0.16011755168437958, "learning_rate": 4.881908555340992e-07, "loss": 0.0137, "step": 206620 }, { "epoch": 1.744780561947183, "grad_norm": 0.1978701949119568, "learning_rate": 4.878733237274413e-07, "loss": 0.0114, "step": 206630 }, { "epoch": 1.7448650017943468, "grad_norm": 0.14634153246879578, "learning_rate": 4.875558899232507e-07, "loss": 0.0054, "step": 206640 }, { "epoch": 1.7449494416415106, "grad_norm": 0.9228848218917847, "learning_rate": 4.872385541284202e-07, "loss": 0.0054, "step": 206650 }, { "epoch": 1.7450338814886746, "grad_norm": 0.30923908948898315, "learning_rate": 4.869213163498448e-07, "loss": 0.0034, "step": 206660 }, { "epoch": 1.7451183213358383, "grad_norm": 0.256648987531662, "learning_rate": 4.866041765944135e-07, "loss": 0.0053, "step": 206670 }, { "epoch": 1.7452027611830023, "grad_norm": 0.16549113392829895, "learning_rate": 4.862871348690145e-07, "loss": 0.0064, "step": 206680 }, { "epoch": 1.745287201030166, "grad_norm": 0.48057955503463745, "learning_rate": 4.859701911805337e-07, "loss": 0.0063, "step": 206690 }, { "epoch": 1.7453716408773299, "grad_norm": 0.4275016486644745, "learning_rate": 4.856533455358553e-07, "loss": 0.0043, "step": 206700 }, { "epoch": 1.7454560807244939, "grad_norm": 0.15653522312641144, "learning_rate": 4.853365979418606e-07, "loss": 0.0038, "step": 206710 }, { "epoch": 1.7455405205716579, "grad_norm": 0.14508137106895447, "learning_rate": 4.850199484054308e-07, "loss": 0.0069, "step": 206720 }, { "epoch": 1.7456249604188216, "grad_norm": 0.08306359499692917, "learning_rate": 4.847033969334419e-07, "loss": 0.0029, "step": 206730 }, { "epoch": 1.7457094002659854, "grad_norm": 0.2468600571155548, "learning_rate": 4.843869435327703e-07, "loss": 0.008, "step": 206740 }, { "epoch": 1.7457938401131494, "grad_norm": 0.10737486183643341, "learning_rate": 4.840705882102891e-07, "loss": 0.0161, "step": 206750 }, { "epoch": 1.7458782799603134, "grad_norm": 0.014669924974441528, "learning_rate": 4.837543309728698e-07, "loss": 0.0022, "step": 206760 }, { "epoch": 1.7459627198074772, "grad_norm": 0.3549404740333557, "learning_rate": 4.834381718273806e-07, "loss": 0.0039, "step": 206770 }, { "epoch": 1.746047159654641, "grad_norm": 0.22494398057460785, "learning_rate": 4.831221107806877e-07, "loss": 0.0046, "step": 206780 }, { "epoch": 1.7461315995018047, "grad_norm": 0.4771553874015808, "learning_rate": 4.828061478396579e-07, "loss": 0.0121, "step": 206790 }, { "epoch": 1.7462160393489687, "grad_norm": 0.20529326796531677, "learning_rate": 4.824902830111522e-07, "loss": 0.0102, "step": 206800 }, { "epoch": 1.7463004791961327, "grad_norm": 0.21425452828407288, "learning_rate": 4.821745163020325e-07, "loss": 0.0088, "step": 206810 }, { "epoch": 1.7463849190432965, "grad_norm": 0.3215724229812622, "learning_rate": 4.818588477191561e-07, "loss": 0.0079, "step": 206820 }, { "epoch": 1.7464693588904603, "grad_norm": 0.07887915521860123, "learning_rate": 4.815432772693812e-07, "loss": 0.0025, "step": 206830 }, { "epoch": 1.7465537987376243, "grad_norm": 0.17219044268131256, "learning_rate": 4.812278049595586e-07, "loss": 0.0071, "step": 206840 }, { "epoch": 1.7466382385847883, "grad_norm": 0.32819032669067383, "learning_rate": 4.809124307965429e-07, "loss": 0.0053, "step": 206850 }, { "epoch": 1.746722678431952, "grad_norm": 0.2940078675746918, "learning_rate": 4.805971547871824e-07, "loss": 0.0065, "step": 206860 }, { "epoch": 1.7468071182791158, "grad_norm": 0.096591055393219, "learning_rate": 4.80281976938326e-07, "loss": 0.0068, "step": 206870 }, { "epoch": 1.7468915581262798, "grad_norm": 1.0073283910751343, "learning_rate": 4.799668972568195e-07, "loss": 0.0107, "step": 206880 }, { "epoch": 1.7469759979734438, "grad_norm": 0.11047645658254623, "learning_rate": 4.796519157495044e-07, "loss": 0.0025, "step": 206890 }, { "epoch": 1.7470604378206076, "grad_norm": 0.1621612161397934, "learning_rate": 4.793370324232243e-07, "loss": 0.0047, "step": 206900 }, { "epoch": 1.7471448776677714, "grad_norm": 0.545360803604126, "learning_rate": 4.790222472848177e-07, "loss": 0.0073, "step": 206910 }, { "epoch": 1.7472293175149352, "grad_norm": 0.2728042006492615, "learning_rate": 4.787075603411217e-07, "loss": 0.0073, "step": 206920 }, { "epoch": 1.7473137573620992, "grad_norm": 0.2216661274433136, "learning_rate": 4.783929715989699e-07, "loss": 0.0067, "step": 206930 }, { "epoch": 1.7473981972092631, "grad_norm": 0.16235093772411346, "learning_rate": 4.780784810651968e-07, "loss": 0.009, "step": 206940 }, { "epoch": 1.747482637056427, "grad_norm": 0.0881546139717102, "learning_rate": 4.777640887466318e-07, "loss": 0.0113, "step": 206950 }, { "epoch": 1.7475670769035907, "grad_norm": 0.05812663957476616, "learning_rate": 4.77449794650105e-07, "loss": 0.0049, "step": 206960 }, { "epoch": 1.7476515167507547, "grad_norm": 0.23196007311344147, "learning_rate": 4.771355987824411e-07, "loss": 0.0057, "step": 206970 }, { "epoch": 1.7477359565979187, "grad_norm": 0.3653528094291687, "learning_rate": 4.7682150115046566e-07, "loss": 0.0038, "step": 206980 }, { "epoch": 1.7478203964450825, "grad_norm": 0.1033320277929306, "learning_rate": 4.7650750176100104e-07, "loss": 0.003, "step": 206990 }, { "epoch": 1.7479048362922462, "grad_norm": 0.06773481518030167, "learning_rate": 4.7619360062086616e-07, "loss": 0.0051, "step": 207000 }, { "epoch": 1.7479892761394102, "grad_norm": 0.20868708193302155, "learning_rate": 4.7587979773687896e-07, "loss": 0.004, "step": 207010 }, { "epoch": 1.748073715986574, "grad_norm": 0.15741144120693207, "learning_rate": 4.7556609311585455e-07, "loss": 0.0093, "step": 207020 }, { "epoch": 1.748158155833738, "grad_norm": 0.01139204204082489, "learning_rate": 4.752524867646091e-07, "loss": 0.0036, "step": 207030 }, { "epoch": 1.7482425956809018, "grad_norm": 0.2943878173828125, "learning_rate": 4.749389786899505e-07, "loss": 0.0095, "step": 207040 }, { "epoch": 1.7483270355280656, "grad_norm": 0.31974589824676514, "learning_rate": 4.746255688986917e-07, "loss": 0.005, "step": 207050 }, { "epoch": 1.7484114753752296, "grad_norm": 0.23244334757328033, "learning_rate": 4.743122573976372e-07, "loss": 0.0059, "step": 207060 }, { "epoch": 1.7484959152223936, "grad_norm": 0.7404547929763794, "learning_rate": 4.739990441935938e-07, "loss": 0.0065, "step": 207070 }, { "epoch": 1.7485803550695573, "grad_norm": 0.17069774866104126, "learning_rate": 4.736859292933638e-07, "loss": 0.0032, "step": 207080 }, { "epoch": 1.748664794916721, "grad_norm": 0.1739145666360855, "learning_rate": 4.7337291270374783e-07, "loss": 0.0041, "step": 207090 }, { "epoch": 1.748749234763885, "grad_norm": 0.25635775923728943, "learning_rate": 4.730599944315434e-07, "loss": 0.0081, "step": 207100 }, { "epoch": 1.748833674611049, "grad_norm": 0.13268153369426727, "learning_rate": 4.7274717448354937e-07, "loss": 0.0068, "step": 207110 }, { "epoch": 1.7489181144582129, "grad_norm": 0.4941830337047577, "learning_rate": 4.724344528665592e-07, "loss": 0.0062, "step": 207120 }, { "epoch": 1.7490025543053767, "grad_norm": 0.34558019042015076, "learning_rate": 4.7212182958736365e-07, "loss": 0.0068, "step": 207130 }, { "epoch": 1.7490869941525404, "grad_norm": 0.2479415386915207, "learning_rate": 4.718093046527555e-07, "loss": 0.008, "step": 207140 }, { "epoch": 1.7491714339997044, "grad_norm": 0.18190504610538483, "learning_rate": 4.7149687806952114e-07, "loss": 0.0053, "step": 207150 }, { "epoch": 1.7492558738468684, "grad_norm": 0.3884749710559845, "learning_rate": 4.7118454984444616e-07, "loss": 0.0048, "step": 207160 }, { "epoch": 1.7493403136940322, "grad_norm": 0.265931099653244, "learning_rate": 4.708723199843146e-07, "loss": 0.0083, "step": 207170 }, { "epoch": 1.749424753541196, "grad_norm": 0.028505723923444748, "learning_rate": 4.7056018849590824e-07, "loss": 0.0048, "step": 207180 }, { "epoch": 1.74950919338836, "grad_norm": 0.09115459769964218, "learning_rate": 4.702481553860061e-07, "loss": 0.007, "step": 207190 }, { "epoch": 1.749593633235524, "grad_norm": 0.2805377244949341, "learning_rate": 4.6993622066138666e-07, "loss": 0.0057, "step": 207200 }, { "epoch": 1.7496780730826877, "grad_norm": 0.22434084117412567, "learning_rate": 4.6962438432882286e-07, "loss": 0.0089, "step": 207210 }, { "epoch": 1.7497625129298515, "grad_norm": 0.2199438065290451, "learning_rate": 4.6931264639509034e-07, "loss": 0.0094, "step": 207220 }, { "epoch": 1.7498469527770155, "grad_norm": 0.18756678700447083, "learning_rate": 4.690010068669587e-07, "loss": 0.0034, "step": 207230 }, { "epoch": 1.7499313926241795, "grad_norm": 0.36195695400238037, "learning_rate": 4.686894657511964e-07, "loss": 0.0063, "step": 207240 }, { "epoch": 1.7500158324713433, "grad_norm": 0.18320412933826447, "learning_rate": 4.6837802305457094e-07, "loss": 0.0037, "step": 207250 }, { "epoch": 1.750100272318507, "grad_norm": 0.25992101430892944, "learning_rate": 4.680666787838445e-07, "loss": 0.0029, "step": 207260 }, { "epoch": 1.7501847121656708, "grad_norm": 0.3726893663406372, "learning_rate": 4.6775543294578285e-07, "loss": 0.0091, "step": 207270 }, { "epoch": 1.7502691520128348, "grad_norm": 0.005674545653164387, "learning_rate": 4.674442855471428e-07, "loss": 0.0108, "step": 207280 }, { "epoch": 1.7503535918599988, "grad_norm": 0.29525384306907654, "learning_rate": 4.671332365946857e-07, "loss": 0.0041, "step": 207290 }, { "epoch": 1.7504380317071626, "grad_norm": 0.13917726278305054, "learning_rate": 4.6682228609516434e-07, "loss": 0.0093, "step": 207300 }, { "epoch": 1.7505224715543264, "grad_norm": 0.28790798783302307, "learning_rate": 4.6651143405533505e-07, "loss": 0.0106, "step": 207310 }, { "epoch": 1.7506069114014904, "grad_norm": 0.11928297579288483, "learning_rate": 4.662006804819486e-07, "loss": 0.0051, "step": 207320 }, { "epoch": 1.7506913512486544, "grad_norm": 0.19471196830272675, "learning_rate": 4.6589002538175387e-07, "loss": 0.0041, "step": 207330 }, { "epoch": 1.7507757910958182, "grad_norm": 0.2958598732948303, "learning_rate": 4.655794687614984e-07, "loss": 0.007, "step": 207340 }, { "epoch": 1.750860230942982, "grad_norm": 0.29135122895240784, "learning_rate": 4.6526901062792786e-07, "loss": 0.006, "step": 207350 }, { "epoch": 1.7509446707901457, "grad_norm": 0.02155878208577633, "learning_rate": 4.649586509877857e-07, "loss": 0.006, "step": 207360 }, { "epoch": 1.7510291106373097, "grad_norm": 0.11307933181524277, "learning_rate": 4.64648389847811e-07, "loss": 0.0065, "step": 207370 }, { "epoch": 1.7511135504844737, "grad_norm": 0.33615636825561523, "learning_rate": 4.643382272147451e-07, "loss": 0.0099, "step": 207380 }, { "epoch": 1.7511979903316375, "grad_norm": 0.2139975130558014, "learning_rate": 4.640281630953225e-07, "loss": 0.0085, "step": 207390 }, { "epoch": 1.7512824301788013, "grad_norm": 0.2048797756433487, "learning_rate": 4.6371819749627957e-07, "loss": 0.0039, "step": 207400 }, { "epoch": 1.7513668700259652, "grad_norm": 0.16048802435398102, "learning_rate": 4.6340833042434807e-07, "loss": 0.0043, "step": 207410 }, { "epoch": 1.7514513098731292, "grad_norm": 0.26169365644454956, "learning_rate": 4.630985618862577e-07, "loss": 0.0079, "step": 207420 }, { "epoch": 1.751535749720293, "grad_norm": 0.15434956550598145, "learning_rate": 4.6278889188873633e-07, "loss": 0.0072, "step": 207430 }, { "epoch": 1.7516201895674568, "grad_norm": 0.2599782645702362, "learning_rate": 4.6247932043851083e-07, "loss": 0.0043, "step": 207440 }, { "epoch": 1.7517046294146208, "grad_norm": 0.20627976953983307, "learning_rate": 4.621698475423042e-07, "loss": 0.0051, "step": 207450 }, { "epoch": 1.7517890692617848, "grad_norm": 0.09823766350746155, "learning_rate": 4.618604732068388e-07, "loss": 0.0046, "step": 207460 }, { "epoch": 1.7518735091089486, "grad_norm": 0.3277471959590912, "learning_rate": 4.615511974388348e-07, "loss": 0.0112, "step": 207470 }, { "epoch": 1.7519579489561123, "grad_norm": 0.3077928125858307, "learning_rate": 4.612420202450085e-07, "loss": 0.005, "step": 207480 }, { "epoch": 1.7520423888032761, "grad_norm": 0.5953047871589661, "learning_rate": 4.6093294163207516e-07, "loss": 0.0085, "step": 207490 }, { "epoch": 1.7521268286504401, "grad_norm": 0.3766317367553711, "learning_rate": 4.606239616067476e-07, "loss": 0.0068, "step": 207500 }, { "epoch": 1.7522112684976041, "grad_norm": 0.12951886653900146, "learning_rate": 4.6031508017573835e-07, "loss": 0.0038, "step": 207510 }, { "epoch": 1.7522957083447679, "grad_norm": 0.16574467718601227, "learning_rate": 4.600062973457542e-07, "loss": 0.0023, "step": 207520 }, { "epoch": 1.7523801481919317, "grad_norm": 0.38396090269088745, "learning_rate": 4.596976131235037e-07, "loss": 0.0054, "step": 207530 }, { "epoch": 1.7524645880390957, "grad_norm": 0.35269102454185486, "learning_rate": 4.593890275156898e-07, "loss": 0.0042, "step": 207540 }, { "epoch": 1.7525490278862597, "grad_norm": 0.21134313941001892, "learning_rate": 4.590805405290166e-07, "loss": 0.0068, "step": 207550 }, { "epoch": 1.7526334677334234, "grad_norm": 0.43448373675346375, "learning_rate": 4.587721521701838e-07, "loss": 0.0043, "step": 207560 }, { "epoch": 1.7527179075805872, "grad_norm": 0.14936955273151398, "learning_rate": 4.5846386244588923e-07, "loss": 0.0045, "step": 207570 }, { "epoch": 1.7528023474277512, "grad_norm": 0.03760906681418419, "learning_rate": 4.5815567136282824e-07, "loss": 0.0094, "step": 207580 }, { "epoch": 1.752886787274915, "grad_norm": 0.18573080003261566, "learning_rate": 4.5784757892769594e-07, "loss": 0.0076, "step": 207590 }, { "epoch": 1.752971227122079, "grad_norm": 0.8792084455490112, "learning_rate": 4.5753958514718366e-07, "loss": 0.0059, "step": 207600 }, { "epoch": 1.7530556669692428, "grad_norm": 0.1686713844537735, "learning_rate": 4.572316900279794e-07, "loss": 0.0078, "step": 207610 }, { "epoch": 1.7531401068164065, "grad_norm": 0.3499813973903656, "learning_rate": 4.5692389357677335e-07, "loss": 0.0051, "step": 207620 }, { "epoch": 1.7532245466635705, "grad_norm": 0.3116138279438019, "learning_rate": 4.5661619580024797e-07, "loss": 0.0046, "step": 207630 }, { "epoch": 1.7533089865107345, "grad_norm": 0.0208426546305418, "learning_rate": 4.56308596705089e-07, "loss": 0.0068, "step": 207640 }, { "epoch": 1.7533934263578983, "grad_norm": 0.34945616126060486, "learning_rate": 4.5600109629797604e-07, "loss": 0.0036, "step": 207650 }, { "epoch": 1.753477866205062, "grad_norm": 0.06841325014829636, "learning_rate": 4.556936945855883e-07, "loss": 0.006, "step": 207660 }, { "epoch": 1.753562306052226, "grad_norm": 0.01946825534105301, "learning_rate": 4.553863915746015e-07, "loss": 0.0049, "step": 207670 }, { "epoch": 1.75364674589939, "grad_norm": 0.3994016647338867, "learning_rate": 4.550791872716914e-07, "loss": 0.0063, "step": 207680 }, { "epoch": 1.7537311857465538, "grad_norm": 0.3208036720752716, "learning_rate": 4.5477208168352993e-07, "loss": 0.0033, "step": 207690 }, { "epoch": 1.7538156255937176, "grad_norm": 0.10188563913106918, "learning_rate": 4.5446507481678783e-07, "loss": 0.0061, "step": 207700 }, { "epoch": 1.7539000654408814, "grad_norm": 0.07927217334508896, "learning_rate": 4.541581666781325e-07, "loss": 0.0065, "step": 207710 }, { "epoch": 1.7539845052880454, "grad_norm": 0.7638803124427795, "learning_rate": 4.538513572742298e-07, "loss": 0.0074, "step": 207720 }, { "epoch": 1.7540689451352094, "grad_norm": 0.2006426900625229, "learning_rate": 4.5354464661174603e-07, "loss": 0.0044, "step": 207730 }, { "epoch": 1.7541533849823732, "grad_norm": 0.4318699240684509, "learning_rate": 4.5323803469733863e-07, "loss": 0.0084, "step": 207740 }, { "epoch": 1.754237824829537, "grad_norm": 0.16044512391090393, "learning_rate": 4.529315215376706e-07, "loss": 0.0044, "step": 207750 }, { "epoch": 1.754322264676701, "grad_norm": 0.031493350863456726, "learning_rate": 4.5262510713939667e-07, "loss": 0.0053, "step": 207760 }, { "epoch": 1.754406704523865, "grad_norm": 0.2932266592979431, "learning_rate": 4.5231879150917535e-07, "loss": 0.0048, "step": 207770 }, { "epoch": 1.7544911443710287, "grad_norm": 0.37680235505104065, "learning_rate": 4.5201257465365633e-07, "loss": 0.006, "step": 207780 }, { "epoch": 1.7545755842181925, "grad_norm": 0.3597191274166107, "learning_rate": 4.5170645657949317e-07, "loss": 0.0074, "step": 207790 }, { "epoch": 1.7546600240653565, "grad_norm": 0.6293911933898926, "learning_rate": 4.514004372933345e-07, "loss": 0.01, "step": 207800 }, { "epoch": 1.7547444639125205, "grad_norm": 0.08973763883113861, "learning_rate": 4.510945168018255e-07, "loss": 0.0044, "step": 207810 }, { "epoch": 1.7548289037596843, "grad_norm": 0.3382999300956726, "learning_rate": 4.5078869511161084e-07, "loss": 0.0069, "step": 207820 }, { "epoch": 1.754913343606848, "grad_norm": 0.4269005358219147, "learning_rate": 4.5048297222933415e-07, "loss": 0.0074, "step": 207830 }, { "epoch": 1.7549977834540118, "grad_norm": 0.13827894628047943, "learning_rate": 4.5017734816163505e-07, "loss": 0.0064, "step": 207840 }, { "epoch": 1.7550822233011758, "grad_norm": 0.061399735510349274, "learning_rate": 4.4987182291515106e-07, "loss": 0.0078, "step": 207850 }, { "epoch": 1.7551666631483398, "grad_norm": 0.15199191868305206, "learning_rate": 4.4956639649651914e-07, "loss": 0.0067, "step": 207860 }, { "epoch": 1.7552511029955036, "grad_norm": 0.008397286757826805, "learning_rate": 4.492610689123722e-07, "loss": 0.0065, "step": 207870 }, { "epoch": 1.7553355428426674, "grad_norm": 0.4038925766944885, "learning_rate": 4.489558401693428e-07, "loss": 0.0075, "step": 207880 }, { "epoch": 1.7554199826898313, "grad_norm": 0.2875782549381256, "learning_rate": 4.4865071027406e-07, "loss": 0.0048, "step": 207890 }, { "epoch": 1.7555044225369953, "grad_norm": 0.000245025526965037, "learning_rate": 4.4834567923315144e-07, "loss": 0.0057, "step": 207900 }, { "epoch": 1.7555888623841591, "grad_norm": 0.01686745509505272, "learning_rate": 4.480407470532405e-07, "loss": 0.0102, "step": 207910 }, { "epoch": 1.755673302231323, "grad_norm": 0.6150012016296387, "learning_rate": 4.4773591374095314e-07, "loss": 0.0119, "step": 207920 }, { "epoch": 1.7557577420784867, "grad_norm": 0.21834342181682587, "learning_rate": 4.474311793029079e-07, "loss": 0.006, "step": 207930 }, { "epoch": 1.7558421819256507, "grad_norm": 0.27188339829444885, "learning_rate": 4.4712654374572507e-07, "loss": 0.0097, "step": 207940 }, { "epoch": 1.7559266217728147, "grad_norm": 0.20751114189624786, "learning_rate": 4.4682200707602094e-07, "loss": 0.0046, "step": 207950 }, { "epoch": 1.7560110616199784, "grad_norm": 0.2578751742839813, "learning_rate": 4.4651756930040926e-07, "loss": 0.0041, "step": 207960 }, { "epoch": 1.7560955014671422, "grad_norm": 0.26133599877357483, "learning_rate": 4.4621323042550344e-07, "loss": 0.0069, "step": 207970 }, { "epoch": 1.7561799413143062, "grad_norm": 0.25952547788619995, "learning_rate": 4.4590899045791336e-07, "loss": 0.0066, "step": 207980 }, { "epoch": 1.7562643811614702, "grad_norm": 0.27635079622268677, "learning_rate": 4.456048494042464e-07, "loss": 0.0068, "step": 207990 }, { "epoch": 1.756348821008634, "grad_norm": 0.30898934602737427, "learning_rate": 4.453008072711079e-07, "loss": 0.0065, "step": 208000 }, { "epoch": 1.7564332608557978, "grad_norm": 0.08770794421434402, "learning_rate": 4.449968640651037e-07, "loss": 0.0045, "step": 208010 }, { "epoch": 1.7565177007029618, "grad_norm": 0.14779025316238403, "learning_rate": 4.446930197928334e-07, "loss": 0.0027, "step": 208020 }, { "epoch": 1.7566021405501258, "grad_norm": 0.1462589055299759, "learning_rate": 4.44389274460898e-07, "loss": 0.0054, "step": 208030 }, { "epoch": 1.7566865803972895, "grad_norm": 0.19369886815547943, "learning_rate": 4.4408562807589317e-07, "loss": 0.0049, "step": 208040 }, { "epoch": 1.7567710202444533, "grad_norm": 0.1446387767791748, "learning_rate": 4.437820806444165e-07, "loss": 0.0077, "step": 208050 }, { "epoch": 1.756855460091617, "grad_norm": 0.2917606830596924, "learning_rate": 4.4347863217305885e-07, "loss": 0.0056, "step": 208060 }, { "epoch": 1.756939899938781, "grad_norm": 0.180471733212471, "learning_rate": 4.431752826684105e-07, "loss": 0.0033, "step": 208070 }, { "epoch": 1.757024339785945, "grad_norm": 0.3133702278137207, "learning_rate": 4.4287203213706164e-07, "loss": 0.0041, "step": 208080 }, { "epoch": 1.7571087796331089, "grad_norm": 0.038064632564783096, "learning_rate": 4.425688805855982e-07, "loss": 0.0035, "step": 208090 }, { "epoch": 1.7571932194802726, "grad_norm": 0.349180668592453, "learning_rate": 4.4226582802060546e-07, "loss": 0.0081, "step": 208100 }, { "epoch": 1.7572776593274366, "grad_norm": 0.1303754299879074, "learning_rate": 4.419628744486637e-07, "loss": 0.0083, "step": 208110 }, { "epoch": 1.7573620991746006, "grad_norm": 0.4607946276664734, "learning_rate": 4.4166001987635544e-07, "loss": 0.005, "step": 208120 }, { "epoch": 1.7574465390217644, "grad_norm": 0.3034580647945404, "learning_rate": 4.413572643102576e-07, "loss": 0.0095, "step": 208130 }, { "epoch": 1.7575309788689282, "grad_norm": 0.06627050787210464, "learning_rate": 4.410546077569461e-07, "loss": 0.0066, "step": 208140 }, { "epoch": 1.7576154187160922, "grad_norm": 0.05412248149514198, "learning_rate": 4.4075205022299286e-07, "loss": 0.0066, "step": 208150 }, { "epoch": 1.757699858563256, "grad_norm": 0.15770074725151062, "learning_rate": 4.4044959171497204e-07, "loss": 0.0057, "step": 208160 }, { "epoch": 1.75778429841042, "grad_norm": 0.16196271777153015, "learning_rate": 4.4014723223945065e-07, "loss": 0.0073, "step": 208170 }, { "epoch": 1.7578687382575837, "grad_norm": 0.2507202625274658, "learning_rate": 4.398449718029979e-07, "loss": 0.0067, "step": 208180 }, { "epoch": 1.7579531781047475, "grad_norm": 0.20694562792778015, "learning_rate": 4.3954281041217794e-07, "loss": 0.0048, "step": 208190 }, { "epoch": 1.7580376179519115, "grad_norm": 0.20362736284732819, "learning_rate": 4.3924074807355276e-07, "loss": 0.0066, "step": 208200 }, { "epoch": 1.7581220577990755, "grad_norm": 0.3489336669445038, "learning_rate": 4.3893878479368435e-07, "loss": 0.0104, "step": 208210 }, { "epoch": 1.7582064976462393, "grad_norm": 0.11252975463867188, "learning_rate": 4.386369205791313e-07, "loss": 0.0062, "step": 208220 }, { "epoch": 1.758290937493403, "grad_norm": 0.6519230008125305, "learning_rate": 4.3833515543644955e-07, "loss": 0.01, "step": 208230 }, { "epoch": 1.758375377340567, "grad_norm": 0.11721077561378479, "learning_rate": 4.380334893721927e-07, "loss": 0.0053, "step": 208240 }, { "epoch": 1.758459817187731, "grad_norm": 0.23754973709583282, "learning_rate": 4.377319223929144e-07, "loss": 0.0063, "step": 208250 }, { "epoch": 1.7585442570348948, "grad_norm": 0.3329567611217499, "learning_rate": 4.3743045450516333e-07, "loss": 0.0094, "step": 208260 }, { "epoch": 1.7586286968820586, "grad_norm": 0.3665548264980316, "learning_rate": 4.3712908571548806e-07, "loss": 0.0083, "step": 208270 }, { "epoch": 1.7587131367292224, "grad_norm": 0.060195788741111755, "learning_rate": 4.3682781603043346e-07, "loss": 0.0066, "step": 208280 }, { "epoch": 1.7587975765763864, "grad_norm": 0.26524046063423157, "learning_rate": 4.365266454565448e-07, "loss": 0.0034, "step": 208290 }, { "epoch": 1.7588820164235504, "grad_norm": 0.38451477885246277, "learning_rate": 4.362255740003618e-07, "loss": 0.0038, "step": 208300 }, { "epoch": 1.7589664562707141, "grad_norm": 0.2202395498752594, "learning_rate": 4.359246016684243e-07, "loss": 0.0111, "step": 208310 }, { "epoch": 1.759050896117878, "grad_norm": 0.1939460188150406, "learning_rate": 4.3562372846726933e-07, "loss": 0.0062, "step": 208320 }, { "epoch": 1.759135335965042, "grad_norm": 0.08808081597089767, "learning_rate": 4.3532295440343044e-07, "loss": 0.0044, "step": 208330 }, { "epoch": 1.759219775812206, "grad_norm": 0.05975731462240219, "learning_rate": 4.35022279483443e-07, "loss": 0.0054, "step": 208340 }, { "epoch": 1.7593042156593697, "grad_norm": 0.7199229598045349, "learning_rate": 4.347217037138346e-07, "loss": 0.01, "step": 208350 }, { "epoch": 1.7593886555065334, "grad_norm": 0.1722405105829239, "learning_rate": 4.3442122710113663e-07, "loss": 0.0055, "step": 208360 }, { "epoch": 1.7594730953536974, "grad_norm": 0.4413309395313263, "learning_rate": 4.3412084965187394e-07, "loss": 0.0063, "step": 208370 }, { "epoch": 1.7595575352008614, "grad_norm": 0.17414845526218414, "learning_rate": 4.3382057137257073e-07, "loss": 0.0026, "step": 208380 }, { "epoch": 1.7596419750480252, "grad_norm": 0.17786577343940735, "learning_rate": 4.3352039226974783e-07, "loss": 0.0056, "step": 208390 }, { "epoch": 1.759726414895189, "grad_norm": 0.46485573053359985, "learning_rate": 4.3322031234992787e-07, "loss": 0.009, "step": 208400 }, { "epoch": 1.7598108547423528, "grad_norm": 0.040716029703617096, "learning_rate": 4.3292033161962564e-07, "loss": 0.0033, "step": 208410 }, { "epoch": 1.7598952945895168, "grad_norm": 0.07192449271678925, "learning_rate": 4.326204500853587e-07, "loss": 0.0058, "step": 208420 }, { "epoch": 1.7599797344366808, "grad_norm": 0.4043397605419159, "learning_rate": 4.3232066775363955e-07, "loss": 0.0074, "step": 208430 }, { "epoch": 1.7600641742838445, "grad_norm": 0.1219029501080513, "learning_rate": 4.3202098463097866e-07, "loss": 0.005, "step": 208440 }, { "epoch": 1.7601486141310083, "grad_norm": 0.44636446237564087, "learning_rate": 4.317214007238868e-07, "loss": 0.006, "step": 208450 }, { "epoch": 1.7602330539781723, "grad_norm": 0.24925042688846588, "learning_rate": 4.314219160388705e-07, "loss": 0.0083, "step": 208460 }, { "epoch": 1.7603174938253363, "grad_norm": 0.7801501154899597, "learning_rate": 4.311225305824335e-07, "loss": 0.0073, "step": 208470 }, { "epoch": 1.7604019336725, "grad_norm": 0.3202361762523651, "learning_rate": 4.308232443610777e-07, "loss": 0.0032, "step": 208480 }, { "epoch": 1.7604863735196639, "grad_norm": 0.15892401337623596, "learning_rate": 4.305240573813063e-07, "loss": 0.0055, "step": 208490 }, { "epoch": 1.7605708133668279, "grad_norm": 0.5462918281555176, "learning_rate": 4.302249696496147e-07, "loss": 0.0035, "step": 208500 }, { "epoch": 1.7606552532139916, "grad_norm": 0.22923888266086578, "learning_rate": 4.299259811725015e-07, "loss": 0.0048, "step": 208510 }, { "epoch": 1.7607396930611556, "grad_norm": 0.12811782956123352, "learning_rate": 4.296270919564582e-07, "loss": 0.0106, "step": 208520 }, { "epoch": 1.7608241329083194, "grad_norm": 0.2263890504837036, "learning_rate": 4.2932830200797913e-07, "loss": 0.0039, "step": 208530 }, { "epoch": 1.7609085727554832, "grad_norm": 0.34987711906433105, "learning_rate": 4.2902961133355235e-07, "loss": 0.0071, "step": 208540 }, { "epoch": 1.7609930126026472, "grad_norm": 0.1910666972398758, "learning_rate": 4.2873101993966605e-07, "loss": 0.0084, "step": 208550 }, { "epoch": 1.7610774524498112, "grad_norm": 0.6209776401519775, "learning_rate": 4.28432527832805e-07, "loss": 0.0042, "step": 208560 }, { "epoch": 1.761161892296975, "grad_norm": 0.1992366462945938, "learning_rate": 4.2813413501945235e-07, "loss": 0.0046, "step": 208570 }, { "epoch": 1.7612463321441387, "grad_norm": 0.15440070629119873, "learning_rate": 4.278358415060896e-07, "loss": 0.0051, "step": 208580 }, { "epoch": 1.7613307719913027, "grad_norm": 0.2715940475463867, "learning_rate": 4.2753764729919487e-07, "loss": 0.0023, "step": 208590 }, { "epoch": 1.7614152118384667, "grad_norm": 0.28375598788261414, "learning_rate": 4.272395524052464e-07, "loss": 0.0046, "step": 208600 }, { "epoch": 1.7614996516856305, "grad_norm": 0.28091686964035034, "learning_rate": 4.269415568307167e-07, "loss": 0.0054, "step": 208610 }, { "epoch": 1.7615840915327943, "grad_norm": 0.2503397762775421, "learning_rate": 4.266436605820806e-07, "loss": 0.0075, "step": 208620 }, { "epoch": 1.761668531379958, "grad_norm": 0.1416601687669754, "learning_rate": 4.263458636658063e-07, "loss": 0.0049, "step": 208630 }, { "epoch": 1.761752971227122, "grad_norm": 0.1393594890832901, "learning_rate": 4.2604816608836306e-07, "loss": 0.0066, "step": 208640 }, { "epoch": 1.761837411074286, "grad_norm": 0.4150563180446625, "learning_rate": 4.2575056785621627e-07, "loss": 0.0028, "step": 208650 }, { "epoch": 1.7619218509214498, "grad_norm": 0.40479835867881775, "learning_rate": 4.2545306897582907e-07, "loss": 0.0084, "step": 208660 }, { "epoch": 1.7620062907686136, "grad_norm": 0.28755730390548706, "learning_rate": 4.251556694536646e-07, "loss": 0.0102, "step": 208670 }, { "epoch": 1.7620907306157776, "grad_norm": 0.07964251935482025, "learning_rate": 4.248583692961805e-07, "loss": 0.005, "step": 208680 }, { "epoch": 1.7621751704629416, "grad_norm": 0.2886815071105957, "learning_rate": 4.245611685098361e-07, "loss": 0.0032, "step": 208690 }, { "epoch": 1.7622596103101054, "grad_norm": 0.38085028529167175, "learning_rate": 4.242640671010856e-07, "loss": 0.0051, "step": 208700 }, { "epoch": 1.7623440501572691, "grad_norm": 0.01471676304936409, "learning_rate": 4.239670650763816e-07, "loss": 0.0051, "step": 208710 }, { "epoch": 1.7624284900044331, "grad_norm": 0.2278003841638565, "learning_rate": 4.2367016244217464e-07, "loss": 0.0033, "step": 208720 }, { "epoch": 1.7625129298515971, "grad_norm": 0.19851282238960266, "learning_rate": 4.2337335920491497e-07, "loss": 0.0053, "step": 208730 }, { "epoch": 1.762597369698761, "grad_norm": 0.2355044186115265, "learning_rate": 4.230766553710469e-07, "loss": 0.0048, "step": 208740 }, { "epoch": 1.7626818095459247, "grad_norm": 0.8475155830383301, "learning_rate": 4.2278005094701703e-07, "loss": 0.008, "step": 208750 }, { "epoch": 1.7627662493930885, "grad_norm": 0.3091834485530853, "learning_rate": 4.224835459392657e-07, "loss": 0.0082, "step": 208760 }, { "epoch": 1.7628506892402525, "grad_norm": 0.12928086519241333, "learning_rate": 4.2218714035423437e-07, "loss": 0.0086, "step": 208770 }, { "epoch": 1.7629351290874165, "grad_norm": 0.24896690249443054, "learning_rate": 4.2189083419836017e-07, "loss": 0.0047, "step": 208780 }, { "epoch": 1.7630195689345802, "grad_norm": 0.3271437883377075, "learning_rate": 4.215946274780791e-07, "loss": 0.0059, "step": 208790 }, { "epoch": 1.763104008781744, "grad_norm": 0.01918783038854599, "learning_rate": 4.212985201998249e-07, "loss": 0.0048, "step": 208800 }, { "epoch": 1.763188448628908, "grad_norm": 0.6052986979484558, "learning_rate": 4.2100251237002733e-07, "loss": 0.0087, "step": 208810 }, { "epoch": 1.763272888476072, "grad_norm": 0.17857171595096588, "learning_rate": 4.207066039951174e-07, "loss": 0.0113, "step": 208820 }, { "epoch": 1.7633573283232358, "grad_norm": 0.3831101655960083, "learning_rate": 4.2041079508152117e-07, "loss": 0.0092, "step": 208830 }, { "epoch": 1.7634417681703995, "grad_norm": 0.08263246715068817, "learning_rate": 4.201150856356651e-07, "loss": 0.0083, "step": 208840 }, { "epoch": 1.7635262080175633, "grad_norm": 0.13859868049621582, "learning_rate": 4.198194756639695e-07, "loss": 0.0045, "step": 208850 }, { "epoch": 1.7636106478647273, "grad_norm": 0.3165084421634674, "learning_rate": 4.195239651728572e-07, "loss": 0.0134, "step": 208860 }, { "epoch": 1.7636950877118913, "grad_norm": 0.40769532322883606, "learning_rate": 4.192285541687463e-07, "loss": 0.0068, "step": 208870 }, { "epoch": 1.763779527559055, "grad_norm": 0.1244405210018158, "learning_rate": 4.1893324265805223e-07, "loss": 0.0036, "step": 208880 }, { "epoch": 1.7638639674062189, "grad_norm": 0.028517374768853188, "learning_rate": 4.1863803064718934e-07, "loss": 0.0046, "step": 208890 }, { "epoch": 1.7639484072533829, "grad_norm": 0.4173310399055481, "learning_rate": 4.1834291814256853e-07, "loss": 0.0072, "step": 208900 }, { "epoch": 1.7640328471005469, "grad_norm": 0.1763736456632614, "learning_rate": 4.1804790515060144e-07, "loss": 0.0097, "step": 208910 }, { "epoch": 1.7641172869477106, "grad_norm": 0.4016264081001282, "learning_rate": 4.177529916776946e-07, "loss": 0.0063, "step": 208920 }, { "epoch": 1.7642017267948744, "grad_norm": 0.585898220539093, "learning_rate": 4.1745817773025446e-07, "loss": 0.0054, "step": 208930 }, { "epoch": 1.7642861666420384, "grad_norm": 0.23593179881572723, "learning_rate": 4.171634633146826e-07, "loss": 0.0062, "step": 208940 }, { "epoch": 1.7643706064892024, "grad_norm": 0.2868289351463318, "learning_rate": 4.168688484373834e-07, "loss": 0.0089, "step": 208950 }, { "epoch": 1.7644550463363662, "grad_norm": 0.29660847783088684, "learning_rate": 4.1657433310475116e-07, "loss": 0.01, "step": 208960 }, { "epoch": 1.76453948618353, "grad_norm": 0.41219714283943176, "learning_rate": 4.1627991732318683e-07, "loss": 0.0097, "step": 208970 }, { "epoch": 1.7646239260306937, "grad_norm": 0.0014357181498780847, "learning_rate": 4.1598560109908203e-07, "loss": 0.0062, "step": 208980 }, { "epoch": 1.7647083658778577, "grad_norm": 0.31693536043167114, "learning_rate": 4.156913844388316e-07, "loss": 0.0073, "step": 208990 }, { "epoch": 1.7647928057250217, "grad_norm": 0.11075141280889511, "learning_rate": 4.153972673488238e-07, "loss": 0.0058, "step": 209000 }, { "epoch": 1.7648772455721855, "grad_norm": 0.40416479110717773, "learning_rate": 4.15103249835449e-07, "loss": 0.0117, "step": 209010 }, { "epoch": 1.7649616854193493, "grad_norm": 0.20200414955615997, "learning_rate": 4.148093319050922e-07, "loss": 0.0052, "step": 209020 }, { "epoch": 1.7650461252665133, "grad_norm": 0.8710151314735413, "learning_rate": 4.145155135641371e-07, "loss": 0.0068, "step": 209030 }, { "epoch": 1.7651305651136773, "grad_norm": 0.181033194065094, "learning_rate": 4.142217948189653e-07, "loss": 0.0036, "step": 209040 }, { "epoch": 1.765215004960841, "grad_norm": 0.34799492359161377, "learning_rate": 4.13928175675955e-07, "loss": 0.0046, "step": 209050 }, { "epoch": 1.7652994448080048, "grad_norm": 0.3507753312587738, "learning_rate": 4.1363465614148667e-07, "loss": 0.0059, "step": 209060 }, { "epoch": 1.7653838846551688, "grad_norm": 0.34107354283332825, "learning_rate": 4.1334123622193243e-07, "loss": 0.0096, "step": 209070 }, { "epoch": 1.7654683245023326, "grad_norm": 0.14985300600528717, "learning_rate": 4.130479159236678e-07, "loss": 0.0048, "step": 209080 }, { "epoch": 1.7655527643494966, "grad_norm": 0.5260441899299622, "learning_rate": 4.1275469525306144e-07, "loss": 0.0038, "step": 209090 }, { "epoch": 1.7656372041966604, "grad_norm": 0.04278240725398064, "learning_rate": 4.124615742164839e-07, "loss": 0.008, "step": 209100 }, { "epoch": 1.7657216440438241, "grad_norm": 0.26510751247406006, "learning_rate": 4.1216855282030065e-07, "loss": 0.0049, "step": 209110 }, { "epoch": 1.7658060838909881, "grad_norm": 0.0015313540352508426, "learning_rate": 4.118756310708766e-07, "loss": 0.005, "step": 209120 }, { "epoch": 1.7658905237381521, "grad_norm": 0.015245632268488407, "learning_rate": 4.1158280897457383e-07, "loss": 0.0077, "step": 209130 }, { "epoch": 1.765974963585316, "grad_norm": 0.1663704812526703, "learning_rate": 4.1129008653775117e-07, "loss": 0.0114, "step": 209140 }, { "epoch": 1.7660594034324797, "grad_norm": 0.007054193876683712, "learning_rate": 4.1099746376676797e-07, "loss": 0.0071, "step": 209150 }, { "epoch": 1.7661438432796437, "grad_norm": 0.8898553848266602, "learning_rate": 4.1070494066797915e-07, "loss": 0.0082, "step": 209160 }, { "epoch": 1.7662282831268077, "grad_norm": 0.12380234897136688, "learning_rate": 4.1041251724773914e-07, "loss": 0.0047, "step": 209170 }, { "epoch": 1.7663127229739715, "grad_norm": 0.3072020411491394, "learning_rate": 4.1012019351239827e-07, "loss": 0.0074, "step": 209180 }, { "epoch": 1.7663971628211352, "grad_norm": 0.06269249320030212, "learning_rate": 4.098279694683066e-07, "loss": 0.0106, "step": 209190 }, { "epoch": 1.766481602668299, "grad_norm": 0.3326943814754486, "learning_rate": 4.0953584512181124e-07, "loss": 0.009, "step": 209200 }, { "epoch": 1.766566042515463, "grad_norm": 0.18376775085926056, "learning_rate": 4.092438204792565e-07, "loss": 0.0018, "step": 209210 }, { "epoch": 1.766650482362627, "grad_norm": 0.26872679591178894, "learning_rate": 4.0895189554698456e-07, "loss": 0.0046, "step": 209220 }, { "epoch": 1.7667349222097908, "grad_norm": 1.0175701379776, "learning_rate": 4.0866007033133705e-07, "loss": 0.0054, "step": 209230 }, { "epoch": 1.7668193620569546, "grad_norm": 0.1827528476715088, "learning_rate": 4.0836834483865216e-07, "loss": 0.0044, "step": 209240 }, { "epoch": 1.7669038019041186, "grad_norm": 0.8454668521881104, "learning_rate": 4.080767190752655e-07, "loss": 0.0135, "step": 209250 }, { "epoch": 1.7669882417512826, "grad_norm": 0.26119014620780945, "learning_rate": 4.0778519304751183e-07, "loss": 0.0044, "step": 209260 }, { "epoch": 1.7670726815984463, "grad_norm": 0.4408814311027527, "learning_rate": 4.074937667617229e-07, "loss": 0.004, "step": 209270 }, { "epoch": 1.76715712144561, "grad_norm": 0.4544129967689514, "learning_rate": 4.0720244022422863e-07, "loss": 0.0054, "step": 209280 }, { "epoch": 1.767241561292774, "grad_norm": 0.12169776111841202, "learning_rate": 4.0691121344135445e-07, "loss": 0.0045, "step": 209290 }, { "epoch": 1.767326001139938, "grad_norm": 0.24471837282180786, "learning_rate": 4.066200864194292e-07, "loss": 0.0059, "step": 209300 }, { "epoch": 1.7674104409871019, "grad_norm": 0.3163957893848419, "learning_rate": 4.063290591647734e-07, "loss": 0.0063, "step": 209310 }, { "epoch": 1.7674948808342656, "grad_norm": 0.13283789157867432, "learning_rate": 4.0603813168370975e-07, "loss": 0.0034, "step": 209320 }, { "epoch": 1.7675793206814294, "grad_norm": 0.32591119408607483, "learning_rate": 4.0574730398255547e-07, "loss": 0.0086, "step": 209330 }, { "epoch": 1.7676637605285934, "grad_norm": 0.2632684111595154, "learning_rate": 4.0545657606762933e-07, "loss": 0.007, "step": 209340 }, { "epoch": 1.7677482003757574, "grad_norm": 0.4573342800140381, "learning_rate": 4.0516594794524524e-07, "loss": 0.0053, "step": 209350 }, { "epoch": 1.7678326402229212, "grad_norm": 0.49418947100639343, "learning_rate": 4.0487541962171473e-07, "loss": 0.0107, "step": 209360 }, { "epoch": 1.767917080070085, "grad_norm": 0.16607332229614258, "learning_rate": 4.045849911033484e-07, "loss": 0.0019, "step": 209370 }, { "epoch": 1.768001519917249, "grad_norm": 0.050102293491363525, "learning_rate": 4.042946623964539e-07, "loss": 0.0036, "step": 209380 }, { "epoch": 1.768085959764413, "grad_norm": 0.1800651252269745, "learning_rate": 4.0400443350733785e-07, "loss": 0.0044, "step": 209390 }, { "epoch": 1.7681703996115767, "grad_norm": 0.25873860716819763, "learning_rate": 4.037143044423036e-07, "loss": 0.0032, "step": 209400 }, { "epoch": 1.7682548394587405, "grad_norm": 0.18245753645896912, "learning_rate": 4.0342427520765335e-07, "loss": 0.0052, "step": 209410 }, { "epoch": 1.7683392793059045, "grad_norm": 0.4262019395828247, "learning_rate": 4.031343458096848e-07, "loss": 0.0087, "step": 209420 }, { "epoch": 1.7684237191530683, "grad_norm": 0.45890548825263977, "learning_rate": 4.0284451625469735e-07, "loss": 0.0097, "step": 209430 }, { "epoch": 1.7685081590002323, "grad_norm": 0.10407900810241699, "learning_rate": 4.0255478654898483e-07, "loss": 0.0047, "step": 209440 }, { "epoch": 1.768592598847396, "grad_norm": 0.09885438531637192, "learning_rate": 4.022651566988406e-07, "loss": 0.0019, "step": 209450 }, { "epoch": 1.7686770386945598, "grad_norm": 0.1057288721203804, "learning_rate": 4.0197562671055346e-07, "loss": 0.0071, "step": 209460 }, { "epoch": 1.7687614785417238, "grad_norm": 0.30910810828208923, "learning_rate": 4.0168619659041506e-07, "loss": 0.0036, "step": 209470 }, { "epoch": 1.7688459183888878, "grad_norm": 0.3516436815261841, "learning_rate": 4.0139686634470985e-07, "loss": 0.0103, "step": 209480 }, { "epoch": 1.7689303582360516, "grad_norm": 0.1894189566373825, "learning_rate": 4.0110763597972116e-07, "loss": 0.0059, "step": 209490 }, { "epoch": 1.7690147980832154, "grad_norm": 0.19209064543247223, "learning_rate": 4.0081850550173387e-07, "loss": 0.0095, "step": 209500 }, { "epoch": 1.7690992379303794, "grad_norm": 0.10066404938697815, "learning_rate": 4.0052947491702465e-07, "loss": 0.0075, "step": 209510 }, { "epoch": 1.7691836777775434, "grad_norm": 0.23970483243465424, "learning_rate": 4.002405442318741e-07, "loss": 0.0092, "step": 209520 }, { "epoch": 1.7692681176247071, "grad_norm": 0.261111319065094, "learning_rate": 3.99951713452556e-07, "loss": 0.0055, "step": 209530 }, { "epoch": 1.769352557471871, "grad_norm": 0.29306235909461975, "learning_rate": 3.9966298258534374e-07, "loss": 0.0044, "step": 209540 }, { "epoch": 1.7694369973190347, "grad_norm": 0.4952750504016876, "learning_rate": 3.993743516365084e-07, "loss": 0.0072, "step": 209550 }, { "epoch": 1.7695214371661987, "grad_norm": 0.1967124342918396, "learning_rate": 3.990858206123205e-07, "loss": 0.0117, "step": 209560 }, { "epoch": 1.7696058770133627, "grad_norm": 0.3983415961265564, "learning_rate": 3.9879738951904443e-07, "loss": 0.0059, "step": 209570 }, { "epoch": 1.7696903168605265, "grad_norm": 0.14243288338184357, "learning_rate": 3.9850905836294696e-07, "loss": 0.006, "step": 209580 }, { "epoch": 1.7697747567076902, "grad_norm": 0.2174224704504013, "learning_rate": 3.982208271502902e-07, "loss": 0.0091, "step": 209590 }, { "epoch": 1.7698591965548542, "grad_norm": 1.0332584381103516, "learning_rate": 3.979326958873336e-07, "loss": 0.0108, "step": 209600 }, { "epoch": 1.7699436364020182, "grad_norm": 0.15840046107769012, "learning_rate": 3.9764466458033614e-07, "loss": 0.0055, "step": 209610 }, { "epoch": 1.770028076249182, "grad_norm": 0.8525984883308411, "learning_rate": 3.973567332355527e-07, "loss": 0.005, "step": 209620 }, { "epoch": 1.7701125160963458, "grad_norm": 0.24378903210163116, "learning_rate": 3.970689018592383e-07, "loss": 0.0036, "step": 209630 }, { "epoch": 1.7701969559435098, "grad_norm": 0.1367567628622055, "learning_rate": 3.9678117045764353e-07, "loss": 0.0042, "step": 209640 }, { "epoch": 1.7702813957906738, "grad_norm": 0.004947171546518803, "learning_rate": 3.9649353903701947e-07, "loss": 0.0038, "step": 209650 }, { "epoch": 1.7703658356378376, "grad_norm": 0.11485731601715088, "learning_rate": 3.962060076036112e-07, "loss": 0.0074, "step": 209660 }, { "epoch": 1.7704502754850013, "grad_norm": 0.1561988890171051, "learning_rate": 3.9591857616366634e-07, "loss": 0.0033, "step": 209670 }, { "epoch": 1.770534715332165, "grad_norm": 0.1220395490527153, "learning_rate": 3.956312447234262e-07, "loss": 0.0053, "step": 209680 }, { "epoch": 1.770619155179329, "grad_norm": 0.22494006156921387, "learning_rate": 3.953440132891317e-07, "loss": 0.0051, "step": 209690 }, { "epoch": 1.770703595026493, "grad_norm": 0.22204141318798065, "learning_rate": 3.950568818670214e-07, "loss": 0.0092, "step": 209700 }, { "epoch": 1.7707880348736569, "grad_norm": 0.3412647545337677, "learning_rate": 3.947698504633324e-07, "loss": 0.0029, "step": 209710 }, { "epoch": 1.7708724747208207, "grad_norm": 0.3694533705711365, "learning_rate": 3.944829190842986e-07, "loss": 0.0081, "step": 209720 }, { "epoch": 1.7709569145679847, "grad_norm": 0.0005685074720531702, "learning_rate": 3.9419608773615116e-07, "loss": 0.0083, "step": 209730 }, { "epoch": 1.7710413544151486, "grad_norm": 0.41289517283439636, "learning_rate": 3.939093564251217e-07, "loss": 0.0051, "step": 209740 }, { "epoch": 1.7711257942623124, "grad_norm": 0.19924679398536682, "learning_rate": 3.936227251574365e-07, "loss": 0.0047, "step": 209750 }, { "epoch": 1.7712102341094762, "grad_norm": 0.4128609299659729, "learning_rate": 3.933361939393221e-07, "loss": 0.0053, "step": 209760 }, { "epoch": 1.77129467395664, "grad_norm": 0.1703406125307083, "learning_rate": 3.9304976277700193e-07, "loss": 0.0038, "step": 209770 }, { "epoch": 1.771379113803804, "grad_norm": 0.16955207288265228, "learning_rate": 3.9276343167669704e-07, "loss": 0.0063, "step": 209780 }, { "epoch": 1.771463553650968, "grad_norm": 0.24466575682163239, "learning_rate": 3.9247720064462536e-07, "loss": 0.0085, "step": 209790 }, { "epoch": 1.7715479934981317, "grad_norm": 0.1343984305858612, "learning_rate": 3.921910696870057e-07, "loss": 0.0092, "step": 209800 }, { "epoch": 1.7716324333452955, "grad_norm": 0.25734564661979675, "learning_rate": 3.919050388100504e-07, "loss": 0.0054, "step": 209810 }, { "epoch": 1.7717168731924595, "grad_norm": 0.22566211223602295, "learning_rate": 3.9161910801997436e-07, "loss": 0.0031, "step": 209820 }, { "epoch": 1.7718013130396235, "grad_norm": 0.19684423506259918, "learning_rate": 3.913332773229872e-07, "loss": 0.0058, "step": 209830 }, { "epoch": 1.7718857528867873, "grad_norm": 0.27758514881134033, "learning_rate": 3.910475467252961e-07, "loss": 0.0076, "step": 209840 }, { "epoch": 1.771970192733951, "grad_norm": 0.22375664114952087, "learning_rate": 3.9076191623310945e-07, "loss": 0.0038, "step": 209850 }, { "epoch": 1.772054632581115, "grad_norm": 0.10131357610225677, "learning_rate": 3.904763858526278e-07, "loss": 0.0028, "step": 209860 }, { "epoch": 1.772139072428279, "grad_norm": 0.6977795362472534, "learning_rate": 3.9019095559005517e-07, "loss": 0.0037, "step": 209870 }, { "epoch": 1.7722235122754428, "grad_norm": 0.29630669951438904, "learning_rate": 3.8990562545158925e-07, "loss": 0.0079, "step": 209880 }, { "epoch": 1.7723079521226066, "grad_norm": 0.3029787540435791, "learning_rate": 3.8962039544342966e-07, "loss": 0.0057, "step": 209890 }, { "epoch": 1.7723923919697704, "grad_norm": 0.22302132844924927, "learning_rate": 3.8933526557176915e-07, "loss": 0.0036, "step": 209900 }, { "epoch": 1.7724768318169344, "grad_norm": 0.6213091611862183, "learning_rate": 3.890502358428033e-07, "loss": 0.0116, "step": 209910 }, { "epoch": 1.7725612716640984, "grad_norm": 0.5639835596084595, "learning_rate": 3.887653062627211e-07, "loss": 0.0054, "step": 209920 }, { "epoch": 1.7726457115112622, "grad_norm": 0.049733296036720276, "learning_rate": 3.884804768377115e-07, "loss": 0.0034, "step": 209930 }, { "epoch": 1.772730151358426, "grad_norm": 0.06241221725940704, "learning_rate": 3.8819574757396003e-07, "loss": 0.0028, "step": 209940 }, { "epoch": 1.77281459120559, "grad_norm": 0.3138463795185089, "learning_rate": 3.879111184776524e-07, "loss": 0.0059, "step": 209950 }, { "epoch": 1.772899031052754, "grad_norm": 0.0015401585260406137, "learning_rate": 3.876265895549708e-07, "loss": 0.0082, "step": 209960 }, { "epoch": 1.7729834708999177, "grad_norm": 0.03121633268892765, "learning_rate": 3.8734216081209366e-07, "loss": 0.0071, "step": 209970 }, { "epoch": 1.7730679107470815, "grad_norm": 0.17000658810138702, "learning_rate": 3.870578322551999e-07, "loss": 0.0063, "step": 209980 }, { "epoch": 1.7731523505942455, "grad_norm": 0.4967520833015442, "learning_rate": 3.8677360389046404e-07, "loss": 0.0056, "step": 209990 }, { "epoch": 1.7732367904414092, "grad_norm": 0.17693132162094116, "learning_rate": 3.8648947572406114e-07, "loss": 0.0048, "step": 210000 }, { "epoch": 1.7733212302885732, "grad_norm": 0.06353170424699783, "learning_rate": 3.8620544776216127e-07, "loss": 0.0029, "step": 210010 }, { "epoch": 1.773405670135737, "grad_norm": 0.9657537937164307, "learning_rate": 3.85921520010934e-07, "loss": 0.0031, "step": 210020 }, { "epoch": 1.7734901099829008, "grad_norm": 0.2719888985157013, "learning_rate": 3.856376924765448e-07, "loss": 0.0032, "step": 210030 }, { "epoch": 1.7735745498300648, "grad_norm": 0.005152872297912836, "learning_rate": 3.8535396516516e-07, "loss": 0.0066, "step": 210040 }, { "epoch": 1.7736589896772288, "grad_norm": 0.23360997438430786, "learning_rate": 3.850703380829407e-07, "loss": 0.0084, "step": 210050 }, { "epoch": 1.7737434295243926, "grad_norm": 0.09852214902639389, "learning_rate": 3.847868112360492e-07, "loss": 0.0053, "step": 210060 }, { "epoch": 1.7738278693715563, "grad_norm": 0.21569395065307617, "learning_rate": 3.8450338463064173e-07, "loss": 0.0038, "step": 210070 }, { "epoch": 1.7739123092187203, "grad_norm": 0.41144710779190063, "learning_rate": 3.8422005827287445e-07, "loss": 0.0064, "step": 210080 }, { "epoch": 1.7739967490658843, "grad_norm": 0.17012754082679749, "learning_rate": 3.839368321689024e-07, "loss": 0.0057, "step": 210090 }, { "epoch": 1.7740811889130481, "grad_norm": 0.502294659614563, "learning_rate": 3.8365370632487633e-07, "loss": 0.0069, "step": 210100 }, { "epoch": 1.7741656287602119, "grad_norm": 0.44919466972351074, "learning_rate": 3.8337068074694616e-07, "loss": 0.0042, "step": 210110 }, { "epoch": 1.7742500686073757, "grad_norm": 0.4541577398777008, "learning_rate": 3.8308775544125766e-07, "loss": 0.0065, "step": 210120 }, { "epoch": 1.7743345084545397, "grad_norm": 0.7443846464157104, "learning_rate": 3.8280493041395803e-07, "loss": 0.0074, "step": 210130 }, { "epoch": 1.7744189483017037, "grad_norm": 0.09537415206432343, "learning_rate": 3.82522205671188e-07, "loss": 0.0049, "step": 210140 }, { "epoch": 1.7745033881488674, "grad_norm": 1.2053073644638062, "learning_rate": 3.8223958121909045e-07, "loss": 0.0088, "step": 210150 }, { "epoch": 1.7745878279960312, "grad_norm": 0.32364583015441895, "learning_rate": 3.8195705706380205e-07, "loss": 0.0054, "step": 210160 }, { "epoch": 1.7746722678431952, "grad_norm": 0.05119835212826729, "learning_rate": 3.816746332114618e-07, "loss": 0.0025, "step": 210170 }, { "epoch": 1.7747567076903592, "grad_norm": 0.11993139237165451, "learning_rate": 3.813923096682004e-07, "loss": 0.006, "step": 210180 }, { "epoch": 1.774841147537523, "grad_norm": 0.5316826105117798, "learning_rate": 3.811100864401529e-07, "loss": 0.0054, "step": 210190 }, { "epoch": 1.7749255873846868, "grad_norm": 0.0519566647708416, "learning_rate": 3.808279635334472e-07, "loss": 0.0048, "step": 210200 }, { "epoch": 1.7750100272318508, "grad_norm": 0.003563362406566739, "learning_rate": 3.805459409542106e-07, "loss": 0.0102, "step": 210210 }, { "epoch": 1.7750944670790147, "grad_norm": 0.1537260115146637, "learning_rate": 3.8026401870857097e-07, "loss": 0.0116, "step": 210220 }, { "epoch": 1.7751789069261785, "grad_norm": 0.10389380902051926, "learning_rate": 3.79982196802649e-07, "loss": 0.005, "step": 210230 }, { "epoch": 1.7752633467733423, "grad_norm": 0.3314197063446045, "learning_rate": 3.7970047524256813e-07, "loss": 0.0077, "step": 210240 }, { "epoch": 1.775347786620506, "grad_norm": 0.13212284445762634, "learning_rate": 3.7941885403444567e-07, "loss": 0.0078, "step": 210250 }, { "epoch": 1.77543222646767, "grad_norm": 0.46592432260513306, "learning_rate": 3.791373331843995e-07, "loss": 0.0073, "step": 210260 }, { "epoch": 1.775516666314834, "grad_norm": 0.23782575130462646, "learning_rate": 3.788559126985419e-07, "loss": 0.0049, "step": 210270 }, { "epoch": 1.7756011061619978, "grad_norm": 0.18547196686267853, "learning_rate": 3.785745925829881e-07, "loss": 0.0058, "step": 210280 }, { "epoch": 1.7756855460091616, "grad_norm": 0.037698496133089066, "learning_rate": 3.782933728438465e-07, "loss": 0.0061, "step": 210290 }, { "epoch": 1.7757699858563256, "grad_norm": 0.022014785557985306, "learning_rate": 3.780122534872266e-07, "loss": 0.0066, "step": 210300 }, { "epoch": 1.7758544257034896, "grad_norm": 0.252229243516922, "learning_rate": 3.77731234519233e-07, "loss": 0.0048, "step": 210310 }, { "epoch": 1.7759388655506534, "grad_norm": 0.7162796258926392, "learning_rate": 3.7745031594596916e-07, "loss": 0.0085, "step": 210320 }, { "epoch": 1.7760233053978172, "grad_norm": 0.3984164297580719, "learning_rate": 3.771694977735385e-07, "loss": 0.0096, "step": 210330 }, { "epoch": 1.7761077452449812, "grad_norm": 0.12067148834466934, "learning_rate": 3.768887800080384e-07, "loss": 0.0096, "step": 210340 }, { "epoch": 1.776192185092145, "grad_norm": 0.21171580255031586, "learning_rate": 3.7660816265556675e-07, "loss": 0.0178, "step": 210350 }, { "epoch": 1.776276624939309, "grad_norm": 0.25094738602638245, "learning_rate": 3.7632764572221816e-07, "loss": 0.0044, "step": 210360 }, { "epoch": 1.7763610647864727, "grad_norm": 0.4815196096897125, "learning_rate": 3.76047229214086e-07, "loss": 0.0056, "step": 210370 }, { "epoch": 1.7764455046336365, "grad_norm": 0.48699408769607544, "learning_rate": 3.757669131372593e-07, "loss": 0.0069, "step": 210380 }, { "epoch": 1.7765299444808005, "grad_norm": 0.16533873975276947, "learning_rate": 3.7548669749782886e-07, "loss": 0.0051, "step": 210390 }, { "epoch": 1.7766143843279645, "grad_norm": 0.36446189880371094, "learning_rate": 3.752065823018791e-07, "loss": 0.0055, "step": 210400 }, { "epoch": 1.7766988241751283, "grad_norm": 0.3929244577884674, "learning_rate": 3.749265675554953e-07, "loss": 0.006, "step": 210410 }, { "epoch": 1.776783264022292, "grad_norm": 0.10191337764263153, "learning_rate": 3.746466532647586e-07, "loss": 0.0032, "step": 210420 }, { "epoch": 1.776867703869456, "grad_norm": 0.7845985889434814, "learning_rate": 3.743668394357486e-07, "loss": 0.0077, "step": 210430 }, { "epoch": 1.77695214371662, "grad_norm": 0.23879052698612213, "learning_rate": 3.7408712607454323e-07, "loss": 0.0092, "step": 210440 }, { "epoch": 1.7770365835637838, "grad_norm": 0.44393789768218994, "learning_rate": 3.7380751318721653e-07, "loss": 0.0073, "step": 210450 }, { "epoch": 1.7771210234109476, "grad_norm": 0.21649721264839172, "learning_rate": 3.7352800077984363e-07, "loss": 0.0056, "step": 210460 }, { "epoch": 1.7772054632581114, "grad_norm": 0.21574878692626953, "learning_rate": 3.7324858885849356e-07, "loss": 0.0047, "step": 210470 }, { "epoch": 1.7772899031052753, "grad_norm": 0.9245976209640503, "learning_rate": 3.729692774292371e-07, "loss": 0.0107, "step": 210480 }, { "epoch": 1.7773743429524393, "grad_norm": 0.08025719225406647, "learning_rate": 3.726900664981392e-07, "loss": 0.007, "step": 210490 }, { "epoch": 1.7774587827996031, "grad_norm": 0.6260911226272583, "learning_rate": 3.724109560712652e-07, "loss": 0.0075, "step": 210500 }, { "epoch": 1.777543222646767, "grad_norm": 0.15095408260822296, "learning_rate": 3.721319461546763e-07, "loss": 0.0037, "step": 210510 }, { "epoch": 1.777627662493931, "grad_norm": 0.006425303872674704, "learning_rate": 3.7185303675443376e-07, "loss": 0.0046, "step": 210520 }, { "epoch": 1.777712102341095, "grad_norm": 0.07677550613880157, "learning_rate": 3.7157422787659436e-07, "loss": 0.0066, "step": 210530 }, { "epoch": 1.7777965421882587, "grad_norm": 0.022748081013560295, "learning_rate": 3.712955195272144e-07, "loss": 0.0088, "step": 210540 }, { "epoch": 1.7778809820354224, "grad_norm": 0.13071143627166748, "learning_rate": 3.71016911712348e-07, "loss": 0.004, "step": 210550 }, { "epoch": 1.7779654218825864, "grad_norm": 0.43362781405448914, "learning_rate": 3.7073840443804465e-07, "loss": 0.0057, "step": 210560 }, { "epoch": 1.7780498617297502, "grad_norm": 0.000496304186526686, "learning_rate": 3.704599977103551e-07, "loss": 0.0033, "step": 210570 }, { "epoch": 1.7781343015769142, "grad_norm": 0.13330748677253723, "learning_rate": 3.7018169153532624e-07, "loss": 0.0113, "step": 210580 }, { "epoch": 1.778218741424078, "grad_norm": 0.1630573719739914, "learning_rate": 3.699034859190015e-07, "loss": 0.0076, "step": 210590 }, { "epoch": 1.7783031812712418, "grad_norm": 0.0871787890791893, "learning_rate": 3.696253808674238e-07, "loss": 0.0091, "step": 210600 }, { "epoch": 1.7783876211184058, "grad_norm": 0.2032347321510315, "learning_rate": 3.693473763866351e-07, "loss": 0.0029, "step": 210610 }, { "epoch": 1.7784720609655698, "grad_norm": 0.0965753123164177, "learning_rate": 3.6906947248267156e-07, "loss": 0.0049, "step": 210620 }, { "epoch": 1.7785565008127335, "grad_norm": 0.09330452978610992, "learning_rate": 3.687916691615706e-07, "loss": 0.0059, "step": 210630 }, { "epoch": 1.7786409406598973, "grad_norm": 0.19210666418075562, "learning_rate": 3.6851396642936523e-07, "loss": 0.0035, "step": 210640 }, { "epoch": 1.7787253805070613, "grad_norm": 0.13707254827022552, "learning_rate": 3.6823636429208773e-07, "loss": 0.0066, "step": 210650 }, { "epoch": 1.7788098203542253, "grad_norm": 0.1542533040046692, "learning_rate": 3.6795886275576787e-07, "loss": 0.0053, "step": 210660 }, { "epoch": 1.778894260201389, "grad_norm": 0.1805119514465332, "learning_rate": 3.676814618264318e-07, "loss": 0.0055, "step": 210670 }, { "epoch": 1.7789787000485529, "grad_norm": 0.005724763497710228, "learning_rate": 3.674041615101054e-07, "loss": 0.0059, "step": 210680 }, { "epoch": 1.7790631398957166, "grad_norm": 0.1889553815126419, "learning_rate": 3.671269618128104e-07, "loss": 0.0079, "step": 210690 }, { "epoch": 1.7791475797428806, "grad_norm": 0.09697325527667999, "learning_rate": 3.668498627405692e-07, "loss": 0.0034, "step": 210700 }, { "epoch": 1.7792320195900446, "grad_norm": 0.42223110795021057, "learning_rate": 3.6657286429939877e-07, "loss": 0.0069, "step": 210710 }, { "epoch": 1.7793164594372084, "grad_norm": 0.09318739920854568, "learning_rate": 3.6629596649531694e-07, "loss": 0.0099, "step": 210720 }, { "epoch": 1.7794008992843722, "grad_norm": 0.41507551074028015, "learning_rate": 3.6601916933433623e-07, "loss": 0.0023, "step": 210730 }, { "epoch": 1.7794853391315362, "grad_norm": 0.33330637216567993, "learning_rate": 3.6574247282247064e-07, "loss": 0.0037, "step": 210740 }, { "epoch": 1.7795697789787002, "grad_norm": 0.37041160464286804, "learning_rate": 3.6546587696572876e-07, "loss": 0.0067, "step": 210750 }, { "epoch": 1.779654218825864, "grad_norm": 0.27065524458885193, "learning_rate": 3.651893817701185e-07, "loss": 0.0046, "step": 210760 }, { "epoch": 1.7797386586730277, "grad_norm": 0.4033777713775635, "learning_rate": 3.649129872416446e-07, "loss": 0.0102, "step": 210770 }, { "epoch": 1.7798230985201917, "grad_norm": 0.06715766340494156, "learning_rate": 3.64636693386311e-07, "loss": 0.0043, "step": 210780 }, { "epoch": 1.7799075383673557, "grad_norm": 0.03457564860582352, "learning_rate": 3.643605002101186e-07, "loss": 0.0056, "step": 210790 }, { "epoch": 1.7799919782145195, "grad_norm": 0.21208731830120087, "learning_rate": 3.640844077190658e-07, "loss": 0.005, "step": 210800 }, { "epoch": 1.7800764180616833, "grad_norm": 0.012169362977147102, "learning_rate": 3.638084159191502e-07, "loss": 0.0062, "step": 210810 }, { "epoch": 1.780160857908847, "grad_norm": 0.4638740122318268, "learning_rate": 3.635325248163657e-07, "loss": 0.0069, "step": 210820 }, { "epoch": 1.780245297756011, "grad_norm": 0.49689167737960815, "learning_rate": 3.6325673441670484e-07, "loss": 0.0054, "step": 210830 }, { "epoch": 1.780329737603175, "grad_norm": 0.17288780212402344, "learning_rate": 3.629810447261567e-07, "loss": 0.0063, "step": 210840 }, { "epoch": 1.7804141774503388, "grad_norm": 0.14739765226840973, "learning_rate": 3.6270545575071093e-07, "loss": 0.0037, "step": 210850 }, { "epoch": 1.7804986172975026, "grad_norm": 0.3525422215461731, "learning_rate": 3.624299674963516e-07, "loss": 0.0043, "step": 210860 }, { "epoch": 1.7805830571446666, "grad_norm": 0.2890509068965912, "learning_rate": 3.6215457996906343e-07, "loss": 0.0045, "step": 210870 }, { "epoch": 1.7806674969918306, "grad_norm": 0.4765712320804596, "learning_rate": 3.6187929317482716e-07, "loss": 0.0102, "step": 210880 }, { "epoch": 1.7807519368389944, "grad_norm": 0.12687696516513824, "learning_rate": 3.6160410711962247e-07, "loss": 0.0121, "step": 210890 }, { "epoch": 1.7808363766861581, "grad_norm": 0.08281191438436508, "learning_rate": 3.6132902180942676e-07, "loss": 0.0051, "step": 210900 }, { "epoch": 1.7809208165333221, "grad_norm": 0.6904147863388062, "learning_rate": 3.610540372502136e-07, "loss": 0.0086, "step": 210910 }, { "epoch": 1.781005256380486, "grad_norm": 0.4646967351436615, "learning_rate": 3.60779153447956e-07, "loss": 0.0099, "step": 210920 }, { "epoch": 1.78108969622765, "grad_norm": 0.012234222143888474, "learning_rate": 3.605043704086242e-07, "loss": 0.002, "step": 210930 }, { "epoch": 1.7811741360748137, "grad_norm": 0.3913465440273285, "learning_rate": 3.6022968813818725e-07, "loss": 0.0059, "step": 210940 }, { "epoch": 1.7812585759219774, "grad_norm": 0.26220381259918213, "learning_rate": 3.599551066426099e-07, "loss": 0.0056, "step": 210950 }, { "epoch": 1.7813430157691414, "grad_norm": 0.006695996504276991, "learning_rate": 3.596806259278579e-07, "loss": 0.0057, "step": 210960 }, { "epoch": 1.7814274556163054, "grad_norm": 0.029467778280377388, "learning_rate": 3.5940624599989095e-07, "loss": 0.0058, "step": 210970 }, { "epoch": 1.7815118954634692, "grad_norm": 0.3712540864944458, "learning_rate": 3.5913196686466976e-07, "loss": 0.0071, "step": 210980 }, { "epoch": 1.781596335310633, "grad_norm": 0.2260136604309082, "learning_rate": 3.5885778852815135e-07, "loss": 0.011, "step": 210990 }, { "epoch": 1.781680775157797, "grad_norm": 0.3652547597885132, "learning_rate": 3.585837109962914e-07, "loss": 0.0087, "step": 211000 }, { "epoch": 1.781765215004961, "grad_norm": 0.32244452834129333, "learning_rate": 3.583097342750408e-07, "loss": 0.0069, "step": 211010 }, { "epoch": 1.7818496548521248, "grad_norm": 0.12075944244861603, "learning_rate": 3.580358583703519e-07, "loss": 0.013, "step": 211020 }, { "epoch": 1.7819340946992885, "grad_norm": 0.2444908618927002, "learning_rate": 3.57762083288174e-07, "loss": 0.008, "step": 211030 }, { "epoch": 1.7820185345464523, "grad_norm": 0.10209432244300842, "learning_rate": 3.5748840903445106e-07, "loss": 0.0057, "step": 211040 }, { "epoch": 1.7821029743936163, "grad_norm": 0.37386554479599, "learning_rate": 3.5721483561512895e-07, "loss": 0.0075, "step": 211050 }, { "epoch": 1.7821874142407803, "grad_norm": 0.044793639332056046, "learning_rate": 3.56941363036149e-07, "loss": 0.0046, "step": 211060 }, { "epoch": 1.782271854087944, "grad_norm": 0.637010931968689, "learning_rate": 3.566679913034526e-07, "loss": 0.008, "step": 211070 }, { "epoch": 1.7823562939351079, "grad_norm": 0.11792798340320587, "learning_rate": 3.5639472042297395e-07, "loss": 0.0059, "step": 211080 }, { "epoch": 1.7824407337822719, "grad_norm": 0.22003187239170074, "learning_rate": 3.5612155040065156e-07, "loss": 0.0048, "step": 211090 }, { "epoch": 1.7825251736294359, "grad_norm": 0.07625747472047806, "learning_rate": 3.558484812424168e-07, "loss": 0.0094, "step": 211100 }, { "epoch": 1.7826096134765996, "grad_norm": 0.47657591104507446, "learning_rate": 3.5557551295420213e-07, "loss": 0.0042, "step": 211110 }, { "epoch": 1.7826940533237634, "grad_norm": 0.21328648924827576, "learning_rate": 3.553026455419345e-07, "loss": 0.0073, "step": 211120 }, { "epoch": 1.7827784931709274, "grad_norm": 0.2907056212425232, "learning_rate": 3.550298790115425e-07, "loss": 0.0038, "step": 211130 }, { "epoch": 1.7828629330180914, "grad_norm": 0.14303995668888092, "learning_rate": 3.5475721336894975e-07, "loss": 0.0038, "step": 211140 }, { "epoch": 1.7829473728652552, "grad_norm": 0.12802711129188538, "learning_rate": 3.5448464862007815e-07, "loss": 0.0083, "step": 211150 }, { "epoch": 1.783031812712419, "grad_norm": 0.4246739149093628, "learning_rate": 3.542121847708485e-07, "loss": 0.0068, "step": 211160 }, { "epoch": 1.7831162525595827, "grad_norm": 0.1378020942211151, "learning_rate": 3.539398218271772e-07, "loss": 0.0041, "step": 211170 }, { "epoch": 1.7832006924067467, "grad_norm": 0.1404440999031067, "learning_rate": 3.536675597949818e-07, "loss": 0.0063, "step": 211180 }, { "epoch": 1.7832851322539107, "grad_norm": 0.8538327813148499, "learning_rate": 3.533953986801736e-07, "loss": 0.0133, "step": 211190 }, { "epoch": 1.7833695721010745, "grad_norm": 0.19286973774433136, "learning_rate": 3.5312333848866677e-07, "loss": 0.0152, "step": 211200 }, { "epoch": 1.7834540119482383, "grad_norm": 0.21092242002487183, "learning_rate": 3.5285137922636713e-07, "loss": 0.0042, "step": 211210 }, { "epoch": 1.7835384517954023, "grad_norm": 0.27526214718818665, "learning_rate": 3.5257952089918445e-07, "loss": 0.003, "step": 211220 }, { "epoch": 1.7836228916425663, "grad_norm": 0.3075321912765503, "learning_rate": 3.5230776351302233e-07, "loss": 0.0078, "step": 211230 }, { "epoch": 1.78370733148973, "grad_norm": 0.16437657177448273, "learning_rate": 3.520361070737832e-07, "loss": 0.0068, "step": 211240 }, { "epoch": 1.7837917713368938, "grad_norm": 0.6163459420204163, "learning_rate": 3.5176455158736744e-07, "loss": 0.0043, "step": 211250 }, { "epoch": 1.7838762111840576, "grad_norm": 0.38548967242240906, "learning_rate": 3.5149309705967193e-07, "loss": 0.0139, "step": 211260 }, { "epoch": 1.7839606510312216, "grad_norm": 0.41303184628486633, "learning_rate": 3.512217434965948e-07, "loss": 0.01, "step": 211270 }, { "epoch": 1.7840450908783856, "grad_norm": 0.13031752407550812, "learning_rate": 3.5095049090402787e-07, "loss": 0.0093, "step": 211280 }, { "epoch": 1.7841295307255494, "grad_norm": 0.5803045630455017, "learning_rate": 3.5067933928786436e-07, "loss": 0.0109, "step": 211290 }, { "epoch": 1.7842139705727131, "grad_norm": 0.2018815577030182, "learning_rate": 3.5040828865399224e-07, "loss": 0.0077, "step": 211300 }, { "epoch": 1.7842984104198771, "grad_norm": 0.10912109911441803, "learning_rate": 3.5013733900830015e-07, "loss": 0.0036, "step": 211310 }, { "epoch": 1.7843828502670411, "grad_norm": 0.6031101942062378, "learning_rate": 3.498664903566723e-07, "loss": 0.0054, "step": 211320 }, { "epoch": 1.784467290114205, "grad_norm": 0.12920226156711578, "learning_rate": 3.495957427049912e-07, "loss": 0.0037, "step": 211330 }, { "epoch": 1.7845517299613687, "grad_norm": 0.36926376819610596, "learning_rate": 3.493250960591366e-07, "loss": 0.0058, "step": 211340 }, { "epoch": 1.7846361698085327, "grad_norm": 0.2945229411125183, "learning_rate": 3.490545504249887e-07, "loss": 0.0038, "step": 211350 }, { "epoch": 1.7847206096556967, "grad_norm": 0.2651793360710144, "learning_rate": 3.4878410580842237e-07, "loss": 0.0057, "step": 211360 }, { "epoch": 1.7848050495028605, "grad_norm": 0.7410087585449219, "learning_rate": 3.4851376221531284e-07, "loss": 0.0073, "step": 211370 }, { "epoch": 1.7848894893500242, "grad_norm": 0.12566828727722168, "learning_rate": 3.4824351965153103e-07, "loss": 0.0078, "step": 211380 }, { "epoch": 1.784973929197188, "grad_norm": 0.05363007262349129, "learning_rate": 3.479733781229472e-07, "loss": 0.0079, "step": 211390 }, { "epoch": 1.785058369044352, "grad_norm": 0.19259034097194672, "learning_rate": 3.477033376354283e-07, "loss": 0.0061, "step": 211400 }, { "epoch": 1.785142808891516, "grad_norm": 0.4230220317840576, "learning_rate": 3.47433398194838e-07, "loss": 0.0063, "step": 211410 }, { "epoch": 1.7852272487386798, "grad_norm": 0.0610799603164196, "learning_rate": 3.471635598070422e-07, "loss": 0.0092, "step": 211420 }, { "epoch": 1.7853116885858435, "grad_norm": 0.1507355123758316, "learning_rate": 3.4689382247790006e-07, "loss": 0.0044, "step": 211430 }, { "epoch": 1.7853961284330075, "grad_norm": 0.07581578940153122, "learning_rate": 3.466241862132708e-07, "loss": 0.006, "step": 211440 }, { "epoch": 1.7854805682801715, "grad_norm": 0.16813087463378906, "learning_rate": 3.4635465101900965e-07, "loss": 0.004, "step": 211450 }, { "epoch": 1.7855650081273353, "grad_norm": 0.058069344609975815, "learning_rate": 3.460852169009732e-07, "loss": 0.0062, "step": 211460 }, { "epoch": 1.785649447974499, "grad_norm": 0.09187455475330353, "learning_rate": 3.458158838650122e-07, "loss": 0.0068, "step": 211470 }, { "epoch": 1.785733887821663, "grad_norm": 0.17086710035800934, "learning_rate": 3.455466519169759e-07, "loss": 0.0081, "step": 211480 }, { "epoch": 1.7858183276688269, "grad_norm": 0.12156780809164047, "learning_rate": 3.4527752106271293e-07, "loss": 0.0031, "step": 211490 }, { "epoch": 1.7859027675159909, "grad_norm": 0.11503032594919205, "learning_rate": 3.4500849130806755e-07, "loss": 0.0082, "step": 211500 }, { "epoch": 1.7859872073631546, "grad_norm": 0.24594514071941376, "learning_rate": 3.4473956265888454e-07, "loss": 0.0029, "step": 211510 }, { "epoch": 1.7860716472103184, "grad_norm": 0.1984497308731079, "learning_rate": 3.444707351210036e-07, "loss": 0.0057, "step": 211520 }, { "epoch": 1.7861560870574824, "grad_norm": 0.0011003720574080944, "learning_rate": 3.442020087002651e-07, "loss": 0.0052, "step": 211530 }, { "epoch": 1.7862405269046464, "grad_norm": 0.009897810406982899, "learning_rate": 3.4393338340250384e-07, "loss": 0.0058, "step": 211540 }, { "epoch": 1.7863249667518102, "grad_norm": 0.17971713840961456, "learning_rate": 3.436648592335562e-07, "loss": 0.0063, "step": 211550 }, { "epoch": 1.786409406598974, "grad_norm": 0.20380882918834686, "learning_rate": 3.4339643619925365e-07, "loss": 0.0044, "step": 211560 }, { "epoch": 1.786493846446138, "grad_norm": 0.7240815162658691, "learning_rate": 3.431281143054266e-07, "loss": 0.0085, "step": 211570 }, { "epoch": 1.786578286293302, "grad_norm": 0.3200291693210602, "learning_rate": 3.428598935579014e-07, "loss": 0.0116, "step": 211580 }, { "epoch": 1.7866627261404657, "grad_norm": 0.47110578417778015, "learning_rate": 3.425917739625062e-07, "loss": 0.006, "step": 211590 }, { "epoch": 1.7867471659876295, "grad_norm": 0.19707778096199036, "learning_rate": 3.4232375552506246e-07, "loss": 0.0027, "step": 211600 }, { "epoch": 1.7868316058347933, "grad_norm": 0.23986299335956573, "learning_rate": 3.420558382513928e-07, "loss": 0.0025, "step": 211610 }, { "epoch": 1.7869160456819573, "grad_norm": 0.0002848730073310435, "learning_rate": 3.417880221473158e-07, "loss": 0.002, "step": 211620 }, { "epoch": 1.7870004855291213, "grad_norm": 0.2630697190761566, "learning_rate": 3.415203072186479e-07, "loss": 0.0096, "step": 211630 }, { "epoch": 1.787084925376285, "grad_norm": 0.1998768150806427, "learning_rate": 3.4125269347120516e-07, "loss": 0.0073, "step": 211640 }, { "epoch": 1.7871693652234488, "grad_norm": 0.2367161512374878, "learning_rate": 3.409851809107989e-07, "loss": 0.0058, "step": 211650 }, { "epoch": 1.7872538050706128, "grad_norm": 0.2557724118232727, "learning_rate": 3.407177695432401e-07, "loss": 0.0044, "step": 211660 }, { "epoch": 1.7873382449177768, "grad_norm": 0.24125654995441437, "learning_rate": 3.404504593743352e-07, "loss": 0.0046, "step": 211670 }, { "epoch": 1.7874226847649406, "grad_norm": 0.23546577990055084, "learning_rate": 3.4018325040989285e-07, "loss": 0.0121, "step": 211680 }, { "epoch": 1.7875071246121044, "grad_norm": 0.27053138613700867, "learning_rate": 3.399161426557146e-07, "loss": 0.0096, "step": 211690 }, { "epoch": 1.7875915644592684, "grad_norm": 0.2407260239124298, "learning_rate": 3.3964913611760353e-07, "loss": 0.0089, "step": 211700 }, { "epoch": 1.7876760043064324, "grad_norm": 0.38817098736763, "learning_rate": 3.393822308013578e-07, "loss": 0.0059, "step": 211710 }, { "epoch": 1.7877604441535961, "grad_norm": 0.16264359652996063, "learning_rate": 3.3911542671277554e-07, "loss": 0.0029, "step": 211720 }, { "epoch": 1.78784488400076, "grad_norm": 0.2726641595363617, "learning_rate": 3.38848723857651e-07, "loss": 0.0032, "step": 211730 }, { "epoch": 1.7879293238479237, "grad_norm": 0.09367578476667404, "learning_rate": 3.385821222417757e-07, "loss": 0.0175, "step": 211740 }, { "epoch": 1.7880137636950877, "grad_norm": 0.3867592215538025, "learning_rate": 3.383156218709427e-07, "loss": 0.0055, "step": 211750 }, { "epoch": 1.7880982035422517, "grad_norm": 0.33724457025527954, "learning_rate": 3.3804922275093856e-07, "loss": 0.0058, "step": 211760 }, { "epoch": 1.7881826433894155, "grad_norm": 0.37650805711746216, "learning_rate": 3.3778292488755026e-07, "loss": 0.0063, "step": 211770 }, { "epoch": 1.7882670832365792, "grad_norm": 0.5267491340637207, "learning_rate": 3.37516728286561e-07, "loss": 0.0093, "step": 211780 }, { "epoch": 1.7883515230837432, "grad_norm": 0.1300293207168579, "learning_rate": 3.372506329537539e-07, "loss": 0.0068, "step": 211790 }, { "epoch": 1.7884359629309072, "grad_norm": 0.023763326928019524, "learning_rate": 3.369846388949072e-07, "loss": 0.0031, "step": 211800 }, { "epoch": 1.788520402778071, "grad_norm": 0.017495231702923775, "learning_rate": 3.367187461157989e-07, "loss": 0.012, "step": 211810 }, { "epoch": 1.7886048426252348, "grad_norm": 0.21171143651008606, "learning_rate": 3.3645295462220287e-07, "loss": 0.0036, "step": 211820 }, { "epoch": 1.7886892824723988, "grad_norm": 0.3116256892681122, "learning_rate": 3.361872644198938e-07, "loss": 0.0036, "step": 211830 }, { "epoch": 1.7887737223195626, "grad_norm": 0.32695168256759644, "learning_rate": 3.359216755146416e-07, "loss": 0.0062, "step": 211840 }, { "epoch": 1.7888581621667266, "grad_norm": 0.12425512820482254, "learning_rate": 3.356561879122144e-07, "loss": 0.0058, "step": 211850 }, { "epoch": 1.7889426020138903, "grad_norm": 0.09008369594812393, "learning_rate": 3.3539080161837935e-07, "loss": 0.0055, "step": 211860 }, { "epoch": 1.789027041861054, "grad_norm": 0.1598568558692932, "learning_rate": 3.351255166388995e-07, "loss": 0.0056, "step": 211870 }, { "epoch": 1.789111481708218, "grad_norm": 0.5704956650733948, "learning_rate": 3.3486033297953856e-07, "loss": 0.0065, "step": 211880 }, { "epoch": 1.789195921555382, "grad_norm": 0.06773406267166138, "learning_rate": 3.345952506460548e-07, "loss": 0.005, "step": 211890 }, { "epoch": 1.7892803614025459, "grad_norm": 0.674633264541626, "learning_rate": 3.3433026964420633e-07, "loss": 0.004, "step": 211900 }, { "epoch": 1.7893648012497096, "grad_norm": 0.2966270446777344, "learning_rate": 3.340653899797475e-07, "loss": 0.0031, "step": 211910 }, { "epoch": 1.7894492410968736, "grad_norm": 0.164783775806427, "learning_rate": 3.338006116584336e-07, "loss": 0.0064, "step": 211920 }, { "epoch": 1.7895336809440376, "grad_norm": 0.2622755467891693, "learning_rate": 3.3353593468601286e-07, "loss": 0.0107, "step": 211930 }, { "epoch": 1.7896181207912014, "grad_norm": 0.14466379582881927, "learning_rate": 3.3327135906823626e-07, "loss": 0.0028, "step": 211940 }, { "epoch": 1.7897025606383652, "grad_norm": 0.06612599641084671, "learning_rate": 3.330068848108486e-07, "loss": 0.0079, "step": 211950 }, { "epoch": 1.789787000485529, "grad_norm": 0.2091442197561264, "learning_rate": 3.327425119195965e-07, "loss": 0.0043, "step": 211960 }, { "epoch": 1.789871440332693, "grad_norm": 0.32365939021110535, "learning_rate": 3.3247824040022025e-07, "loss": 0.0071, "step": 211970 }, { "epoch": 1.789955880179857, "grad_norm": 0.33488717675209045, "learning_rate": 3.322140702584592e-07, "loss": 0.0071, "step": 211980 }, { "epoch": 1.7900403200270207, "grad_norm": 0.22737284004688263, "learning_rate": 3.3195000150005263e-07, "loss": 0.0071, "step": 211990 }, { "epoch": 1.7901247598741845, "grad_norm": 0.18334269523620605, "learning_rate": 3.3168603413073486e-07, "loss": 0.0039, "step": 212000 }, { "epoch": 1.7902091997213485, "grad_norm": 0.20504100620746613, "learning_rate": 3.3142216815624017e-07, "loss": 0.0087, "step": 212010 }, { "epoch": 1.7902936395685125, "grad_norm": 0.11961983889341354, "learning_rate": 3.31158403582299e-07, "loss": 0.0031, "step": 212020 }, { "epoch": 1.7903780794156763, "grad_norm": 0.16035790741443634, "learning_rate": 3.308947404146412e-07, "loss": 0.0081, "step": 212030 }, { "epoch": 1.79046251926284, "grad_norm": 0.5587936639785767, "learning_rate": 3.3063117865899277e-07, "loss": 0.0105, "step": 212040 }, { "epoch": 1.790546959110004, "grad_norm": 0.20922107994556427, "learning_rate": 3.303677183210785e-07, "loss": 0.0071, "step": 212050 }, { "epoch": 1.790631398957168, "grad_norm": 0.23281128704547882, "learning_rate": 3.3010435940661944e-07, "loss": 0.0061, "step": 212060 }, { "epoch": 1.7907158388043318, "grad_norm": 0.061605747789144516, "learning_rate": 3.2984110192133766e-07, "loss": 0.0036, "step": 212070 }, { "epoch": 1.7908002786514956, "grad_norm": 0.12596438825130463, "learning_rate": 3.2957794587095027e-07, "loss": 0.0051, "step": 212080 }, { "epoch": 1.7908847184986594, "grad_norm": 1.195985198020935, "learning_rate": 3.293148912611721e-07, "loss": 0.0051, "step": 212090 }, { "epoch": 1.7909691583458234, "grad_norm": 0.4940190315246582, "learning_rate": 3.2905193809771805e-07, "loss": 0.0091, "step": 212100 }, { "epoch": 1.7910535981929874, "grad_norm": 0.10165821015834808, "learning_rate": 3.2878908638629746e-07, "loss": 0.0055, "step": 212110 }, { "epoch": 1.7911380380401511, "grad_norm": 0.2481200248003006, "learning_rate": 3.2852633613262184e-07, "loss": 0.0075, "step": 212120 }, { "epoch": 1.791222477887315, "grad_norm": 0.39739981293678284, "learning_rate": 3.282636873423972e-07, "loss": 0.0054, "step": 212130 }, { "epoch": 1.791306917734479, "grad_norm": 0.10451307147741318, "learning_rate": 3.280011400213279e-07, "loss": 0.0088, "step": 212140 }, { "epoch": 1.791391357581643, "grad_norm": 0.2771613597869873, "learning_rate": 3.277386941751154e-07, "loss": 0.0064, "step": 212150 }, { "epoch": 1.7914757974288067, "grad_norm": 0.7689935564994812, "learning_rate": 3.2747634980946186e-07, "loss": 0.0137, "step": 212160 }, { "epoch": 1.7915602372759705, "grad_norm": 0.09512797743082047, "learning_rate": 3.272141069300638e-07, "loss": 0.0061, "step": 212170 }, { "epoch": 1.7916446771231342, "grad_norm": 0.3335041403770447, "learning_rate": 3.2695196554261845e-07, "loss": 0.0126, "step": 212180 }, { "epoch": 1.7917291169702982, "grad_norm": 0.3594990372657776, "learning_rate": 3.266899256528183e-07, "loss": 0.0114, "step": 212190 }, { "epoch": 1.7918135568174622, "grad_norm": 0.5703297257423401, "learning_rate": 3.264279872663562e-07, "loss": 0.0047, "step": 212200 }, { "epoch": 1.791897996664626, "grad_norm": 0.14010314643383026, "learning_rate": 3.261661503889202e-07, "loss": 0.0045, "step": 212210 }, { "epoch": 1.7919824365117898, "grad_norm": 0.17745622992515564, "learning_rate": 3.259044150261975e-07, "loss": 0.0082, "step": 212220 }, { "epoch": 1.7920668763589538, "grad_norm": 0.5143013596534729, "learning_rate": 3.2564278118387304e-07, "loss": 0.0063, "step": 212230 }, { "epoch": 1.7921513162061178, "grad_norm": 0.5743967890739441, "learning_rate": 3.2538124886762945e-07, "loss": 0.0069, "step": 212240 }, { "epoch": 1.7922357560532816, "grad_norm": 0.26541948318481445, "learning_rate": 3.251198180831472e-07, "loss": 0.0049, "step": 212250 }, { "epoch": 1.7923201959004453, "grad_norm": 0.3716915547847748, "learning_rate": 3.248584888361039e-07, "loss": 0.0153, "step": 212260 }, { "epoch": 1.7924046357476093, "grad_norm": 0.30664411187171936, "learning_rate": 3.2459726113217737e-07, "loss": 0.0038, "step": 212270 }, { "epoch": 1.7924890755947733, "grad_norm": 0.18146152794361115, "learning_rate": 3.243361349770402e-07, "loss": 0.0045, "step": 212280 }, { "epoch": 1.792573515441937, "grad_norm": 0.06497488915920258, "learning_rate": 3.240751103763634e-07, "loss": 0.0101, "step": 212290 }, { "epoch": 1.7926579552891009, "grad_norm": 0.07662515342235565, "learning_rate": 3.2381418733581694e-07, "loss": 0.0064, "step": 212300 }, { "epoch": 1.7927423951362647, "grad_norm": 0.1776944398880005, "learning_rate": 3.2355336586106843e-07, "loss": 0.0094, "step": 212310 }, { "epoch": 1.7928268349834287, "grad_norm": 0.404797226190567, "learning_rate": 3.2329264595778287e-07, "loss": 0.0056, "step": 212320 }, { "epoch": 1.7929112748305926, "grad_norm": 0.0027236081659793854, "learning_rate": 3.2303202763162177e-07, "loss": 0.0052, "step": 212330 }, { "epoch": 1.7929957146777564, "grad_norm": 0.30564185976982117, "learning_rate": 3.2277151088824734e-07, "loss": 0.0091, "step": 212340 }, { "epoch": 1.7930801545249202, "grad_norm": 0.20898117125034332, "learning_rate": 3.2251109573331665e-07, "loss": 0.0048, "step": 212350 }, { "epoch": 1.7931645943720842, "grad_norm": 0.7972140908241272, "learning_rate": 3.222507821724868e-07, "loss": 0.0112, "step": 212360 }, { "epoch": 1.7932490342192482, "grad_norm": 0.40993815660476685, "learning_rate": 3.219905702114118e-07, "loss": 0.006, "step": 212370 }, { "epoch": 1.793333474066412, "grad_norm": 0.4708974361419678, "learning_rate": 3.21730459855743e-07, "loss": 0.005, "step": 212380 }, { "epoch": 1.7934179139135757, "grad_norm": 0.4484148323535919, "learning_rate": 3.2147045111112884e-07, "loss": 0.0152, "step": 212390 }, { "epoch": 1.7935023537607397, "grad_norm": 0.4917936623096466, "learning_rate": 3.212105439832192e-07, "loss": 0.0083, "step": 212400 }, { "epoch": 1.7935867936079035, "grad_norm": 0.33116212487220764, "learning_rate": 3.209507384776561e-07, "loss": 0.0067, "step": 212410 }, { "epoch": 1.7936712334550675, "grad_norm": 0.13768452405929565, "learning_rate": 3.2069103460008577e-07, "loss": 0.004, "step": 212420 }, { "epoch": 1.7937556733022313, "grad_norm": 0.1897905468940735, "learning_rate": 3.2043143235614583e-07, "loss": 0.0038, "step": 212430 }, { "epoch": 1.793840113149395, "grad_norm": 0.1970537304878235, "learning_rate": 3.201719317514773e-07, "loss": 0.0094, "step": 212440 }, { "epoch": 1.793924552996559, "grad_norm": 0.2782297134399414, "learning_rate": 3.1991253279171575e-07, "loss": 0.0054, "step": 212450 }, { "epoch": 1.794008992843723, "grad_norm": 0.4600578844547272, "learning_rate": 3.196532354824944e-07, "loss": 0.0082, "step": 212460 }, { "epoch": 1.7940934326908868, "grad_norm": 0.10027772188186646, "learning_rate": 3.1939403982944594e-07, "loss": 0.0065, "step": 212470 }, { "epoch": 1.7941778725380506, "grad_norm": 0.13267189264297485, "learning_rate": 3.1913494583819873e-07, "loss": 0.004, "step": 212480 }, { "epoch": 1.7942623123852146, "grad_norm": 0.11881578713655472, "learning_rate": 3.188759535143826e-07, "loss": 0.0059, "step": 212490 }, { "epoch": 1.7943467522323786, "grad_norm": 0.210004061460495, "learning_rate": 3.1861706286362035e-07, "loss": 0.0038, "step": 212500 }, { "epoch": 1.7944311920795424, "grad_norm": 0.12565670907497406, "learning_rate": 3.1835827389153694e-07, "loss": 0.0036, "step": 212510 }, { "epoch": 1.7945156319267062, "grad_norm": 0.10295464843511581, "learning_rate": 3.1809958660375175e-07, "loss": 0.0063, "step": 212520 }, { "epoch": 1.79460007177387, "grad_norm": 0.011492614634335041, "learning_rate": 3.178410010058852e-07, "loss": 0.0048, "step": 212530 }, { "epoch": 1.794684511621034, "grad_norm": 0.01857435144484043, "learning_rate": 3.1758251710355236e-07, "loss": 0.0068, "step": 212540 }, { "epoch": 1.794768951468198, "grad_norm": 0.47142937779426575, "learning_rate": 3.173241349023676e-07, "loss": 0.0064, "step": 212550 }, { "epoch": 1.7948533913153617, "grad_norm": 0.18240201473236084, "learning_rate": 3.17065854407943e-07, "loss": 0.0058, "step": 212560 }, { "epoch": 1.7949378311625255, "grad_norm": 0.04447323456406593, "learning_rate": 3.168076756258881e-07, "loss": 0.0029, "step": 212570 }, { "epoch": 1.7950222710096895, "grad_norm": 0.12721577286720276, "learning_rate": 3.165495985618111e-07, "loss": 0.0049, "step": 212580 }, { "epoch": 1.7951067108568535, "grad_norm": 0.23126547038555145, "learning_rate": 3.16291623221317e-07, "loss": 0.0078, "step": 212590 }, { "epoch": 1.7951911507040172, "grad_norm": 0.2609073221683502, "learning_rate": 3.160337496100091e-07, "loss": 0.0091, "step": 212600 }, { "epoch": 1.795275590551181, "grad_norm": 0.26436832547187805, "learning_rate": 3.15775977733489e-07, "loss": 0.0049, "step": 212610 }, { "epoch": 1.795360030398345, "grad_norm": 0.39649778604507446, "learning_rate": 3.155183075973539e-07, "loss": 0.0065, "step": 212620 }, { "epoch": 1.795444470245509, "grad_norm": 0.2897055149078369, "learning_rate": 3.15260739207201e-07, "loss": 0.0042, "step": 212630 }, { "epoch": 1.7955289100926728, "grad_norm": 0.4459485709667206, "learning_rate": 3.1500327256862583e-07, "loss": 0.0089, "step": 212640 }, { "epoch": 1.7956133499398366, "grad_norm": 0.5415037870407104, "learning_rate": 3.1474590768721833e-07, "loss": 0.0075, "step": 212650 }, { "epoch": 1.7956977897870003, "grad_norm": 0.41592299938201904, "learning_rate": 3.1448864456857077e-07, "loss": 0.0043, "step": 212660 }, { "epoch": 1.7957822296341643, "grad_norm": 0.229993537068367, "learning_rate": 3.142314832182691e-07, "loss": 0.0036, "step": 212670 }, { "epoch": 1.7958666694813283, "grad_norm": 0.015691367909312248, "learning_rate": 3.13974423641899e-07, "loss": 0.0053, "step": 212680 }, { "epoch": 1.7959511093284921, "grad_norm": 0.16606992483139038, "learning_rate": 3.137174658450454e-07, "loss": 0.0067, "step": 212690 }, { "epoch": 1.7960355491756559, "grad_norm": 0.11962141841650009, "learning_rate": 3.1346060983328766e-07, "loss": 0.0054, "step": 212700 }, { "epoch": 1.7961199890228199, "grad_norm": 0.2888895273208618, "learning_rate": 3.132038556122047e-07, "loss": 0.0068, "step": 212710 }, { "epoch": 1.7962044288699839, "grad_norm": 0.011878101155161858, "learning_rate": 3.129472031873737e-07, "loss": 0.0034, "step": 212720 }, { "epoch": 1.7962888687171477, "grad_norm": 0.08334657549858093, "learning_rate": 3.1269065256436917e-07, "loss": 0.0025, "step": 212730 }, { "epoch": 1.7963733085643114, "grad_norm": 0.16409996151924133, "learning_rate": 3.124342037487621e-07, "loss": 0.0088, "step": 212740 }, { "epoch": 1.7964577484114754, "grad_norm": 0.21754199266433716, "learning_rate": 3.121778567461248e-07, "loss": 0.0049, "step": 212750 }, { "epoch": 1.7965421882586392, "grad_norm": 0.0421183705329895, "learning_rate": 3.1192161156202327e-07, "loss": 0.006, "step": 212760 }, { "epoch": 1.7966266281058032, "grad_norm": 0.4580877125263214, "learning_rate": 3.116654682020237e-07, "loss": 0.0072, "step": 212770 }, { "epoch": 1.796711067952967, "grad_norm": 0.3331681489944458, "learning_rate": 3.114094266716899e-07, "loss": 0.0085, "step": 212780 }, { "epoch": 1.7967955078001308, "grad_norm": 0.08603556454181671, "learning_rate": 3.1115348697658253e-07, "loss": 0.0051, "step": 212790 }, { "epoch": 1.7968799476472948, "grad_norm": 0.06213567405939102, "learning_rate": 3.1089764912226095e-07, "loss": 0.0049, "step": 212800 }, { "epoch": 1.7969643874944587, "grad_norm": 0.27233949303627014, "learning_rate": 3.1064191311428014e-07, "loss": 0.0032, "step": 212810 }, { "epoch": 1.7970488273416225, "grad_norm": 0.399091511964798, "learning_rate": 3.103862789581974e-07, "loss": 0.0058, "step": 212820 }, { "epoch": 1.7971332671887863, "grad_norm": 0.10673711448907852, "learning_rate": 3.101307466595621e-07, "loss": 0.0049, "step": 212830 }, { "epoch": 1.7972177070359503, "grad_norm": 0.10333139449357986, "learning_rate": 3.0987531622392763e-07, "loss": 0.0084, "step": 212840 }, { "epoch": 1.7973021468831143, "grad_norm": 0.14216479659080505, "learning_rate": 3.09619987656839e-07, "loss": 0.0057, "step": 212850 }, { "epoch": 1.797386586730278, "grad_norm": 0.0740748718380928, "learning_rate": 3.093647609638445e-07, "loss": 0.0089, "step": 212860 }, { "epoch": 1.7974710265774418, "grad_norm": 0.6741247177124023, "learning_rate": 3.091096361504847e-07, "loss": 0.0074, "step": 212870 }, { "epoch": 1.7975554664246056, "grad_norm": 0.20564132928848267, "learning_rate": 3.08854613222303e-07, "loss": 0.0042, "step": 212880 }, { "epoch": 1.7976399062717696, "grad_norm": 0.12025976926088333, "learning_rate": 3.085996921848372e-07, "loss": 0.0024, "step": 212890 }, { "epoch": 1.7977243461189336, "grad_norm": 0.2531670331954956, "learning_rate": 3.083448730436256e-07, "loss": 0.0061, "step": 212900 }, { "epoch": 1.7978087859660974, "grad_norm": 1.1335129737854004, "learning_rate": 3.0809015580420207e-07, "loss": 0.0043, "step": 212910 }, { "epoch": 1.7978932258132612, "grad_norm": 0.1379641890525818, "learning_rate": 3.0783554047209784e-07, "loss": 0.0024, "step": 212920 }, { "epoch": 1.7979776656604252, "grad_norm": 0.5643807053565979, "learning_rate": 3.075810270528451e-07, "loss": 0.0096, "step": 212930 }, { "epoch": 1.7980621055075892, "grad_norm": 0.25434842705726624, "learning_rate": 3.0732661555197054e-07, "loss": 0.0091, "step": 212940 }, { "epoch": 1.798146545354753, "grad_norm": 0.4563736617565155, "learning_rate": 3.0707230597500084e-07, "loss": 0.0072, "step": 212950 }, { "epoch": 1.7982309852019167, "grad_norm": 0.015478523448109627, "learning_rate": 3.0681809832745767e-07, "loss": 0.0094, "step": 212960 }, { "epoch": 1.7983154250490807, "grad_norm": 0.28040334582328796, "learning_rate": 3.065639926148645e-07, "loss": 0.008, "step": 212970 }, { "epoch": 1.7983998648962447, "grad_norm": 0.4389258623123169, "learning_rate": 3.0630998884273897e-07, "loss": 0.0063, "step": 212980 }, { "epoch": 1.7984843047434085, "grad_norm": 0.0137129295617342, "learning_rate": 3.060560870165996e-07, "loss": 0.0083, "step": 212990 }, { "epoch": 1.7985687445905723, "grad_norm": 1.5311626195907593, "learning_rate": 3.0580228714195916e-07, "loss": 0.0108, "step": 213000 }, { "epoch": 1.798653184437736, "grad_norm": 0.012854138389229774, "learning_rate": 3.0554858922433207e-07, "loss": 0.0048, "step": 213010 }, { "epoch": 1.7987376242849, "grad_norm": 0.2730194628238678, "learning_rate": 3.052949932692273e-07, "loss": 0.0072, "step": 213020 }, { "epoch": 1.798822064132064, "grad_norm": 0.25856292247772217, "learning_rate": 3.0504149928215374e-07, "loss": 0.0061, "step": 213030 }, { "epoch": 1.7989065039792278, "grad_norm": 0.40061289072036743, "learning_rate": 3.047881072686165e-07, "loss": 0.0052, "step": 213040 }, { "epoch": 1.7989909438263916, "grad_norm": 0.1791725903749466, "learning_rate": 3.0453481723411836e-07, "loss": 0.004, "step": 213050 }, { "epoch": 1.7990753836735556, "grad_norm": 0.010317358188331127, "learning_rate": 3.042816291841627e-07, "loss": 0.0056, "step": 213060 }, { "epoch": 1.7991598235207196, "grad_norm": 0.46733346581459045, "learning_rate": 3.0402854312424736e-07, "loss": 0.0061, "step": 213070 }, { "epoch": 1.7992442633678833, "grad_norm": 0.5050050616264343, "learning_rate": 3.0377555905987064e-07, "loss": 0.0058, "step": 213080 }, { "epoch": 1.7993287032150471, "grad_norm": 0.34063854813575745, "learning_rate": 3.035226769965249e-07, "loss": 0.0103, "step": 213090 }, { "epoch": 1.799413143062211, "grad_norm": 0.129256471991539, "learning_rate": 3.0326989693970575e-07, "loss": 0.0032, "step": 213100 }, { "epoch": 1.799497582909375, "grad_norm": 0.17363744974136353, "learning_rate": 3.030172188949015e-07, "loss": 0.0077, "step": 213110 }, { "epoch": 1.799582022756539, "grad_norm": 0.027423501014709473, "learning_rate": 3.0276464286760123e-07, "loss": 0.0053, "step": 213120 }, { "epoch": 1.7996664626037027, "grad_norm": 0.025735534727573395, "learning_rate": 3.0251216886328926e-07, "loss": 0.0026, "step": 213130 }, { "epoch": 1.7997509024508664, "grad_norm": 0.027512427419424057, "learning_rate": 3.022597968874508e-07, "loss": 0.0071, "step": 213140 }, { "epoch": 1.7998353422980304, "grad_norm": 0.19217906892299652, "learning_rate": 3.0200752694556746e-07, "loss": 0.0102, "step": 213150 }, { "epoch": 1.7999197821451944, "grad_norm": 0.23791085183620453, "learning_rate": 3.017553590431171e-07, "loss": 0.0047, "step": 213160 }, { "epoch": 1.8000042219923582, "grad_norm": 0.3889678716659546, "learning_rate": 3.015032931855783e-07, "loss": 0.0037, "step": 213170 }, { "epoch": 1.800088661839522, "grad_norm": 0.5578010678291321, "learning_rate": 3.012513293784247e-07, "loss": 0.0056, "step": 213180 }, { "epoch": 1.800173101686686, "grad_norm": 0.19445432722568512, "learning_rate": 3.0099946762712994e-07, "loss": 0.0095, "step": 213190 }, { "epoch": 1.80025754153385, "grad_norm": 0.44700151681900024, "learning_rate": 3.007477079371629e-07, "loss": 0.0075, "step": 213200 }, { "epoch": 1.8003419813810138, "grad_norm": 0.43509453535079956, "learning_rate": 3.004960503139931e-07, "loss": 0.0082, "step": 213210 }, { "epoch": 1.8004264212281775, "grad_norm": 0.13894517719745636, "learning_rate": 3.0024449476308557e-07, "loss": 0.0091, "step": 213220 }, { "epoch": 1.8005108610753413, "grad_norm": 0.05903015285730362, "learning_rate": 2.999930412899055e-07, "loss": 0.0062, "step": 213230 }, { "epoch": 1.8005953009225053, "grad_norm": 0.46797001361846924, "learning_rate": 2.9974168989991284e-07, "loss": 0.005, "step": 213240 }, { "epoch": 1.8006797407696693, "grad_norm": 0.18036983907222748, "learning_rate": 2.994904405985677e-07, "loss": 0.0054, "step": 213250 }, { "epoch": 1.800764180616833, "grad_norm": 0.3808985948562622, "learning_rate": 2.992392933913274e-07, "loss": 0.0048, "step": 213260 }, { "epoch": 1.8008486204639969, "grad_norm": 0.1991582214832306, "learning_rate": 2.989882482836459e-07, "loss": 0.0063, "step": 213270 }, { "epoch": 1.8009330603111608, "grad_norm": 0.678904116153717, "learning_rate": 2.9873730528097666e-07, "loss": 0.0042, "step": 213280 }, { "epoch": 1.8010175001583248, "grad_norm": 0.0006294319173321128, "learning_rate": 2.984864643887692e-07, "loss": 0.005, "step": 213290 }, { "epoch": 1.8011019400054886, "grad_norm": 0.3291633129119873, "learning_rate": 2.9823572561247296e-07, "loss": 0.0044, "step": 213300 }, { "epoch": 1.8011863798526524, "grad_norm": 0.18578772246837616, "learning_rate": 2.97985088957532e-07, "loss": 0.0043, "step": 213310 }, { "epoch": 1.8012708196998164, "grad_norm": 0.3136711120605469, "learning_rate": 2.9773455442939246e-07, "loss": 0.005, "step": 213320 }, { "epoch": 1.8013552595469802, "grad_norm": 0.45850226283073425, "learning_rate": 2.9748412203349397e-07, "loss": 0.0047, "step": 213330 }, { "epoch": 1.8014396993941442, "grad_norm": 0.13009682297706604, "learning_rate": 2.9723379177527767e-07, "loss": 0.0052, "step": 213340 }, { "epoch": 1.801524139241308, "grad_norm": 0.8451528549194336, "learning_rate": 2.9698356366017977e-07, "loss": 0.0113, "step": 213350 }, { "epoch": 1.8016085790884717, "grad_norm": 0.1669161021709442, "learning_rate": 2.967334376936348e-07, "loss": 0.0064, "step": 213360 }, { "epoch": 1.8016930189356357, "grad_norm": 0.2291436344385147, "learning_rate": 2.964834138810757e-07, "loss": 0.0054, "step": 213370 }, { "epoch": 1.8017774587827997, "grad_norm": 0.1595969796180725, "learning_rate": 2.962334922279331e-07, "loss": 0.0081, "step": 213380 }, { "epoch": 1.8018618986299635, "grad_norm": 0.014590582810342312, "learning_rate": 2.959836727396354e-07, "loss": 0.0015, "step": 213390 }, { "epoch": 1.8019463384771273, "grad_norm": 0.603282630443573, "learning_rate": 2.9573395542160765e-07, "loss": 0.0087, "step": 213400 }, { "epoch": 1.8020307783242913, "grad_norm": 0.4421033561229706, "learning_rate": 2.954843402792751e-07, "loss": 0.0058, "step": 213410 }, { "epoch": 1.8021152181714553, "grad_norm": 0.23326949775218964, "learning_rate": 2.9523482731805777e-07, "loss": 0.007, "step": 213420 }, { "epoch": 1.802199658018619, "grad_norm": 0.8359416127204895, "learning_rate": 2.949854165433769e-07, "loss": 0.0055, "step": 213430 }, { "epoch": 1.8022840978657828, "grad_norm": 0.2572169899940491, "learning_rate": 2.947361079606481e-07, "loss": 0.008, "step": 213440 }, { "epoch": 1.8023685377129466, "grad_norm": 0.2553849518299103, "learning_rate": 2.9448690157528717e-07, "loss": 0.0056, "step": 213450 }, { "epoch": 1.8024529775601106, "grad_norm": 0.19693441689014435, "learning_rate": 2.942377973927057e-07, "loss": 0.0038, "step": 213460 }, { "epoch": 1.8025374174072746, "grad_norm": 0.03041469305753708, "learning_rate": 2.939887954183157e-07, "loss": 0.0044, "step": 213470 }, { "epoch": 1.8026218572544384, "grad_norm": 0.2240307480096817, "learning_rate": 2.937398956575244e-07, "loss": 0.0064, "step": 213480 }, { "epoch": 1.8027062971016021, "grad_norm": 0.22076773643493652, "learning_rate": 2.934910981157385e-07, "loss": 0.0074, "step": 213490 }, { "epoch": 1.8027907369487661, "grad_norm": 0.056390244513750076, "learning_rate": 2.9324240279836156e-07, "loss": 0.0056, "step": 213500 }, { "epoch": 1.8028751767959301, "grad_norm": 0.24649165570735931, "learning_rate": 2.929938097107948e-07, "loss": 0.003, "step": 213510 }, { "epoch": 1.802959616643094, "grad_norm": 0.21692384779453278, "learning_rate": 2.927453188584384e-07, "loss": 0.0073, "step": 213520 }, { "epoch": 1.8030440564902577, "grad_norm": 0.49614354968070984, "learning_rate": 2.924969302466885e-07, "loss": 0.0073, "step": 213530 }, { "epoch": 1.8031284963374217, "grad_norm": 0.2541511654853821, "learning_rate": 2.922486438809408e-07, "loss": 0.0072, "step": 213540 }, { "epoch": 1.8032129361845857, "grad_norm": 0.3355872333049774, "learning_rate": 2.920004597665871e-07, "loss": 0.0123, "step": 213550 }, { "epoch": 1.8032973760317494, "grad_norm": 0.40904226899147034, "learning_rate": 2.9175237790901977e-07, "loss": 0.0063, "step": 213560 }, { "epoch": 1.8033818158789132, "grad_norm": 0.4015503227710724, "learning_rate": 2.9150439831362507e-07, "loss": 0.0078, "step": 213570 }, { "epoch": 1.803466255726077, "grad_norm": 0.05433456227183342, "learning_rate": 2.912565209857909e-07, "loss": 0.0053, "step": 213580 }, { "epoch": 1.803550695573241, "grad_norm": 0.05622084066271782, "learning_rate": 2.9100874593089965e-07, "loss": 0.0058, "step": 213590 }, { "epoch": 1.803635135420405, "grad_norm": 0.311941921710968, "learning_rate": 2.9076107315433424e-07, "loss": 0.0093, "step": 213600 }, { "epoch": 1.8037195752675688, "grad_norm": 0.22068454325199127, "learning_rate": 2.9051350266147193e-07, "loss": 0.0104, "step": 213610 }, { "epoch": 1.8038040151147325, "grad_norm": 0.24453237652778625, "learning_rate": 2.9026603445769243e-07, "loss": 0.0117, "step": 213620 }, { "epoch": 1.8038884549618965, "grad_norm": 0.36014488339424133, "learning_rate": 2.900186685483691e-07, "loss": 0.0051, "step": 213630 }, { "epoch": 1.8039728948090605, "grad_norm": 0.2235746830701828, "learning_rate": 2.8977140493887503e-07, "loss": 0.0113, "step": 213640 }, { "epoch": 1.8040573346562243, "grad_norm": 0.17833077907562256, "learning_rate": 2.895242436345808e-07, "loss": 0.0079, "step": 213650 }, { "epoch": 1.804141774503388, "grad_norm": 0.13372524082660675, "learning_rate": 2.892771846408543e-07, "loss": 0.0059, "step": 213660 }, { "epoch": 1.8042262143505519, "grad_norm": 0.12002779543399811, "learning_rate": 2.8903022796306303e-07, "loss": 0.0074, "step": 213670 }, { "epoch": 1.8043106541977159, "grad_norm": 0.22926384210586548, "learning_rate": 2.8878337360656985e-07, "loss": 0.0063, "step": 213680 }, { "epoch": 1.8043950940448799, "grad_norm": 0.2631164789199829, "learning_rate": 2.88536621576736e-07, "loss": 0.0049, "step": 213690 }, { "epoch": 1.8044795338920436, "grad_norm": 0.013858026824891567, "learning_rate": 2.882899718789206e-07, "loss": 0.0075, "step": 213700 }, { "epoch": 1.8045639737392074, "grad_norm": 0.08334480971097946, "learning_rate": 2.8804342451848264e-07, "loss": 0.0072, "step": 213710 }, { "epoch": 1.8046484135863714, "grad_norm": 0.24118655920028687, "learning_rate": 2.877969795007746e-07, "loss": 0.0038, "step": 213720 }, { "epoch": 1.8047328534335354, "grad_norm": 0.005859247874468565, "learning_rate": 2.875506368311515e-07, "loss": 0.0075, "step": 213730 }, { "epoch": 1.8048172932806992, "grad_norm": 0.4580489695072174, "learning_rate": 2.87304396514963e-07, "loss": 0.009, "step": 213740 }, { "epoch": 1.804901733127863, "grad_norm": 0.04432453587651253, "learning_rate": 2.8705825855755654e-07, "loss": 0.0033, "step": 213750 }, { "epoch": 1.804986172975027, "grad_norm": 0.7077799439430237, "learning_rate": 2.868122229642806e-07, "loss": 0.0086, "step": 213760 }, { "epoch": 1.805070612822191, "grad_norm": 0.05404776334762573, "learning_rate": 2.865662897404758e-07, "loss": 0.0081, "step": 213770 }, { "epoch": 1.8051550526693547, "grad_norm": 0.08207203447818756, "learning_rate": 2.863204588914859e-07, "loss": 0.0055, "step": 213780 }, { "epoch": 1.8052394925165185, "grad_norm": 0.10101968795061111, "learning_rate": 2.8607473042264856e-07, "loss": 0.0057, "step": 213790 }, { "epoch": 1.8053239323636823, "grad_norm": 0.052686657756567, "learning_rate": 2.8582910433930355e-07, "loss": 0.0031, "step": 213800 }, { "epoch": 1.8054083722108463, "grad_norm": 0.2914333939552307, "learning_rate": 2.8558358064678325e-07, "loss": 0.0035, "step": 213810 }, { "epoch": 1.8054928120580103, "grad_norm": 0.13938996195793152, "learning_rate": 2.853381593504223e-07, "loss": 0.0054, "step": 213820 }, { "epoch": 1.805577251905174, "grad_norm": 0.3899596333503723, "learning_rate": 2.850928404555503e-07, "loss": 0.0109, "step": 213830 }, { "epoch": 1.8056616917523378, "grad_norm": 0.23716877400875092, "learning_rate": 2.8484762396749577e-07, "loss": 0.0036, "step": 213840 }, { "epoch": 1.8057461315995018, "grad_norm": 0.5484055876731873, "learning_rate": 2.8460250989158443e-07, "loss": 0.0078, "step": 213850 }, { "epoch": 1.8058305714466658, "grad_norm": 0.3166988790035248, "learning_rate": 2.8435749823313985e-07, "loss": 0.0064, "step": 213860 }, { "epoch": 1.8059150112938296, "grad_norm": 0.31968989968299866, "learning_rate": 2.8411258899748494e-07, "loss": 0.008, "step": 213870 }, { "epoch": 1.8059994511409934, "grad_norm": 0.2146228849887848, "learning_rate": 2.8386778218993715e-07, "loss": 0.0065, "step": 213880 }, { "epoch": 1.8060838909881574, "grad_norm": 0.3151596784591675, "learning_rate": 2.8362307781581553e-07, "loss": 0.0045, "step": 213890 }, { "epoch": 1.8061683308353211, "grad_norm": 0.08328565955162048, "learning_rate": 2.8337847588043364e-07, "loss": 0.0086, "step": 213900 }, { "epoch": 1.8062527706824851, "grad_norm": 0.46887779235839844, "learning_rate": 2.83133976389105e-07, "loss": 0.0088, "step": 213910 }, { "epoch": 1.806337210529649, "grad_norm": 0.2325763702392578, "learning_rate": 2.8288957934714033e-07, "loss": 0.0041, "step": 213920 }, { "epoch": 1.8064216503768127, "grad_norm": 0.17538602650165558, "learning_rate": 2.826452847598471e-07, "loss": 0.0031, "step": 213930 }, { "epoch": 1.8065060902239767, "grad_norm": 0.2769213914871216, "learning_rate": 2.82401092632531e-07, "loss": 0.005, "step": 213940 }, { "epoch": 1.8065905300711407, "grad_norm": 0.1735750138759613, "learning_rate": 2.8215700297049674e-07, "loss": 0.0035, "step": 213950 }, { "epoch": 1.8066749699183045, "grad_norm": 1.2624355554580688, "learning_rate": 2.8191301577904563e-07, "loss": 0.0078, "step": 213960 }, { "epoch": 1.8067594097654682, "grad_norm": 0.14850060641765594, "learning_rate": 2.8166913106347726e-07, "loss": 0.0046, "step": 213970 }, { "epoch": 1.8068438496126322, "grad_norm": 0.38418444991111755, "learning_rate": 2.81425348829088e-07, "loss": 0.0052, "step": 213980 }, { "epoch": 1.8069282894597962, "grad_norm": 0.41548648476600647, "learning_rate": 2.8118166908117307e-07, "loss": 0.0064, "step": 213990 }, { "epoch": 1.80701272930696, "grad_norm": 0.08285452425479889, "learning_rate": 2.8093809182502596e-07, "loss": 0.0062, "step": 214000 }, { "epoch": 1.8070971691541238, "grad_norm": 0.1598028540611267, "learning_rate": 2.8069461706593694e-07, "loss": 0.0087, "step": 214010 }, { "epoch": 1.8071816090012875, "grad_norm": 0.0012035981053486466, "learning_rate": 2.804512448091934e-07, "loss": 0.0043, "step": 214020 }, { "epoch": 1.8072660488484515, "grad_norm": 0.013992189429700375, "learning_rate": 2.802079750600806e-07, "loss": 0.0025, "step": 214030 }, { "epoch": 1.8073504886956155, "grad_norm": 0.203630730509758, "learning_rate": 2.799648078238842e-07, "loss": 0.0054, "step": 214040 }, { "epoch": 1.8074349285427793, "grad_norm": 0.4203644394874573, "learning_rate": 2.797217431058846e-07, "loss": 0.0075, "step": 214050 }, { "epoch": 1.807519368389943, "grad_norm": 0.6929190754890442, "learning_rate": 2.7947878091136196e-07, "loss": 0.0028, "step": 214060 }, { "epoch": 1.807603808237107, "grad_norm": 0.06485608965158463, "learning_rate": 2.792359212455925e-07, "loss": 0.0073, "step": 214070 }, { "epoch": 1.807688248084271, "grad_norm": 0.4661257863044739, "learning_rate": 2.789931641138527e-07, "loss": 0.0071, "step": 214080 }, { "epoch": 1.8077726879314349, "grad_norm": 0.2727418839931488, "learning_rate": 2.787505095214127e-07, "loss": 0.0063, "step": 214090 }, { "epoch": 1.8078571277785986, "grad_norm": 0.12617330253124237, "learning_rate": 2.7850795747354385e-07, "loss": 0.0088, "step": 214100 }, { "epoch": 1.8079415676257626, "grad_norm": 0.38429614901542664, "learning_rate": 2.782655079755159e-07, "loss": 0.0091, "step": 214110 }, { "epoch": 1.8080260074729266, "grad_norm": 0.21992827951908112, "learning_rate": 2.780231610325923e-07, "loss": 0.0106, "step": 214120 }, { "epoch": 1.8081104473200904, "grad_norm": 0.49989891052246094, "learning_rate": 2.7778091665003837e-07, "loss": 0.0074, "step": 214130 }, { "epoch": 1.8081948871672542, "grad_norm": 0.16143374145030975, "learning_rate": 2.7753877483311485e-07, "loss": 0.0051, "step": 214140 }, { "epoch": 1.808279327014418, "grad_norm": 0.3071984350681305, "learning_rate": 2.772967355870826e-07, "loss": 0.0054, "step": 214150 }, { "epoch": 1.808363766861582, "grad_norm": 0.26194626092910767, "learning_rate": 2.7705479891719676e-07, "loss": 0.0093, "step": 214160 }, { "epoch": 1.808448206708746, "grad_norm": 0.06831101328134537, "learning_rate": 2.768129648287132e-07, "loss": 0.007, "step": 214170 }, { "epoch": 1.8085326465559097, "grad_norm": 0.11798607558012009, "learning_rate": 2.7657123332688327e-07, "loss": 0.0085, "step": 214180 }, { "epoch": 1.8086170864030735, "grad_norm": 0.0066286190412938595, "learning_rate": 2.7632960441695947e-07, "loss": 0.0048, "step": 214190 }, { "epoch": 1.8087015262502375, "grad_norm": 0.14259734749794006, "learning_rate": 2.760880781041875e-07, "loss": 0.008, "step": 214200 }, { "epoch": 1.8087859660974015, "grad_norm": 0.5294741988182068, "learning_rate": 2.7584665439381543e-07, "loss": 0.0095, "step": 214210 }, { "epoch": 1.8088704059445653, "grad_norm": 0.12766386568546295, "learning_rate": 2.7560533329108576e-07, "loss": 0.0045, "step": 214220 }, { "epoch": 1.808954845791729, "grad_norm": 0.34878790378570557, "learning_rate": 2.7536411480123927e-07, "loss": 0.0059, "step": 214230 }, { "epoch": 1.809039285638893, "grad_norm": 0.2247755378484726, "learning_rate": 2.7512299892951676e-07, "loss": 0.0077, "step": 214240 }, { "epoch": 1.8091237254860568, "grad_norm": 0.17731857299804688, "learning_rate": 2.748819856811552e-07, "loss": 0.0064, "step": 214250 }, { "epoch": 1.8092081653332208, "grad_norm": 0.30485209822654724, "learning_rate": 2.7464107506138814e-07, "loss": 0.0062, "step": 214260 }, { "epoch": 1.8092926051803846, "grad_norm": 0.20340435206890106, "learning_rate": 2.7440026707544754e-07, "loss": 0.0039, "step": 214270 }, { "epoch": 1.8093770450275484, "grad_norm": 0.3226586878299713, "learning_rate": 2.741595617285658e-07, "loss": 0.0068, "step": 214280 }, { "epoch": 1.8094614848747124, "grad_norm": 0.21844686567783356, "learning_rate": 2.7391895902596944e-07, "loss": 0.003, "step": 214290 }, { "epoch": 1.8095459247218764, "grad_norm": 0.0011639774311333895, "learning_rate": 2.736784589728853e-07, "loss": 0.0027, "step": 214300 }, { "epoch": 1.8096303645690401, "grad_norm": 0.25193989276885986, "learning_rate": 2.734380615745358e-07, "loss": 0.009, "step": 214310 }, { "epoch": 1.809714804416204, "grad_norm": 0.06846312433481216, "learning_rate": 2.7319776683614405e-07, "loss": 0.0053, "step": 214320 }, { "epoch": 1.809799244263368, "grad_norm": 0.7655022740364075, "learning_rate": 2.729575747629276e-07, "loss": 0.0053, "step": 214330 }, { "epoch": 1.809883684110532, "grad_norm": 0.46679288148880005, "learning_rate": 2.7271748536010447e-07, "loss": 0.0026, "step": 214340 }, { "epoch": 1.8099681239576957, "grad_norm": 0.08580022305250168, "learning_rate": 2.7247749863288877e-07, "loss": 0.0062, "step": 214350 }, { "epoch": 1.8100525638048595, "grad_norm": 0.41559073328971863, "learning_rate": 2.722376145864924e-07, "loss": 0.0051, "step": 214360 }, { "epoch": 1.8101370036520232, "grad_norm": 0.34361040592193604, "learning_rate": 2.719978332261264e-07, "loss": 0.0073, "step": 214370 }, { "epoch": 1.8102214434991872, "grad_norm": 0.46886518597602844, "learning_rate": 2.717581545569986e-07, "loss": 0.0034, "step": 214380 }, { "epoch": 1.8103058833463512, "grad_norm": 0.3675263524055481, "learning_rate": 2.7151857858431495e-07, "loss": 0.0065, "step": 214390 }, { "epoch": 1.810390323193515, "grad_norm": 0.4553365111351013, "learning_rate": 2.7127910531327907e-07, "loss": 0.0028, "step": 214400 }, { "epoch": 1.8104747630406788, "grad_norm": 0.49543410539627075, "learning_rate": 2.710397347490923e-07, "loss": 0.0079, "step": 214410 }, { "epoch": 1.8105592028878428, "grad_norm": 0.006228240672498941, "learning_rate": 2.7080046689695216e-07, "loss": 0.0085, "step": 214420 }, { "epoch": 1.8106436427350068, "grad_norm": 0.4002004861831665, "learning_rate": 2.705613017620579e-07, "loss": 0.0067, "step": 214430 }, { "epoch": 1.8107280825821706, "grad_norm": 0.14196045696735382, "learning_rate": 2.703222393496019e-07, "loss": 0.0061, "step": 214440 }, { "epoch": 1.8108125224293343, "grad_norm": 0.17204751074314117, "learning_rate": 2.7008327966477896e-07, "loss": 0.0027, "step": 214450 }, { "epoch": 1.8108969622764983, "grad_norm": 0.18506884574890137, "learning_rate": 2.698444227127772e-07, "loss": 0.0032, "step": 214460 }, { "epoch": 1.8109814021236623, "grad_norm": 0.21204277873039246, "learning_rate": 2.696056684987852e-07, "loss": 0.0028, "step": 214470 }, { "epoch": 1.811065841970826, "grad_norm": 0.4296559989452362, "learning_rate": 2.6936701702798937e-07, "loss": 0.0063, "step": 214480 }, { "epoch": 1.8111502818179899, "grad_norm": 0.2171727418899536, "learning_rate": 2.6912846830557225e-07, "loss": 0.0031, "step": 214490 }, { "epoch": 1.8112347216651536, "grad_norm": 0.5063209533691406, "learning_rate": 2.6889002233671523e-07, "loss": 0.0097, "step": 214500 }, { "epoch": 1.8113191615123176, "grad_norm": 0.10709962993860245, "learning_rate": 2.68651679126597e-07, "loss": 0.0028, "step": 214510 }, { "epoch": 1.8114036013594816, "grad_norm": 0.2094026505947113, "learning_rate": 2.684134386803955e-07, "loss": 0.0053, "step": 214520 }, { "epoch": 1.8114880412066454, "grad_norm": 0.11774314939975739, "learning_rate": 2.681753010032839e-07, "loss": 0.0109, "step": 214530 }, { "epoch": 1.8115724810538092, "grad_norm": 0.08455520868301392, "learning_rate": 2.679372661004354e-07, "loss": 0.0065, "step": 214540 }, { "epoch": 1.8116569209009732, "grad_norm": 0.11850810796022415, "learning_rate": 2.6769933397701953e-07, "loss": 0.0045, "step": 214550 }, { "epoch": 1.8117413607481372, "grad_norm": 0.23012931644916534, "learning_rate": 2.6746150463820453e-07, "loss": 0.0063, "step": 214560 }, { "epoch": 1.811825800595301, "grad_norm": 0.14817732572555542, "learning_rate": 2.672237780891562e-07, "loss": 0.0033, "step": 214570 }, { "epoch": 1.8119102404424647, "grad_norm": 0.530421257019043, "learning_rate": 2.669861543350377e-07, "loss": 0.0109, "step": 214580 }, { "epoch": 1.8119946802896285, "grad_norm": 0.30374062061309814, "learning_rate": 2.6674863338100987e-07, "loss": 0.0048, "step": 214590 }, { "epoch": 1.8120791201367925, "grad_norm": 0.3548085689544678, "learning_rate": 2.665112152322313e-07, "loss": 0.0078, "step": 214600 }, { "epoch": 1.8121635599839565, "grad_norm": 0.30757057666778564, "learning_rate": 2.6627389989385965e-07, "loss": 0.0073, "step": 214610 }, { "epoch": 1.8122479998311203, "grad_norm": 0.023856092244386673, "learning_rate": 2.6603668737104794e-07, "loss": 0.008, "step": 214620 }, { "epoch": 1.812332439678284, "grad_norm": 0.2570312023162842, "learning_rate": 2.6579957766895037e-07, "loss": 0.0073, "step": 214630 }, { "epoch": 1.812416879525448, "grad_norm": 0.11508180946111679, "learning_rate": 2.6556257079271506e-07, "loss": 0.0038, "step": 214640 }, { "epoch": 1.812501319372612, "grad_norm": 0.0022919396869838238, "learning_rate": 2.653256667474913e-07, "loss": 0.003, "step": 214650 }, { "epoch": 1.8125857592197758, "grad_norm": 0.058995552361011505, "learning_rate": 2.650888655384237e-07, "loss": 0.0082, "step": 214660 }, { "epoch": 1.8126701990669396, "grad_norm": 0.08371222764253616, "learning_rate": 2.6485216717065556e-07, "loss": 0.0042, "step": 214670 }, { "epoch": 1.8127546389141036, "grad_norm": 0.163487046957016, "learning_rate": 2.646155716493282e-07, "loss": 0.0075, "step": 214680 }, { "epoch": 1.8128390787612676, "grad_norm": 0.24777522683143616, "learning_rate": 2.643790789795797e-07, "loss": 0.0039, "step": 214690 }, { "epoch": 1.8129235186084314, "grad_norm": 0.15537531673908234, "learning_rate": 2.6414268916654774e-07, "loss": 0.0071, "step": 214700 }, { "epoch": 1.8130079584555951, "grad_norm": 0.5410708785057068, "learning_rate": 2.6390640221536534e-07, "loss": 0.0056, "step": 214710 }, { "epoch": 1.813092398302759, "grad_norm": 0.21235914528369904, "learning_rate": 2.636702181311662e-07, "loss": 0.0043, "step": 214720 }, { "epoch": 1.813176838149923, "grad_norm": 0.2052658200263977, "learning_rate": 2.63434136919079e-07, "loss": 0.005, "step": 214730 }, { "epoch": 1.813261277997087, "grad_norm": 0.24449200928211212, "learning_rate": 2.6319815858423194e-07, "loss": 0.0028, "step": 214740 }, { "epoch": 1.8133457178442507, "grad_norm": 0.24280117452144623, "learning_rate": 2.6296228313174967e-07, "loss": 0.0025, "step": 214750 }, { "epoch": 1.8134301576914145, "grad_norm": 0.09957925230264664, "learning_rate": 2.627265105667565e-07, "loss": 0.0053, "step": 214760 }, { "epoch": 1.8135145975385785, "grad_norm": 0.11259281635284424, "learning_rate": 2.624908408943722e-07, "loss": 0.0024, "step": 214770 }, { "epoch": 1.8135990373857425, "grad_norm": 0.409352570772171, "learning_rate": 2.6225527411971654e-07, "loss": 0.0178, "step": 214780 }, { "epoch": 1.8136834772329062, "grad_norm": 0.12444573640823364, "learning_rate": 2.620198102479049e-07, "loss": 0.0054, "step": 214790 }, { "epoch": 1.81376791708007, "grad_norm": 0.06458315253257751, "learning_rate": 2.6178444928405266e-07, "loss": 0.0043, "step": 214800 }, { "epoch": 1.813852356927234, "grad_norm": 0.23803304135799408, "learning_rate": 2.6154919123327127e-07, "loss": 0.0064, "step": 214810 }, { "epoch": 1.8139367967743978, "grad_norm": 0.17520365118980408, "learning_rate": 2.6131403610067053e-07, "loss": 0.004, "step": 214820 }, { "epoch": 1.8140212366215618, "grad_norm": 0.41331472992897034, "learning_rate": 2.6107898389135746e-07, "loss": 0.0058, "step": 214830 }, { "epoch": 1.8141056764687256, "grad_norm": 0.29036688804626465, "learning_rate": 2.608440346104374e-07, "loss": 0.008, "step": 214840 }, { "epoch": 1.8141901163158893, "grad_norm": 0.0022066528908908367, "learning_rate": 2.606091882630146e-07, "loss": 0.0047, "step": 214850 }, { "epoch": 1.8142745561630533, "grad_norm": 0.22411759197711945, "learning_rate": 2.6037444485418786e-07, "loss": 0.0106, "step": 214860 }, { "epoch": 1.8143589960102173, "grad_norm": 0.056315649300813675, "learning_rate": 2.6013980438905793e-07, "loss": 0.0046, "step": 214870 }, { "epoch": 1.814443435857381, "grad_norm": 0.22920437157154083, "learning_rate": 2.599052668727198e-07, "loss": 0.0068, "step": 214880 }, { "epoch": 1.8145278757045449, "grad_norm": 0.09263797849416733, "learning_rate": 2.596708323102681e-07, "loss": 0.0154, "step": 214890 }, { "epoch": 1.8146123155517089, "grad_norm": 0.2562173902988434, "learning_rate": 2.5943650070679506e-07, "loss": 0.006, "step": 214900 }, { "epoch": 1.8146967553988729, "grad_norm": 0.2128634750843048, "learning_rate": 2.5920227206738923e-07, "loss": 0.0065, "step": 214910 }, { "epoch": 1.8147811952460366, "grad_norm": 0.3733108639717102, "learning_rate": 2.5896814639713886e-07, "loss": 0.0063, "step": 214920 }, { "epoch": 1.8148656350932004, "grad_norm": 0.1840662956237793, "learning_rate": 2.5873412370112815e-07, "loss": 0.0048, "step": 214930 }, { "epoch": 1.8149500749403642, "grad_norm": 0.3050292730331421, "learning_rate": 2.585002039844414e-07, "loss": 0.008, "step": 214940 }, { "epoch": 1.8150345147875282, "grad_norm": 0.22612452507019043, "learning_rate": 2.582663872521579e-07, "loss": 0.0067, "step": 214950 }, { "epoch": 1.8151189546346922, "grad_norm": 0.25861719250679016, "learning_rate": 2.5803267350935744e-07, "loss": 0.0055, "step": 214960 }, { "epoch": 1.815203394481856, "grad_norm": 0.26562607288360596, "learning_rate": 2.577990627611149e-07, "loss": 0.007, "step": 214970 }, { "epoch": 1.8152878343290197, "grad_norm": 0.2662900984287262, "learning_rate": 2.5756555501250614e-07, "loss": 0.0051, "step": 214980 }, { "epoch": 1.8153722741761837, "grad_norm": 0.2351180911064148, "learning_rate": 2.5733215026860045e-07, "loss": 0.007, "step": 214990 }, { "epoch": 1.8154567140233477, "grad_norm": 0.15985026955604553, "learning_rate": 2.570988485344694e-07, "loss": 0.0065, "step": 215000 }, { "epoch": 1.8155411538705115, "grad_norm": 0.677919328212738, "learning_rate": 2.5686564981517836e-07, "loss": 0.009, "step": 215010 }, { "epoch": 1.8156255937176753, "grad_norm": 0.8636208772659302, "learning_rate": 2.566325541157949e-07, "loss": 0.0037, "step": 215020 }, { "epoch": 1.8157100335648393, "grad_norm": 0.13530723750591278, "learning_rate": 2.563995614413789e-07, "loss": 0.0064, "step": 215030 }, { "epoch": 1.8157944734120033, "grad_norm": 0.5598891377449036, "learning_rate": 2.561666717969935e-07, "loss": 0.0049, "step": 215040 }, { "epoch": 1.815878913259167, "grad_norm": 0.6348675489425659, "learning_rate": 2.559338851876958e-07, "loss": 0.0095, "step": 215050 }, { "epoch": 1.8159633531063308, "grad_norm": 0.17983458936214447, "learning_rate": 2.5570120161854174e-07, "loss": 0.0067, "step": 215060 }, { "epoch": 1.8160477929534946, "grad_norm": 0.12128201127052307, "learning_rate": 2.554686210945856e-07, "loss": 0.0071, "step": 215070 }, { "epoch": 1.8161322328006586, "grad_norm": 0.2583666145801544, "learning_rate": 2.5523614362087837e-07, "loss": 0.0022, "step": 215080 }, { "epoch": 1.8162166726478226, "grad_norm": 0.9314660429954529, "learning_rate": 2.5500376920246984e-07, "loss": 0.006, "step": 215090 }, { "epoch": 1.8163011124949864, "grad_norm": 0.09743916988372803, "learning_rate": 2.547714978444071e-07, "loss": 0.0058, "step": 215100 }, { "epoch": 1.8163855523421502, "grad_norm": 0.10221424698829651, "learning_rate": 2.545393295517351e-07, "loss": 0.0054, "step": 215110 }, { "epoch": 1.8164699921893142, "grad_norm": 0.3125886023044586, "learning_rate": 2.543072643294958e-07, "loss": 0.0061, "step": 215120 }, { "epoch": 1.8165544320364782, "grad_norm": 0.42293643951416016, "learning_rate": 2.5407530218273133e-07, "loss": 0.0045, "step": 215130 }, { "epoch": 1.816638871883642, "grad_norm": 0.2901255190372467, "learning_rate": 2.538434431164788e-07, "loss": 0.0061, "step": 215140 }, { "epoch": 1.8167233117308057, "grad_norm": 0.26619377732276917, "learning_rate": 2.5361168713577354e-07, "loss": 0.0053, "step": 215150 }, { "epoch": 1.8168077515779697, "grad_norm": 1.0318607091903687, "learning_rate": 2.533800342456505e-07, "loss": 0.0062, "step": 215160 }, { "epoch": 1.8168921914251335, "grad_norm": 0.23366934061050415, "learning_rate": 2.531484844511389e-07, "loss": 0.0055, "step": 215170 }, { "epoch": 1.8169766312722975, "grad_norm": 0.02593681588768959, "learning_rate": 2.529170377572709e-07, "loss": 0.0045, "step": 215180 }, { "epoch": 1.8170610711194612, "grad_norm": 0.4861818850040436, "learning_rate": 2.526856941690714e-07, "loss": 0.0052, "step": 215190 }, { "epoch": 1.817145510966625, "grad_norm": 0.4874721169471741, "learning_rate": 2.5245445369156573e-07, "loss": 0.0065, "step": 215200 }, { "epoch": 1.817229950813789, "grad_norm": 0.29250115156173706, "learning_rate": 2.522233163297766e-07, "loss": 0.0079, "step": 215210 }, { "epoch": 1.817314390660953, "grad_norm": 0.27543219923973083, "learning_rate": 2.519922820887244e-07, "loss": 0.0083, "step": 215220 }, { "epoch": 1.8173988305081168, "grad_norm": 0.062159810215234756, "learning_rate": 2.517613509734268e-07, "loss": 0.0064, "step": 215230 }, { "epoch": 1.8174832703552806, "grad_norm": 0.25053972005844116, "learning_rate": 2.515305229888998e-07, "loss": 0.0091, "step": 215240 }, { "epoch": 1.8175677102024446, "grad_norm": 0.19132579863071442, "learning_rate": 2.5129979814015657e-07, "loss": 0.0039, "step": 215250 }, { "epoch": 1.8176521500496086, "grad_norm": 0.17447394132614136, "learning_rate": 2.5106917643220874e-07, "loss": 0.0067, "step": 215260 }, { "epoch": 1.8177365898967723, "grad_norm": 0.1104564443230629, "learning_rate": 2.5083865787006556e-07, "loss": 0.0064, "step": 215270 }, { "epoch": 1.8178210297439361, "grad_norm": 0.2720710039138794, "learning_rate": 2.506082424587325e-07, "loss": 0.0026, "step": 215280 }, { "epoch": 1.8179054695910999, "grad_norm": 0.11429467797279358, "learning_rate": 2.503779302032161e-07, "loss": 0.003, "step": 215290 }, { "epoch": 1.8179899094382639, "grad_norm": 0.41854342818260193, "learning_rate": 2.5014772110851735e-07, "loss": 0.004, "step": 215300 }, { "epoch": 1.8180743492854279, "grad_norm": 0.7049176692962646, "learning_rate": 2.499176151796373e-07, "loss": 0.0062, "step": 215310 }, { "epoch": 1.8181587891325917, "grad_norm": 0.1449645757675171, "learning_rate": 2.4968761242157245e-07, "loss": 0.0082, "step": 215320 }, { "epoch": 1.8182432289797554, "grad_norm": 0.2866540551185608, "learning_rate": 2.4945771283931994e-07, "loss": 0.0034, "step": 215330 }, { "epoch": 1.8183276688269194, "grad_norm": 0.09335682541131973, "learning_rate": 2.492279164378714e-07, "loss": 0.0042, "step": 215340 }, { "epoch": 1.8184121086740834, "grad_norm": 0.2562088668346405, "learning_rate": 2.4899822322221987e-07, "loss": 0.0055, "step": 215350 }, { "epoch": 1.8184965485212472, "grad_norm": 0.46205100417137146, "learning_rate": 2.487686331973527e-07, "loss": 0.0071, "step": 215360 }, { "epoch": 1.818580988368411, "grad_norm": 0.27370116114616394, "learning_rate": 2.4853914636825795e-07, "loss": 0.0066, "step": 215370 }, { "epoch": 1.818665428215575, "grad_norm": 0.4136051535606384, "learning_rate": 2.4830976273991945e-07, "loss": 0.0066, "step": 215380 }, { "epoch": 1.818749868062739, "grad_norm": 0.20487365126609802, "learning_rate": 2.4808048231731885e-07, "loss": 0.0037, "step": 215390 }, { "epoch": 1.8188343079099027, "grad_norm": 0.30666863918304443, "learning_rate": 2.4785130510543655e-07, "loss": 0.0032, "step": 215400 }, { "epoch": 1.8189187477570665, "grad_norm": 0.07698522508144379, "learning_rate": 2.476222311092491e-07, "loss": 0.0082, "step": 215410 }, { "epoch": 1.8190031876042303, "grad_norm": 0.3622957468032837, "learning_rate": 2.473932603337342e-07, "loss": 0.0039, "step": 215420 }, { "epoch": 1.8190876274513943, "grad_norm": 0.3846255838871002, "learning_rate": 2.4716439278386297e-07, "loss": 0.006, "step": 215430 }, { "epoch": 1.8191720672985583, "grad_norm": 0.2871650755405426, "learning_rate": 2.469356284646074e-07, "loss": 0.0058, "step": 215440 }, { "epoch": 1.819256507145722, "grad_norm": 0.28279849886894226, "learning_rate": 2.467069673809358e-07, "loss": 0.0049, "step": 215450 }, { "epoch": 1.8193409469928858, "grad_norm": 0.41121265292167664, "learning_rate": 2.4647840953781533e-07, "loss": 0.0084, "step": 215460 }, { "epoch": 1.8194253868400498, "grad_norm": 0.06990314275026321, "learning_rate": 2.4624995494020976e-07, "loss": 0.0038, "step": 215470 }, { "epoch": 1.8195098266872138, "grad_norm": 0.11390586197376251, "learning_rate": 2.460216035930807e-07, "loss": 0.0056, "step": 215480 }, { "epoch": 1.8195942665343776, "grad_norm": 0.2012050449848175, "learning_rate": 2.4579335550138805e-07, "loss": 0.0089, "step": 215490 }, { "epoch": 1.8196787063815414, "grad_norm": 0.36248770356178284, "learning_rate": 2.455652106700901e-07, "loss": 0.0038, "step": 215500 }, { "epoch": 1.8197631462287052, "grad_norm": 0.41346073150634766, "learning_rate": 2.4533716910414117e-07, "loss": 0.0105, "step": 215510 }, { "epoch": 1.8198475860758692, "grad_norm": 0.5567861795425415, "learning_rate": 2.45109230808494e-07, "loss": 0.0055, "step": 215520 }, { "epoch": 1.8199320259230332, "grad_norm": 0.15762120485305786, "learning_rate": 2.4488139578810077e-07, "loss": 0.0076, "step": 215530 }, { "epoch": 1.820016465770197, "grad_norm": 0.5626285672187805, "learning_rate": 2.4465366404790857e-07, "loss": 0.0063, "step": 215540 }, { "epoch": 1.8201009056173607, "grad_norm": 0.4514397084712982, "learning_rate": 2.444260355928646e-07, "loss": 0.0064, "step": 215550 }, { "epoch": 1.8201853454645247, "grad_norm": 0.4980945885181427, "learning_rate": 2.441985104279132e-07, "loss": 0.004, "step": 215560 }, { "epoch": 1.8202697853116887, "grad_norm": 0.09538538753986359, "learning_rate": 2.4397108855799546e-07, "loss": 0.0055, "step": 215570 }, { "epoch": 1.8203542251588525, "grad_norm": 0.5105413794517517, "learning_rate": 2.4374376998805017e-07, "loss": 0.0091, "step": 215580 }, { "epoch": 1.8204386650060163, "grad_norm": 0.15017281472682953, "learning_rate": 2.4351655472301615e-07, "loss": 0.007, "step": 215590 }, { "epoch": 1.8205231048531803, "grad_norm": 0.44033268094062805, "learning_rate": 2.432894427678273e-07, "loss": 0.0099, "step": 215600 }, { "epoch": 1.8206075447003442, "grad_norm": 0.05941270291805267, "learning_rate": 2.4306243412741794e-07, "loss": 0.0063, "step": 215610 }, { "epoch": 1.820691984547508, "grad_norm": 1.016900897026062, "learning_rate": 2.4283552880671746e-07, "loss": 0.0085, "step": 215620 }, { "epoch": 1.8207764243946718, "grad_norm": 0.14748315513134003, "learning_rate": 2.4260872681065475e-07, "loss": 0.0039, "step": 215630 }, { "epoch": 1.8208608642418356, "grad_norm": 0.34986934065818787, "learning_rate": 2.4238202814415524e-07, "loss": 0.0078, "step": 215640 }, { "epoch": 1.8209453040889996, "grad_norm": 0.28232288360595703, "learning_rate": 2.421554328121428e-07, "loss": 0.0043, "step": 215650 }, { "epoch": 1.8210297439361636, "grad_norm": 0.02090381272137165, "learning_rate": 2.4192894081953965e-07, "loss": 0.0126, "step": 215660 }, { "epoch": 1.8211141837833273, "grad_norm": 0.5832570791244507, "learning_rate": 2.417025521712646e-07, "loss": 0.0095, "step": 215670 }, { "epoch": 1.8211986236304911, "grad_norm": 0.05983848124742508, "learning_rate": 2.414762668722359e-07, "loss": 0.0053, "step": 215680 }, { "epoch": 1.8212830634776551, "grad_norm": 0.29501837491989136, "learning_rate": 2.4125008492736635e-07, "loss": 0.0038, "step": 215690 }, { "epoch": 1.8213675033248191, "grad_norm": 0.2955259382724762, "learning_rate": 2.4102400634157085e-07, "loss": 0.0048, "step": 215700 }, { "epoch": 1.821451943171983, "grad_norm": 0.28496935963630676, "learning_rate": 2.407980311197583e-07, "loss": 0.0081, "step": 215710 }, { "epoch": 1.8215363830191467, "grad_norm": 0.22310790419578552, "learning_rate": 2.4057215926683755e-07, "loss": 0.0039, "step": 215720 }, { "epoch": 1.8216208228663107, "grad_norm": 0.008935605175793171, "learning_rate": 2.4034639078771347e-07, "loss": 0.0048, "step": 215730 }, { "epoch": 1.8217052627134744, "grad_norm": 0.018583523109555244, "learning_rate": 2.4012072568729115e-07, "loss": 0.0146, "step": 215740 }, { "epoch": 1.8217897025606384, "grad_norm": 0.5951226353645325, "learning_rate": 2.3989516397047164e-07, "loss": 0.0102, "step": 215750 }, { "epoch": 1.8218741424078022, "grad_norm": 0.018719036132097244, "learning_rate": 2.3966970564215263e-07, "loss": 0.0048, "step": 215760 }, { "epoch": 1.821958582254966, "grad_norm": 0.21243524551391602, "learning_rate": 2.3944435070723305e-07, "loss": 0.0055, "step": 215770 }, { "epoch": 1.82204302210213, "grad_norm": 0.3231281042098999, "learning_rate": 2.3921909917060557e-07, "loss": 0.0081, "step": 215780 }, { "epoch": 1.822127461949294, "grad_norm": 0.011953898705542088, "learning_rate": 2.389939510371647e-07, "loss": 0.0065, "step": 215790 }, { "epoch": 1.8222119017964578, "grad_norm": 0.014871710911393166, "learning_rate": 2.387689063117993e-07, "loss": 0.0044, "step": 215800 }, { "epoch": 1.8222963416436215, "grad_norm": 0.40863049030303955, "learning_rate": 2.385439649993976e-07, "loss": 0.0051, "step": 215810 }, { "epoch": 1.8223807814907855, "grad_norm": 0.011979018338024616, "learning_rate": 2.3831912710484462e-07, "loss": 0.0059, "step": 215820 }, { "epoch": 1.8224652213379495, "grad_norm": 0.16460424661636353, "learning_rate": 2.3809439263302537e-07, "loss": 0.0047, "step": 215830 }, { "epoch": 1.8225496611851133, "grad_norm": 0.3701290786266327, "learning_rate": 2.3786976158881925e-07, "loss": 0.0035, "step": 215840 }, { "epoch": 1.822634101032277, "grad_norm": 0.08526498824357986, "learning_rate": 2.3764523397710683e-07, "loss": 0.0055, "step": 215850 }, { "epoch": 1.8227185408794409, "grad_norm": 0.5414058566093445, "learning_rate": 2.3742080980276416e-07, "loss": 0.0069, "step": 215860 }, { "epoch": 1.8228029807266048, "grad_norm": 0.2643854022026062, "learning_rate": 2.3719648907066463e-07, "loss": 0.0099, "step": 215870 }, { "epoch": 1.8228874205737688, "grad_norm": 0.003977607004344463, "learning_rate": 2.369722717856826e-07, "loss": 0.005, "step": 215880 }, { "epoch": 1.8229718604209326, "grad_norm": 0.1486959308385849, "learning_rate": 2.3674815795268592e-07, "loss": 0.0065, "step": 215890 }, { "epoch": 1.8230563002680964, "grad_norm": 0.23512466251850128, "learning_rate": 2.3652414757654342e-07, "loss": 0.0044, "step": 215900 }, { "epoch": 1.8231407401152604, "grad_norm": 0.46464845538139343, "learning_rate": 2.3630024066211953e-07, "loss": 0.0054, "step": 215910 }, { "epoch": 1.8232251799624244, "grad_norm": 0.13823330402374268, "learning_rate": 2.360764372142793e-07, "loss": 0.0085, "step": 215920 }, { "epoch": 1.8233096198095882, "grad_norm": 0.19214977324008942, "learning_rate": 2.358527372378816e-07, "loss": 0.0055, "step": 215930 }, { "epoch": 1.823394059656752, "grad_norm": 0.14631471037864685, "learning_rate": 2.3562914073778754e-07, "loss": 0.0077, "step": 215940 }, { "epoch": 1.823478499503916, "grad_norm": 0.46766844391822815, "learning_rate": 2.3540564771885156e-07, "loss": 0.0071, "step": 215950 }, { "epoch": 1.82356293935108, "grad_norm": 0.10339029878377914, "learning_rate": 2.3518225818592866e-07, "loss": 0.0055, "step": 215960 }, { "epoch": 1.8236473791982437, "grad_norm": 0.4713723361492157, "learning_rate": 2.3495897214386999e-07, "loss": 0.0107, "step": 215970 }, { "epoch": 1.8237318190454075, "grad_norm": 0.022017348557710648, "learning_rate": 2.3473578959752663e-07, "loss": 0.0052, "step": 215980 }, { "epoch": 1.8238162588925713, "grad_norm": 0.06615816801786423, "learning_rate": 2.3451271055174584e-07, "loss": 0.0076, "step": 215990 }, { "epoch": 1.8239006987397353, "grad_norm": 0.12453285604715347, "learning_rate": 2.3428973501137153e-07, "loss": 0.01, "step": 216000 }, { "epoch": 1.8239851385868993, "grad_norm": 0.3902183771133423, "learning_rate": 2.3406686298124815e-07, "loss": 0.0043, "step": 216010 }, { "epoch": 1.824069578434063, "grad_norm": 0.3808892071247101, "learning_rate": 2.3384409446621515e-07, "loss": 0.0065, "step": 216020 }, { "epoch": 1.8241540182812268, "grad_norm": 0.7063421010971069, "learning_rate": 2.3362142947111254e-07, "loss": 0.0046, "step": 216030 }, { "epoch": 1.8242384581283908, "grad_norm": 0.1297607421875, "learning_rate": 2.3339886800077594e-07, "loss": 0.0067, "step": 216040 }, { "epoch": 1.8243228979755548, "grad_norm": 0.11895956099033356, "learning_rate": 2.331764100600392e-07, "loss": 0.0047, "step": 216050 }, { "epoch": 1.8244073378227186, "grad_norm": 0.1664724498987198, "learning_rate": 2.3295405565373296e-07, "loss": 0.0082, "step": 216060 }, { "epoch": 1.8244917776698824, "grad_norm": 0.06657706201076508, "learning_rate": 2.3273180478668833e-07, "loss": 0.0044, "step": 216070 }, { "epoch": 1.8245762175170461, "grad_norm": 0.25952616333961487, "learning_rate": 2.32509657463732e-07, "loss": 0.0074, "step": 216080 }, { "epoch": 1.8246606573642101, "grad_norm": 0.4310114085674286, "learning_rate": 2.32287613689689e-07, "loss": 0.0086, "step": 216090 }, { "epoch": 1.8247450972113741, "grad_norm": 0.15751947462558746, "learning_rate": 2.3206567346938213e-07, "loss": 0.0044, "step": 216100 }, { "epoch": 1.824829537058538, "grad_norm": 0.18683850765228271, "learning_rate": 2.3184383680763144e-07, "loss": 0.006, "step": 216110 }, { "epoch": 1.8249139769057017, "grad_norm": 0.400436133146286, "learning_rate": 2.3162210370925588e-07, "loss": 0.0065, "step": 216120 }, { "epoch": 1.8249984167528657, "grad_norm": 0.2075410634279251, "learning_rate": 2.3140047417907152e-07, "loss": 0.0041, "step": 216130 }, { "epoch": 1.8250828566000297, "grad_norm": 0.027825957164168358, "learning_rate": 2.3117894822189124e-07, "loss": 0.0056, "step": 216140 }, { "epoch": 1.8251672964471934, "grad_norm": 0.19944189488887787, "learning_rate": 2.3095752584252673e-07, "loss": 0.0052, "step": 216150 }, { "epoch": 1.8252517362943572, "grad_norm": 0.09792959690093994, "learning_rate": 2.3073620704578747e-07, "loss": 0.0078, "step": 216160 }, { "epoch": 1.8253361761415212, "grad_norm": 0.2626090347766876, "learning_rate": 2.3051499183648018e-07, "loss": 0.0037, "step": 216170 }, { "epoch": 1.8254206159886852, "grad_norm": 0.06813574582338333, "learning_rate": 2.3029388021941102e-07, "loss": 0.0037, "step": 216180 }, { "epoch": 1.825505055835849, "grad_norm": 0.14651694893836975, "learning_rate": 2.3007287219938003e-07, "loss": 0.0037, "step": 216190 }, { "epoch": 1.8255894956830128, "grad_norm": 0.21275486052036285, "learning_rate": 2.2985196778119056e-07, "loss": 0.0034, "step": 216200 }, { "epoch": 1.8256739355301765, "grad_norm": 0.193231001496315, "learning_rate": 2.2963116696963717e-07, "loss": 0.005, "step": 216210 }, { "epoch": 1.8257583753773405, "grad_norm": 0.01349583175033331, "learning_rate": 2.294104697695182e-07, "loss": 0.002, "step": 216220 }, { "epoch": 1.8258428152245045, "grad_norm": 0.8495492339134216, "learning_rate": 2.2918987618562648e-07, "loss": 0.007, "step": 216230 }, { "epoch": 1.8259272550716683, "grad_norm": 0.22823648154735565, "learning_rate": 2.289693862227521e-07, "loss": 0.0043, "step": 216240 }, { "epoch": 1.826011694918832, "grad_norm": 0.4438416659832001, "learning_rate": 2.2874899988568566e-07, "loss": 0.0102, "step": 216250 }, { "epoch": 1.826096134765996, "grad_norm": 0.42093610763549805, "learning_rate": 2.2852871717921221e-07, "loss": 0.0037, "step": 216260 }, { "epoch": 1.82618057461316, "grad_norm": 0.10751163214445114, "learning_rate": 2.283085381081185e-07, "loss": 0.0063, "step": 216270 }, { "epoch": 1.8262650144603239, "grad_norm": 0.0660744309425354, "learning_rate": 2.2808846267718509e-07, "loss": 0.0048, "step": 216280 }, { "epoch": 1.8263494543074876, "grad_norm": 0.326629102230072, "learning_rate": 2.2786849089119268e-07, "loss": 0.0088, "step": 216290 }, { "epoch": 1.8264338941546516, "grad_norm": 0.472834974527359, "learning_rate": 2.2764862275491794e-07, "loss": 0.0081, "step": 216300 }, { "epoch": 1.8265183340018154, "grad_norm": 0.6490233540534973, "learning_rate": 2.2742885827313766e-07, "loss": 0.0041, "step": 216310 }, { "epoch": 1.8266027738489794, "grad_norm": 0.005812725517898798, "learning_rate": 2.2720919745062408e-07, "loss": 0.0061, "step": 216320 }, { "epoch": 1.8266872136961432, "grad_norm": 0.2497207224369049, "learning_rate": 2.2698964029214954e-07, "loss": 0.0089, "step": 216330 }, { "epoch": 1.826771653543307, "grad_norm": 0.2665403187274933, "learning_rate": 2.2677018680248187e-07, "loss": 0.0163, "step": 216340 }, { "epoch": 1.826856093390471, "grad_norm": 0.3645109534263611, "learning_rate": 2.265508369863867e-07, "loss": 0.0069, "step": 216350 }, { "epoch": 1.826940533237635, "grad_norm": 0.32987067103385925, "learning_rate": 2.263315908486302e-07, "loss": 0.0047, "step": 216360 }, { "epoch": 1.8270249730847987, "grad_norm": 0.3282029926776886, "learning_rate": 2.2611244839397307e-07, "loss": 0.0111, "step": 216370 }, { "epoch": 1.8271094129319625, "grad_norm": 0.21204645931720734, "learning_rate": 2.2589340962717533e-07, "loss": 0.004, "step": 216380 }, { "epoch": 1.8271938527791265, "grad_norm": 0.3004702925682068, "learning_rate": 2.2567447455299373e-07, "loss": 0.007, "step": 216390 }, { "epoch": 1.8272782926262905, "grad_norm": 0.6863823533058167, "learning_rate": 2.2545564317618506e-07, "loss": 0.0083, "step": 216400 }, { "epoch": 1.8273627324734543, "grad_norm": 0.17525771260261536, "learning_rate": 2.252369155015005e-07, "loss": 0.0052, "step": 216410 }, { "epoch": 1.827447172320618, "grad_norm": 0.5626764893531799, "learning_rate": 2.2501829153369291e-07, "loss": 0.0065, "step": 216420 }, { "epoch": 1.8275316121677818, "grad_norm": 0.5996630787849426, "learning_rate": 2.2479977127750853e-07, "loss": 0.0047, "step": 216430 }, { "epoch": 1.8276160520149458, "grad_norm": 0.07041897624731064, "learning_rate": 2.2458135473769516e-07, "loss": 0.0075, "step": 216440 }, { "epoch": 1.8277004918621098, "grad_norm": 0.1844949573278427, "learning_rate": 2.2436304191899573e-07, "loss": 0.0027, "step": 216450 }, { "epoch": 1.8277849317092736, "grad_norm": 0.486810564994812, "learning_rate": 2.2414483282615308e-07, "loss": 0.0061, "step": 216460 }, { "epoch": 1.8278693715564374, "grad_norm": 0.6031284928321838, "learning_rate": 2.2392672746390566e-07, "loss": 0.0052, "step": 216470 }, { "epoch": 1.8279538114036014, "grad_norm": 0.3793696165084839, "learning_rate": 2.2370872583699078e-07, "loss": 0.0044, "step": 216480 }, { "epoch": 1.8280382512507654, "grad_norm": 0.5728925466537476, "learning_rate": 2.234908279501441e-07, "loss": 0.0052, "step": 216490 }, { "epoch": 1.8281226910979291, "grad_norm": 0.22478781640529633, "learning_rate": 2.2327303380809683e-07, "loss": 0.0057, "step": 216500 }, { "epoch": 1.828207130945093, "grad_norm": 0.13650758564472198, "learning_rate": 2.2305534341558132e-07, "loss": 0.0042, "step": 216510 }, { "epoch": 1.828291570792257, "grad_norm": 0.15383777022361755, "learning_rate": 2.2283775677732488e-07, "loss": 0.0058, "step": 216520 }, { "epoch": 1.828376010639421, "grad_norm": 0.17309795320034027, "learning_rate": 2.2262027389805318e-07, "loss": 0.0125, "step": 216530 }, { "epoch": 1.8284604504865847, "grad_norm": 0.3706372082233429, "learning_rate": 2.2240289478249022e-07, "loss": 0.0071, "step": 216540 }, { "epoch": 1.8285448903337485, "grad_norm": 0.29351934790611267, "learning_rate": 2.2218561943535722e-07, "loss": 0.007, "step": 216550 }, { "epoch": 1.8286293301809122, "grad_norm": 0.03061702474951744, "learning_rate": 2.2196844786137316e-07, "loss": 0.0044, "step": 216560 }, { "epoch": 1.8287137700280762, "grad_norm": 0.3015217185020447, "learning_rate": 2.21751380065256e-07, "loss": 0.0061, "step": 216570 }, { "epoch": 1.8287982098752402, "grad_norm": 0.07449653744697571, "learning_rate": 2.2153441605171965e-07, "loss": 0.0054, "step": 216580 }, { "epoch": 1.828882649722404, "grad_norm": 0.36828288435935974, "learning_rate": 2.2131755582547598e-07, "loss": 0.0058, "step": 216590 }, { "epoch": 1.8289670895695678, "grad_norm": 0.0018750857561826706, "learning_rate": 2.2110079939123618e-07, "loss": 0.0029, "step": 216600 }, { "epoch": 1.8290515294167318, "grad_norm": 0.1171647161245346, "learning_rate": 2.2088414675370816e-07, "loss": 0.0058, "step": 216610 }, { "epoch": 1.8291359692638958, "grad_norm": 0.1114029735326767, "learning_rate": 2.206675979175965e-07, "loss": 0.0045, "step": 216620 }, { "epoch": 1.8292204091110595, "grad_norm": 0.04326295852661133, "learning_rate": 2.204511528876052e-07, "loss": 0.0034, "step": 216630 }, { "epoch": 1.8293048489582233, "grad_norm": 0.010527031496167183, "learning_rate": 2.2023481166843607e-07, "loss": 0.0035, "step": 216640 }, { "epoch": 1.8293892888053873, "grad_norm": 0.11652521044015884, "learning_rate": 2.2001857426478647e-07, "loss": 0.0042, "step": 216650 }, { "epoch": 1.829473728652551, "grad_norm": 0.24163396656513214, "learning_rate": 2.1980244068135426e-07, "loss": 0.0035, "step": 216660 }, { "epoch": 1.829558168499715, "grad_norm": 0.049623165279626846, "learning_rate": 2.195864109228335e-07, "loss": 0.005, "step": 216670 }, { "epoch": 1.8296426083468789, "grad_norm": 0.17724424600601196, "learning_rate": 2.1937048499391655e-07, "loss": 0.0099, "step": 216680 }, { "epoch": 1.8297270481940426, "grad_norm": 0.29049620032310486, "learning_rate": 2.19154662899293e-07, "loss": 0.0048, "step": 216690 }, { "epoch": 1.8298114880412066, "grad_norm": 0.02104247733950615, "learning_rate": 2.1893894464365074e-07, "loss": 0.0029, "step": 216700 }, { "epoch": 1.8298959278883706, "grad_norm": 0.3410242199897766, "learning_rate": 2.1872333023167436e-07, "loss": 0.0047, "step": 216710 }, { "epoch": 1.8299803677355344, "grad_norm": 0.40069496631622314, "learning_rate": 2.1850781966804736e-07, "loss": 0.0069, "step": 216720 }, { "epoch": 1.8300648075826982, "grad_norm": 0.34700262546539307, "learning_rate": 2.1829241295745096e-07, "loss": 0.0039, "step": 216730 }, { "epoch": 1.8301492474298622, "grad_norm": 0.04888719692826271, "learning_rate": 2.1807711010456312e-07, "loss": 0.0065, "step": 216740 }, { "epoch": 1.8302336872770262, "grad_norm": 0.23555533587932587, "learning_rate": 2.1786191111406118e-07, "loss": 0.0056, "step": 216750 }, { "epoch": 1.83031812712419, "grad_norm": 0.3740900754928589, "learning_rate": 2.176468159906181e-07, "loss": 0.0064, "step": 216760 }, { "epoch": 1.8304025669713537, "grad_norm": 0.18205581605434418, "learning_rate": 2.1743182473890623e-07, "loss": 0.0077, "step": 216770 }, { "epoch": 1.8304870068185175, "grad_norm": 0.13881853222846985, "learning_rate": 2.1721693736359574e-07, "loss": 0.0049, "step": 216780 }, { "epoch": 1.8305714466656815, "grad_norm": 0.37608039379119873, "learning_rate": 2.1700215386935287e-07, "loss": 0.0058, "step": 216790 }, { "epoch": 1.8306558865128455, "grad_norm": 0.3178430497646332, "learning_rate": 2.1678747426084223e-07, "loss": 0.0056, "step": 216800 }, { "epoch": 1.8307403263600093, "grad_norm": 0.3359988331794739, "learning_rate": 2.1657289854272844e-07, "loss": 0.0071, "step": 216810 }, { "epoch": 1.830824766207173, "grad_norm": 0.20637603104114532, "learning_rate": 2.163584267196711e-07, "loss": 0.0061, "step": 216820 }, { "epoch": 1.830909206054337, "grad_norm": 0.11900397390127182, "learning_rate": 2.1614405879632816e-07, "loss": 0.0047, "step": 216830 }, { "epoch": 1.830993645901501, "grad_norm": 0.4221607446670532, "learning_rate": 2.1592979477735588e-07, "loss": 0.0044, "step": 216840 }, { "epoch": 1.8310780857486648, "grad_norm": 0.19682744145393372, "learning_rate": 2.1571563466740886e-07, "loss": 0.0168, "step": 216850 }, { "epoch": 1.8311625255958286, "grad_norm": 0.008093158714473248, "learning_rate": 2.1550157847113727e-07, "loss": 0.01, "step": 216860 }, { "epoch": 1.8312469654429926, "grad_norm": 0.5328010320663452, "learning_rate": 2.1528762619319022e-07, "loss": 0.0068, "step": 216870 }, { "epoch": 1.8313314052901566, "grad_norm": 0.06966017186641693, "learning_rate": 2.150737778382167e-07, "loss": 0.004, "step": 216880 }, { "epoch": 1.8314158451373204, "grad_norm": 0.13530555367469788, "learning_rate": 2.1486003341085915e-07, "loss": 0.0069, "step": 216890 }, { "epoch": 1.8315002849844841, "grad_norm": 0.26428499817848206, "learning_rate": 2.1464639291576217e-07, "loss": 0.0085, "step": 216900 }, { "epoch": 1.831584724831648, "grad_norm": 0.08213479071855545, "learning_rate": 2.1443285635756371e-07, "loss": 0.0086, "step": 216910 }, { "epoch": 1.831669164678812, "grad_norm": 0.0010508084669709206, "learning_rate": 2.1421942374090397e-07, "loss": 0.0095, "step": 216920 }, { "epoch": 1.831753604525976, "grad_norm": 0.18053992092609406, "learning_rate": 2.1400609507041757e-07, "loss": 0.0049, "step": 216930 }, { "epoch": 1.8318380443731397, "grad_norm": 0.19265234470367432, "learning_rate": 2.1379287035073805e-07, "loss": 0.0076, "step": 216940 }, { "epoch": 1.8319224842203035, "grad_norm": 0.32812198996543884, "learning_rate": 2.1357974958649607e-07, "loss": 0.0089, "step": 216950 }, { "epoch": 1.8320069240674675, "grad_norm": 0.19223180413246155, "learning_rate": 2.1336673278232134e-07, "loss": 0.0072, "step": 216960 }, { "epoch": 1.8320913639146315, "grad_norm": 0.36288169026374817, "learning_rate": 2.131538199428401e-07, "loss": 0.0052, "step": 216970 }, { "epoch": 1.8321758037617952, "grad_norm": 0.1463610976934433, "learning_rate": 2.1294101107267706e-07, "loss": 0.007, "step": 216980 }, { "epoch": 1.832260243608959, "grad_norm": 0.12704738974571228, "learning_rate": 2.1272830617645456e-07, "loss": 0.0071, "step": 216990 }, { "epoch": 1.8323446834561228, "grad_norm": 0.5087644457817078, "learning_rate": 2.1251570525879174e-07, "loss": 0.0087, "step": 217000 }, { "epoch": 1.8324291233032868, "grad_norm": 0.3470654785633087, "learning_rate": 2.1230320832430706e-07, "loss": 0.0035, "step": 217010 }, { "epoch": 1.8325135631504508, "grad_norm": 0.37949907779693604, "learning_rate": 2.1209081537761632e-07, "loss": 0.0066, "step": 217020 }, { "epoch": 1.8325980029976146, "grad_norm": 0.06200435385107994, "learning_rate": 2.1187852642333139e-07, "loss": 0.0071, "step": 217030 }, { "epoch": 1.8326824428447783, "grad_norm": 0.17663365602493286, "learning_rate": 2.1166634146606302e-07, "loss": 0.0063, "step": 217040 }, { "epoch": 1.8327668826919423, "grad_norm": 0.3516196012496948, "learning_rate": 2.114542605104214e-07, "loss": 0.0056, "step": 217050 }, { "epoch": 1.8328513225391063, "grad_norm": 0.3153373599052429, "learning_rate": 2.1124228356101173e-07, "loss": 0.0084, "step": 217060 }, { "epoch": 1.83293576238627, "grad_norm": 0.251394659280777, "learning_rate": 2.1103041062243756e-07, "loss": 0.0053, "step": 217070 }, { "epoch": 1.8330202022334339, "grad_norm": 0.583740770816803, "learning_rate": 2.1081864169930243e-07, "loss": 0.0071, "step": 217080 }, { "epoch": 1.8331046420805979, "grad_norm": 0.18904238939285278, "learning_rate": 2.1060697679620546e-07, "loss": 0.0074, "step": 217090 }, { "epoch": 1.8331890819277619, "grad_norm": 0.5673895478248596, "learning_rate": 2.1039541591774294e-07, "loss": 0.0053, "step": 217100 }, { "epoch": 1.8332735217749256, "grad_norm": 0.20449496805667877, "learning_rate": 2.1018395906851007e-07, "loss": 0.0056, "step": 217110 }, { "epoch": 1.8333579616220894, "grad_norm": 0.2207411378622055, "learning_rate": 2.0997260625310046e-07, "loss": 0.0082, "step": 217120 }, { "epoch": 1.8334424014692532, "grad_norm": 0.17240050435066223, "learning_rate": 2.097613574761037e-07, "loss": 0.0049, "step": 217130 }, { "epoch": 1.8335268413164172, "grad_norm": 0.5993899703025818, "learning_rate": 2.0955021274210952e-07, "loss": 0.011, "step": 217140 }, { "epoch": 1.8336112811635812, "grad_norm": 0.648676335811615, "learning_rate": 2.0933917205570198e-07, "loss": 0.009, "step": 217150 }, { "epoch": 1.833695721010745, "grad_norm": 0.0090326564386487, "learning_rate": 2.0912823542146686e-07, "loss": 0.0146, "step": 217160 }, { "epoch": 1.8337801608579087, "grad_norm": 0.1304650604724884, "learning_rate": 2.0891740284398497e-07, "loss": 0.0069, "step": 217170 }, { "epoch": 1.8338646007050727, "grad_norm": 0.08610104024410248, "learning_rate": 2.0870667432783486e-07, "loss": 0.0044, "step": 217180 }, { "epoch": 1.8339490405522367, "grad_norm": 0.18891145288944244, "learning_rate": 2.0849604987759454e-07, "loss": 0.0092, "step": 217190 }, { "epoch": 1.8340334803994005, "grad_norm": 0.09578349441289902, "learning_rate": 2.0828552949783697e-07, "loss": 0.0046, "step": 217200 }, { "epoch": 1.8341179202465643, "grad_norm": 0.03166336566209793, "learning_rate": 2.0807511319313633e-07, "loss": 0.0026, "step": 217210 }, { "epoch": 1.8342023600937283, "grad_norm": 0.8115366697311401, "learning_rate": 2.078648009680623e-07, "loss": 0.006, "step": 217220 }, { "epoch": 1.834286799940892, "grad_norm": 0.3009161353111267, "learning_rate": 2.076545928271828e-07, "loss": 0.0022, "step": 217230 }, { "epoch": 1.834371239788056, "grad_norm": 0.48795583844184875, "learning_rate": 2.074444887750632e-07, "loss": 0.0045, "step": 217240 }, { "epoch": 1.8344556796352198, "grad_norm": 0.15138807892799377, "learning_rate": 2.072344888162675e-07, "loss": 0.0072, "step": 217250 }, { "epoch": 1.8345401194823836, "grad_norm": 0.10384813696146011, "learning_rate": 2.070245929553566e-07, "loss": 0.0073, "step": 217260 }, { "epoch": 1.8346245593295476, "grad_norm": 0.4759235680103302, "learning_rate": 2.0681480119688956e-07, "loss": 0.0081, "step": 217270 }, { "epoch": 1.8347089991767116, "grad_norm": 0.09843959659337997, "learning_rate": 2.0660511354542278e-07, "loss": 0.0049, "step": 217280 }, { "epoch": 1.8347934390238754, "grad_norm": 0.11688487231731415, "learning_rate": 2.0639553000551038e-07, "loss": 0.0057, "step": 217290 }, { "epoch": 1.8348778788710391, "grad_norm": 0.3361477553844452, "learning_rate": 2.0618605058170481e-07, "loss": 0.0074, "step": 217300 }, { "epoch": 1.8349623187182031, "grad_norm": 0.22742025554180145, "learning_rate": 2.0597667527855582e-07, "loss": 0.006, "step": 217310 }, { "epoch": 1.8350467585653671, "grad_norm": 0.4692513346672058, "learning_rate": 2.0576740410061137e-07, "loss": 0.0072, "step": 217320 }, { "epoch": 1.835131198412531, "grad_norm": 0.4995681047439575, "learning_rate": 2.0555823705241562e-07, "loss": 0.0073, "step": 217330 }, { "epoch": 1.8352156382596947, "grad_norm": 0.24949464201927185, "learning_rate": 2.0534917413851386e-07, "loss": 0.0036, "step": 217340 }, { "epoch": 1.8353000781068585, "grad_norm": 0.17816497385501862, "learning_rate": 2.0514021536344464e-07, "loss": 0.0032, "step": 217350 }, { "epoch": 1.8353845179540225, "grad_norm": 0.17519748210906982, "learning_rate": 2.049313607317477e-07, "loss": 0.008, "step": 217360 }, { "epoch": 1.8354689578011865, "grad_norm": 0.14005650579929352, "learning_rate": 2.047226102479588e-07, "loss": 0.0108, "step": 217370 }, { "epoch": 1.8355533976483502, "grad_norm": 0.21532385051250458, "learning_rate": 2.0451396391661215e-07, "loss": 0.0075, "step": 217380 }, { "epoch": 1.835637837495514, "grad_norm": 0.08819117397069931, "learning_rate": 2.043054217422391e-07, "loss": 0.0056, "step": 217390 }, { "epoch": 1.835722277342678, "grad_norm": 0.1360362321138382, "learning_rate": 2.0409698372937048e-07, "loss": 0.0067, "step": 217400 }, { "epoch": 1.835806717189842, "grad_norm": 0.16433954238891602, "learning_rate": 2.038886498825321e-07, "loss": 0.0026, "step": 217410 }, { "epoch": 1.8358911570370058, "grad_norm": 0.39965587854385376, "learning_rate": 2.036804202062498e-07, "loss": 0.0045, "step": 217420 }, { "epoch": 1.8359755968841696, "grad_norm": 0.10787826031446457, "learning_rate": 2.0347229470504602e-07, "loss": 0.0041, "step": 217430 }, { "epoch": 1.8360600367313336, "grad_norm": 0.20235297083854675, "learning_rate": 2.0326427338344e-07, "loss": 0.0021, "step": 217440 }, { "epoch": 1.8361444765784976, "grad_norm": 0.17842945456504822, "learning_rate": 2.03056356245952e-07, "loss": 0.0051, "step": 217450 }, { "epoch": 1.8362289164256613, "grad_norm": 0.4730088412761688, "learning_rate": 2.0284854329709615e-07, "loss": 0.0057, "step": 217460 }, { "epoch": 1.836313356272825, "grad_norm": 0.08940599858760834, "learning_rate": 2.0264083454138773e-07, "loss": 0.0096, "step": 217470 }, { "epoch": 1.8363977961199889, "grad_norm": 0.351958692073822, "learning_rate": 2.0243322998333646e-07, "loss": 0.0073, "step": 217480 }, { "epoch": 1.8364822359671529, "grad_norm": 0.1995782107114792, "learning_rate": 2.0222572962745323e-07, "loss": 0.0115, "step": 217490 }, { "epoch": 1.8365666758143169, "grad_norm": 0.07273170351982117, "learning_rate": 2.0201833347824385e-07, "loss": 0.0083, "step": 217500 }, { "epoch": 1.8366511156614806, "grad_norm": 0.1530039757490158, "learning_rate": 2.0181104154021302e-07, "loss": 0.0078, "step": 217510 }, { "epoch": 1.8367355555086444, "grad_norm": 0.23505088686943054, "learning_rate": 2.0160385381786328e-07, "loss": 0.0055, "step": 217520 }, { "epoch": 1.8368199953558084, "grad_norm": 0.1550908088684082, "learning_rate": 2.013967703156938e-07, "loss": 0.0069, "step": 217530 }, { "epoch": 1.8369044352029724, "grad_norm": 0.4622606337070465, "learning_rate": 2.011897910382038e-07, "loss": 0.0073, "step": 217540 }, { "epoch": 1.8369888750501362, "grad_norm": 0.16257129609584808, "learning_rate": 2.0098291598988739e-07, "loss": 0.0083, "step": 217550 }, { "epoch": 1.8370733148973, "grad_norm": 0.18054915964603424, "learning_rate": 2.0077614517523936e-07, "loss": 0.004, "step": 217560 }, { "epoch": 1.837157754744464, "grad_norm": 0.059430379420518875, "learning_rate": 2.0056947859874942e-07, "loss": 0.0021, "step": 217570 }, { "epoch": 1.8372421945916277, "grad_norm": 0.25209665298461914, "learning_rate": 2.0036291626490735e-07, "loss": 0.0045, "step": 217580 }, { "epoch": 1.8373266344387917, "grad_norm": 0.5337153077125549, "learning_rate": 2.001564581781995e-07, "loss": 0.0088, "step": 217590 }, { "epoch": 1.8374110742859555, "grad_norm": 0.21052473783493042, "learning_rate": 1.9995010434310956e-07, "loss": 0.0047, "step": 217600 }, { "epoch": 1.8374955141331193, "grad_norm": 0.3841240406036377, "learning_rate": 1.997438547641195e-07, "loss": 0.0066, "step": 217610 }, { "epoch": 1.8375799539802833, "grad_norm": 0.4846096932888031, "learning_rate": 1.9953770944570905e-07, "loss": 0.0075, "step": 217620 }, { "epoch": 1.8376643938274473, "grad_norm": 0.29632753133773804, "learning_rate": 1.9933166839235573e-07, "loss": 0.007, "step": 217630 }, { "epoch": 1.837748833674611, "grad_norm": 0.16998790204524994, "learning_rate": 1.9912573160853598e-07, "loss": 0.007, "step": 217640 }, { "epoch": 1.8378332735217748, "grad_norm": 0.08422951400279999, "learning_rate": 1.9891989909872067e-07, "loss": 0.0092, "step": 217650 }, { "epoch": 1.8379177133689388, "grad_norm": 0.332680344581604, "learning_rate": 1.9871417086738122e-07, "loss": 0.0056, "step": 217660 }, { "epoch": 1.8380021532161028, "grad_norm": 0.5091645121574402, "learning_rate": 1.9850854691898735e-07, "loss": 0.0106, "step": 217670 }, { "epoch": 1.8380865930632666, "grad_norm": 0.2500976622104645, "learning_rate": 1.983030272580022e-07, "loss": 0.0087, "step": 217680 }, { "epoch": 1.8381710329104304, "grad_norm": 0.09713547676801682, "learning_rate": 1.980976118888922e-07, "loss": 0.0037, "step": 217690 }, { "epoch": 1.8382554727575942, "grad_norm": 0.21117384731769562, "learning_rate": 1.9789230081611766e-07, "loss": 0.0068, "step": 217700 }, { "epoch": 1.8383399126047582, "grad_norm": 0.2500220835208893, "learning_rate": 1.9768709404413887e-07, "loss": 0.0049, "step": 217710 }, { "epoch": 1.8384243524519222, "grad_norm": 0.45887744426727295, "learning_rate": 1.974819915774112e-07, "loss": 0.0093, "step": 217720 }, { "epoch": 1.838508792299086, "grad_norm": 0.21113067865371704, "learning_rate": 1.9727699342039164e-07, "loss": 0.0052, "step": 217730 }, { "epoch": 1.8385932321462497, "grad_norm": 0.18733017146587372, "learning_rate": 1.970720995775316e-07, "loss": 0.0048, "step": 217740 }, { "epoch": 1.8386776719934137, "grad_norm": 0.2656986117362976, "learning_rate": 1.9686731005328141e-07, "loss": 0.0114, "step": 217750 }, { "epoch": 1.8387621118405777, "grad_norm": 0.1660914272069931, "learning_rate": 1.9666262485208865e-07, "loss": 0.0036, "step": 217760 }, { "epoch": 1.8388465516877415, "grad_norm": 0.25365445017814636, "learning_rate": 1.9645804397839918e-07, "loss": 0.0073, "step": 217770 }, { "epoch": 1.8389309915349052, "grad_norm": 0.2195199579000473, "learning_rate": 1.9625356743665668e-07, "loss": 0.0053, "step": 217780 }, { "epoch": 1.8390154313820692, "grad_norm": 0.19874052703380585, "learning_rate": 1.9604919523130207e-07, "loss": 0.0079, "step": 217790 }, { "epoch": 1.8390998712292332, "grad_norm": 0.23397591710090637, "learning_rate": 1.9584492736677563e-07, "loss": 0.0024, "step": 217800 }, { "epoch": 1.839184311076397, "grad_norm": 0.010871024802327156, "learning_rate": 1.9564076384751164e-07, "loss": 0.0124, "step": 217810 }, { "epoch": 1.8392687509235608, "grad_norm": 0.03946690261363983, "learning_rate": 1.954367046779465e-07, "loss": 0.0043, "step": 217820 }, { "epoch": 1.8393531907707246, "grad_norm": 0.16471190750598907, "learning_rate": 1.9523274986251173e-07, "loss": 0.0073, "step": 217830 }, { "epoch": 1.8394376306178886, "grad_norm": 0.2768636643886566, "learning_rate": 1.9502889940563653e-07, "loss": 0.005, "step": 217840 }, { "epoch": 1.8395220704650526, "grad_norm": 0.2554936408996582, "learning_rate": 1.94825153311749e-07, "loss": 0.006, "step": 217850 }, { "epoch": 1.8396065103122163, "grad_norm": 0.10189186036586761, "learning_rate": 1.9462151158527455e-07, "loss": 0.0032, "step": 217860 }, { "epoch": 1.8396909501593801, "grad_norm": 0.05562540888786316, "learning_rate": 1.944179742306368e-07, "loss": 0.0115, "step": 217870 }, { "epoch": 1.839775390006544, "grad_norm": 0.1390489786863327, "learning_rate": 1.9421454125225504e-07, "loss": 0.011, "step": 217880 }, { "epoch": 1.839859829853708, "grad_norm": 0.4834881126880646, "learning_rate": 1.9401121265454903e-07, "loss": 0.0077, "step": 217890 }, { "epoch": 1.8399442697008719, "grad_norm": 0.23864750564098358, "learning_rate": 1.9380798844193417e-07, "loss": 0.0058, "step": 217900 }, { "epoch": 1.8400287095480357, "grad_norm": 0.4155472218990326, "learning_rate": 1.9360486861882521e-07, "loss": 0.0055, "step": 217910 }, { "epoch": 1.8401131493951994, "grad_norm": 0.14442479610443115, "learning_rate": 1.9340185318963424e-07, "loss": 0.004, "step": 217920 }, { "epoch": 1.8401975892423634, "grad_norm": 0.025379890576004982, "learning_rate": 1.9319894215876932e-07, "loss": 0.0049, "step": 217930 }, { "epoch": 1.8402820290895274, "grad_norm": 0.3513035774230957, "learning_rate": 1.929961355306381e-07, "loss": 0.0061, "step": 217940 }, { "epoch": 1.8403664689366912, "grad_norm": 0.23512354493141174, "learning_rate": 1.927934333096465e-07, "loss": 0.0037, "step": 217950 }, { "epoch": 1.840450908783855, "grad_norm": 0.33771222829818726, "learning_rate": 1.9259083550019597e-07, "loss": 0.0045, "step": 217960 }, { "epoch": 1.840535348631019, "grad_norm": 0.299350768327713, "learning_rate": 1.9238834210668744e-07, "loss": 0.0065, "step": 217970 }, { "epoch": 1.840619788478183, "grad_norm": 0.34271305799484253, "learning_rate": 1.9218595313351906e-07, "loss": 0.0031, "step": 217980 }, { "epoch": 1.8407042283253467, "grad_norm": 0.19365791976451874, "learning_rate": 1.9198366858508731e-07, "loss": 0.0034, "step": 217990 }, { "epoch": 1.8407886681725105, "grad_norm": 0.38370323181152344, "learning_rate": 1.9178148846578425e-07, "loss": 0.0049, "step": 218000 }, { "epoch": 1.8408731080196745, "grad_norm": 0.4594861567020416, "learning_rate": 1.9157941278000136e-07, "loss": 0.0053, "step": 218010 }, { "epoch": 1.8409575478668385, "grad_norm": 0.058365385979413986, "learning_rate": 1.9137744153212957e-07, "loss": 0.0018, "step": 218020 }, { "epoch": 1.8410419877140023, "grad_norm": 0.5937549471855164, "learning_rate": 1.9117557472655313e-07, "loss": 0.0062, "step": 218030 }, { "epoch": 1.841126427561166, "grad_norm": 0.540399968624115, "learning_rate": 1.9097381236765855e-07, "loss": 0.0103, "step": 218040 }, { "epoch": 1.8412108674083298, "grad_norm": 0.2665617763996124, "learning_rate": 1.9077215445982678e-07, "loss": 0.0048, "step": 218050 }, { "epoch": 1.8412953072554938, "grad_norm": 0.26468321681022644, "learning_rate": 1.9057060100743875e-07, "loss": 0.0058, "step": 218060 }, { "epoch": 1.8413797471026578, "grad_norm": 0.023571498692035675, "learning_rate": 1.9036915201487204e-07, "loss": 0.0056, "step": 218070 }, { "epoch": 1.8414641869498216, "grad_norm": 0.12533654272556305, "learning_rate": 1.9016780748650155e-07, "loss": 0.0082, "step": 218080 }, { "epoch": 1.8415486267969854, "grad_norm": 0.6180077195167542, "learning_rate": 1.8996656742669982e-07, "loss": 0.0057, "step": 218090 }, { "epoch": 1.8416330666441494, "grad_norm": 0.30154988169670105, "learning_rate": 1.8976543183983953e-07, "loss": 0.0065, "step": 218100 }, { "epoch": 1.8417175064913134, "grad_norm": 0.18395346403121948, "learning_rate": 1.8956440073028826e-07, "loss": 0.0049, "step": 218110 }, { "epoch": 1.8418019463384772, "grad_norm": 0.23770953714847565, "learning_rate": 1.8936347410241142e-07, "loss": 0.0045, "step": 218120 }, { "epoch": 1.841886386185641, "grad_norm": 0.08908435702323914, "learning_rate": 1.8916265196057548e-07, "loss": 0.006, "step": 218130 }, { "epoch": 1.841970826032805, "grad_norm": 0.17351962625980377, "learning_rate": 1.8896193430913978e-07, "loss": 0.0053, "step": 218140 }, { "epoch": 1.8420552658799687, "grad_norm": 0.12390107661485672, "learning_rate": 1.887613211524658e-07, "loss": 0.0051, "step": 218150 }, { "epoch": 1.8421397057271327, "grad_norm": 0.5394760966300964, "learning_rate": 1.8856081249490954e-07, "loss": 0.0077, "step": 218160 }, { "epoch": 1.8422241455742965, "grad_norm": 0.7291366457939148, "learning_rate": 1.8836040834082692e-07, "loss": 0.0082, "step": 218170 }, { "epoch": 1.8423085854214603, "grad_norm": 0.06428994983434677, "learning_rate": 1.8816010869457001e-07, "loss": 0.004, "step": 218180 }, { "epoch": 1.8423930252686243, "grad_norm": 0.6252292394638062, "learning_rate": 1.8795991356048927e-07, "loss": 0.005, "step": 218190 }, { "epoch": 1.8424774651157882, "grad_norm": 0.3324244022369385, "learning_rate": 1.8775982294293338e-07, "loss": 0.007, "step": 218200 }, { "epoch": 1.842561904962952, "grad_norm": 0.5111971497535706, "learning_rate": 1.8755983684624778e-07, "loss": 0.0086, "step": 218210 }, { "epoch": 1.8426463448101158, "grad_norm": 0.06827250868082047, "learning_rate": 1.8735995527477624e-07, "loss": 0.0049, "step": 218220 }, { "epoch": 1.8427307846572798, "grad_norm": 0.1150171086192131, "learning_rate": 1.8716017823286082e-07, "loss": 0.0055, "step": 218230 }, { "epoch": 1.8428152245044438, "grad_norm": 0.06704892218112946, "learning_rate": 1.8696050572484025e-07, "loss": 0.0042, "step": 218240 }, { "epoch": 1.8428996643516076, "grad_norm": 0.10616513341665268, "learning_rate": 1.867609377550511e-07, "loss": 0.0088, "step": 218250 }, { "epoch": 1.8429841041987713, "grad_norm": 0.1969204545021057, "learning_rate": 1.8656147432782766e-07, "loss": 0.0071, "step": 218260 }, { "epoch": 1.8430685440459351, "grad_norm": 0.13752198219299316, "learning_rate": 1.863621154475026e-07, "loss": 0.006, "step": 218270 }, { "epoch": 1.8431529838930991, "grad_norm": 0.3151560425758362, "learning_rate": 1.861628611184063e-07, "loss": 0.0075, "step": 218280 }, { "epoch": 1.8432374237402631, "grad_norm": 0.25487247109413147, "learning_rate": 1.8596371134486536e-07, "loss": 0.0056, "step": 218290 }, { "epoch": 1.843321863587427, "grad_norm": 0.034123051911592484, "learning_rate": 1.8576466613120736e-07, "loss": 0.0048, "step": 218300 }, { "epoch": 1.8434063034345907, "grad_norm": 0.3966575860977173, "learning_rate": 1.855657254817539e-07, "loss": 0.0061, "step": 218310 }, { "epoch": 1.8434907432817547, "grad_norm": 0.09245887398719788, "learning_rate": 1.8536688940082593e-07, "loss": 0.0089, "step": 218320 }, { "epoch": 1.8435751831289187, "grad_norm": 0.34121599793434143, "learning_rate": 1.8516815789274223e-07, "loss": 0.0053, "step": 218330 }, { "epoch": 1.8436596229760824, "grad_norm": 0.013104172423481941, "learning_rate": 1.8496953096182045e-07, "loss": 0.0069, "step": 218340 }, { "epoch": 1.8437440628232462, "grad_norm": 0.1001020073890686, "learning_rate": 1.847710086123733e-07, "loss": 0.003, "step": 218350 }, { "epoch": 1.8438285026704102, "grad_norm": 0.5866423845291138, "learning_rate": 1.8457259084871281e-07, "loss": 0.0038, "step": 218360 }, { "epoch": 1.8439129425175742, "grad_norm": 0.23798057436943054, "learning_rate": 1.8437427767514948e-07, "loss": 0.0057, "step": 218370 }, { "epoch": 1.843997382364738, "grad_norm": 0.21927697956562042, "learning_rate": 1.8417606909598927e-07, "loss": 0.0083, "step": 218380 }, { "epoch": 1.8440818222119018, "grad_norm": 0.2141326367855072, "learning_rate": 1.8397796511553877e-07, "loss": 0.0053, "step": 218390 }, { "epoch": 1.8441662620590655, "grad_norm": 0.4074456989765167, "learning_rate": 1.8377996573809953e-07, "loss": 0.0067, "step": 218400 }, { "epoch": 1.8442507019062295, "grad_norm": 0.19882875680923462, "learning_rate": 1.8358207096797255e-07, "loss": 0.0045, "step": 218410 }, { "epoch": 1.8443351417533935, "grad_norm": 0.24967187643051147, "learning_rate": 1.8338428080945603e-07, "loss": 0.0045, "step": 218420 }, { "epoch": 1.8444195816005573, "grad_norm": 0.12679129838943481, "learning_rate": 1.83186595266846e-07, "loss": 0.0091, "step": 218430 }, { "epoch": 1.844504021447721, "grad_norm": 0.5623202919960022, "learning_rate": 1.8298901434443573e-07, "loss": 0.0049, "step": 218440 }, { "epoch": 1.844588461294885, "grad_norm": 0.6600416898727417, "learning_rate": 1.8279153804651728e-07, "loss": 0.0035, "step": 218450 }, { "epoch": 1.844672901142049, "grad_norm": 0.5043323040008545, "learning_rate": 1.8259416637737948e-07, "loss": 0.0074, "step": 218460 }, { "epoch": 1.8447573409892128, "grad_norm": 0.5181356072425842, "learning_rate": 1.8239689934130888e-07, "loss": 0.0109, "step": 218470 }, { "epoch": 1.8448417808363766, "grad_norm": 0.16281549632549286, "learning_rate": 1.8219973694259097e-07, "loss": 0.0074, "step": 218480 }, { "epoch": 1.8449262206835404, "grad_norm": 0.48178672790527344, "learning_rate": 1.820026791855073e-07, "loss": 0.0054, "step": 218490 }, { "epoch": 1.8450106605307044, "grad_norm": 0.3752056360244751, "learning_rate": 1.8180572607433833e-07, "loss": 0.0066, "step": 218500 }, { "epoch": 1.8450951003778684, "grad_norm": 0.24213942885398865, "learning_rate": 1.8160887761336064e-07, "loss": 0.004, "step": 218510 }, { "epoch": 1.8451795402250322, "grad_norm": 0.12805432081222534, "learning_rate": 1.8141213380685197e-07, "loss": 0.0033, "step": 218520 }, { "epoch": 1.845263980072196, "grad_norm": 0.09997478127479553, "learning_rate": 1.812154946590833e-07, "loss": 0.0041, "step": 218530 }, { "epoch": 1.84534841991936, "grad_norm": 0.7749542593955994, "learning_rate": 1.810189601743273e-07, "loss": 0.0079, "step": 218540 }, { "epoch": 1.845432859766524, "grad_norm": 0.16794274747371674, "learning_rate": 1.8082253035685172e-07, "loss": 0.0041, "step": 218550 }, { "epoch": 1.8455172996136877, "grad_norm": 0.155745729804039, "learning_rate": 1.8062620521092367e-07, "loss": 0.0034, "step": 218560 }, { "epoch": 1.8456017394608515, "grad_norm": 0.4034246802330017, "learning_rate": 1.8042998474080642e-07, "loss": 0.0072, "step": 218570 }, { "epoch": 1.8456861793080155, "grad_norm": 0.4527720808982849, "learning_rate": 1.8023386895076266e-07, "loss": 0.0044, "step": 218580 }, { "epoch": 1.8457706191551795, "grad_norm": 0.08086879551410675, "learning_rate": 1.800378578450518e-07, "loss": 0.0066, "step": 218590 }, { "epoch": 1.8458550590023433, "grad_norm": 0.12468259781599045, "learning_rate": 1.7984195142792982e-07, "loss": 0.0066, "step": 218600 }, { "epoch": 1.845939498849507, "grad_norm": 0.0729588195681572, "learning_rate": 1.796461497036539e-07, "loss": 0.004, "step": 218610 }, { "epoch": 1.8460239386966708, "grad_norm": 0.5155609846115112, "learning_rate": 1.794504526764751e-07, "loss": 0.0078, "step": 218620 }, { "epoch": 1.8461083785438348, "grad_norm": 0.402412474155426, "learning_rate": 1.7925486035064498e-07, "loss": 0.0049, "step": 218630 }, { "epoch": 1.8461928183909988, "grad_norm": 0.28778964281082153, "learning_rate": 1.7905937273041185e-07, "loss": 0.0061, "step": 218640 }, { "epoch": 1.8462772582381626, "grad_norm": 0.21727913618087769, "learning_rate": 1.7886398982002117e-07, "loss": 0.0047, "step": 218650 }, { "epoch": 1.8463616980853264, "grad_norm": 0.3217895030975342, "learning_rate": 1.7866871162371624e-07, "loss": 0.0055, "step": 218660 }, { "epoch": 1.8464461379324904, "grad_norm": 0.0005525536253117025, "learning_rate": 1.784735381457392e-07, "loss": 0.0064, "step": 218670 }, { "epoch": 1.8465305777796543, "grad_norm": 0.14281708002090454, "learning_rate": 1.7827846939032833e-07, "loss": 0.007, "step": 218680 }, { "epoch": 1.8466150176268181, "grad_norm": 0.01166603621095419, "learning_rate": 1.7808350536172193e-07, "loss": 0.0058, "step": 218690 }, { "epoch": 1.846699457473982, "grad_norm": 0.09878171980381012, "learning_rate": 1.7788864606415324e-07, "loss": 0.0087, "step": 218700 }, { "epoch": 1.846783897321146, "grad_norm": 0.25710904598236084, "learning_rate": 1.7769389150185445e-07, "loss": 0.0049, "step": 218710 }, { "epoch": 1.8468683371683097, "grad_norm": 0.38774868845939636, "learning_rate": 1.7749924167905718e-07, "loss": 0.0079, "step": 218720 }, { "epoch": 1.8469527770154737, "grad_norm": 0.43226414918899536, "learning_rate": 1.773046965999875e-07, "loss": 0.0032, "step": 218730 }, { "epoch": 1.8470372168626374, "grad_norm": 0.2164253145456314, "learning_rate": 1.77110256268872e-07, "loss": 0.0063, "step": 218740 }, { "epoch": 1.8471216567098012, "grad_norm": 0.3724600374698639, "learning_rate": 1.769159206899329e-07, "loss": 0.0104, "step": 218750 }, { "epoch": 1.8472060965569652, "grad_norm": 0.13273312151432037, "learning_rate": 1.7672168986739236e-07, "loss": 0.0102, "step": 218760 }, { "epoch": 1.8472905364041292, "grad_norm": 0.05798407271504402, "learning_rate": 1.7652756380546754e-07, "loss": 0.0096, "step": 218770 }, { "epoch": 1.847374976251293, "grad_norm": 0.3314605951309204, "learning_rate": 1.763335425083762e-07, "loss": 0.0038, "step": 218780 }, { "epoch": 1.8474594160984568, "grad_norm": 0.683099627494812, "learning_rate": 1.7613962598033108e-07, "loss": 0.0076, "step": 218790 }, { "epoch": 1.8475438559456208, "grad_norm": 0.3623153865337372, "learning_rate": 1.7594581422554546e-07, "loss": 0.0051, "step": 218800 }, { "epoch": 1.8476282957927848, "grad_norm": 0.3171471655368805, "learning_rate": 1.757521072482282e-07, "loss": 0.0038, "step": 218810 }, { "epoch": 1.8477127356399485, "grad_norm": 0.23020130395889282, "learning_rate": 1.7555850505258653e-07, "loss": 0.0082, "step": 218820 }, { "epoch": 1.8477971754871123, "grad_norm": 0.2817663252353668, "learning_rate": 1.7536500764282594e-07, "loss": 0.0059, "step": 218830 }, { "epoch": 1.847881615334276, "grad_norm": 0.22978349030017853, "learning_rate": 1.7517161502314807e-07, "loss": 0.0053, "step": 218840 }, { "epoch": 1.84796605518144, "grad_norm": 0.7006347179412842, "learning_rate": 1.74978327197754e-07, "loss": 0.0051, "step": 218850 }, { "epoch": 1.848050495028604, "grad_norm": 0.20192910730838776, "learning_rate": 1.7478514417084203e-07, "loss": 0.0069, "step": 218860 }, { "epoch": 1.8481349348757679, "grad_norm": 0.4278779923915863, "learning_rate": 1.7459206594660826e-07, "loss": 0.0041, "step": 218870 }, { "epoch": 1.8482193747229316, "grad_norm": 0.4336884617805481, "learning_rate": 1.7439909252924491e-07, "loss": 0.0182, "step": 218880 }, { "epoch": 1.8483038145700956, "grad_norm": 0.3243299424648285, "learning_rate": 1.742062239229464e-07, "loss": 0.0101, "step": 218890 }, { "epoch": 1.8483882544172596, "grad_norm": 0.2829696238040924, "learning_rate": 1.7401346013189824e-07, "loss": 0.0029, "step": 218900 }, { "epoch": 1.8484726942644234, "grad_norm": 0.35732316970825195, "learning_rate": 1.738208011602893e-07, "loss": 0.0074, "step": 218910 }, { "epoch": 1.8485571341115872, "grad_norm": 0.20570090413093567, "learning_rate": 1.7362824701230296e-07, "loss": 0.0102, "step": 218920 }, { "epoch": 1.8486415739587512, "grad_norm": 0.5029038190841675, "learning_rate": 1.7343579769212303e-07, "loss": 0.0104, "step": 218930 }, { "epoch": 1.8487260138059152, "grad_norm": 0.11082363873720169, "learning_rate": 1.7324345320392844e-07, "loss": 0.0128, "step": 218940 }, { "epoch": 1.848810453653079, "grad_norm": 0.1957055926322937, "learning_rate": 1.7305121355189582e-07, "loss": 0.0055, "step": 218950 }, { "epoch": 1.8488948935002427, "grad_norm": 0.874573290348053, "learning_rate": 1.7285907874020292e-07, "loss": 0.0118, "step": 218960 }, { "epoch": 1.8489793333474065, "grad_norm": 0.31326624751091003, "learning_rate": 1.7266704877302144e-07, "loss": 0.0089, "step": 218970 }, { "epoch": 1.8490637731945705, "grad_norm": 0.25906267762184143, "learning_rate": 1.7247512365452246e-07, "loss": 0.0049, "step": 218980 }, { "epoch": 1.8491482130417345, "grad_norm": 0.3261277377605438, "learning_rate": 1.7228330338887377e-07, "loss": 0.0039, "step": 218990 }, { "epoch": 1.8492326528888983, "grad_norm": 0.36688941717147827, "learning_rate": 1.7209158798024316e-07, "loss": 0.0071, "step": 219000 }, { "epoch": 1.849317092736062, "grad_norm": 0.10566121339797974, "learning_rate": 1.718999774327934e-07, "loss": 0.0029, "step": 219010 }, { "epoch": 1.849401532583226, "grad_norm": 0.11159317195415497, "learning_rate": 1.7170847175068727e-07, "loss": 0.0046, "step": 219020 }, { "epoch": 1.84948597243039, "grad_norm": 0.014715573750436306, "learning_rate": 1.7151707093808312e-07, "loss": 0.0026, "step": 219030 }, { "epoch": 1.8495704122775538, "grad_norm": 0.001047141500748694, "learning_rate": 1.7132577499913982e-07, "loss": 0.0043, "step": 219040 }, { "epoch": 1.8496548521247176, "grad_norm": 0.22663083672523499, "learning_rate": 1.7113458393801074e-07, "loss": 0.0086, "step": 219050 }, { "epoch": 1.8497392919718816, "grad_norm": 0.08126318454742432, "learning_rate": 1.7094349775884867e-07, "loss": 0.0042, "step": 219060 }, { "epoch": 1.8498237318190454, "grad_norm": 0.07358342409133911, "learning_rate": 1.707525164658047e-07, "loss": 0.0066, "step": 219070 }, { "epoch": 1.8499081716662094, "grad_norm": 0.1267586648464203, "learning_rate": 1.7056164006302556e-07, "loss": 0.0049, "step": 219080 }, { "epoch": 1.8499926115133731, "grad_norm": 0.24640314280986786, "learning_rate": 1.7037086855465902e-07, "loss": 0.0031, "step": 219090 }, { "epoch": 1.850077051360537, "grad_norm": 0.19524861872196198, "learning_rate": 1.7018020194484674e-07, "loss": 0.0098, "step": 219100 }, { "epoch": 1.850161491207701, "grad_norm": 0.2710903584957123, "learning_rate": 1.69989640237731e-07, "loss": 0.007, "step": 219110 }, { "epoch": 1.850245931054865, "grad_norm": 0.19946295022964478, "learning_rate": 1.6979918343745017e-07, "loss": 0.0034, "step": 219120 }, { "epoch": 1.8503303709020287, "grad_norm": 0.1938711702823639, "learning_rate": 1.69608831548142e-07, "loss": 0.0057, "step": 219130 }, { "epoch": 1.8504148107491925, "grad_norm": 0.08926436305046082, "learning_rate": 1.6941858457393988e-07, "loss": 0.0064, "step": 219140 }, { "epoch": 1.8504992505963564, "grad_norm": 0.10127561539411545, "learning_rate": 1.6922844251897664e-07, "loss": 0.0109, "step": 219150 }, { "epoch": 1.8505836904435204, "grad_norm": 0.4091196060180664, "learning_rate": 1.690384053873806e-07, "loss": 0.0081, "step": 219160 }, { "epoch": 1.8506681302906842, "grad_norm": 0.13234853744506836, "learning_rate": 1.6884847318328123e-07, "loss": 0.0062, "step": 219170 }, { "epoch": 1.850752570137848, "grad_norm": 0.1045849397778511, "learning_rate": 1.6865864591080305e-07, "loss": 0.0067, "step": 219180 }, { "epoch": 1.8508370099850118, "grad_norm": 0.2481599897146225, "learning_rate": 1.6846892357406886e-07, "loss": 0.0072, "step": 219190 }, { "epoch": 1.8509214498321758, "grad_norm": 0.11724340170621872, "learning_rate": 1.682793061771998e-07, "loss": 0.007, "step": 219200 }, { "epoch": 1.8510058896793398, "grad_norm": 0.3524472415447235, "learning_rate": 1.6808979372431366e-07, "loss": 0.0062, "step": 219210 }, { "epoch": 1.8510903295265035, "grad_norm": 0.2628009021282196, "learning_rate": 1.6790038621952776e-07, "loss": 0.0042, "step": 219220 }, { "epoch": 1.8511747693736673, "grad_norm": 0.244672954082489, "learning_rate": 1.6771108366695432e-07, "loss": 0.003, "step": 219230 }, { "epoch": 1.8512592092208313, "grad_norm": 0.2333875149488449, "learning_rate": 1.6752188607070673e-07, "loss": 0.0042, "step": 219240 }, { "epoch": 1.8513436490679953, "grad_norm": 0.15141528844833374, "learning_rate": 1.6733279343489284e-07, "loss": 0.0056, "step": 219250 }, { "epoch": 1.851428088915159, "grad_norm": 0.058212969452142715, "learning_rate": 1.67143805763621e-07, "loss": 0.0092, "step": 219260 }, { "epoch": 1.8515125287623229, "grad_norm": 0.15368759632110596, "learning_rate": 1.669549230609946e-07, "loss": 0.0027, "step": 219270 }, { "epoch": 1.8515969686094869, "grad_norm": 0.3610913157463074, "learning_rate": 1.6676614533111756e-07, "loss": 0.0073, "step": 219280 }, { "epoch": 1.8516814084566509, "grad_norm": 0.6417860984802246, "learning_rate": 1.6657747257808944e-07, "loss": 0.006, "step": 219290 }, { "epoch": 1.8517658483038146, "grad_norm": 0.5307621359825134, "learning_rate": 1.66388904806008e-07, "loss": 0.0092, "step": 219300 }, { "epoch": 1.8518502881509784, "grad_norm": 0.4056565761566162, "learning_rate": 1.662004420189689e-07, "loss": 0.0059, "step": 219310 }, { "epoch": 1.8519347279981422, "grad_norm": 0.24977779388427734, "learning_rate": 1.6601208422106552e-07, "loss": 0.0043, "step": 219320 }, { "epoch": 1.8520191678453062, "grad_norm": 0.24372000992298126, "learning_rate": 1.6582383141638958e-07, "loss": 0.007, "step": 219330 }, { "epoch": 1.8521036076924702, "grad_norm": 0.8911218047142029, "learning_rate": 1.6563568360902837e-07, "loss": 0.0067, "step": 219340 }, { "epoch": 1.852188047539634, "grad_norm": 0.15531039237976074, "learning_rate": 1.654476408030703e-07, "loss": 0.0026, "step": 219350 }, { "epoch": 1.8522724873867977, "grad_norm": 0.055238161236047745, "learning_rate": 1.6525970300259874e-07, "loss": 0.0115, "step": 219360 }, { "epoch": 1.8523569272339617, "grad_norm": 0.19462336599826813, "learning_rate": 1.6507187021169547e-07, "loss": 0.0041, "step": 219370 }, { "epoch": 1.8524413670811257, "grad_norm": 0.15255606174468994, "learning_rate": 1.6488414243444108e-07, "loss": 0.0064, "step": 219380 }, { "epoch": 1.8525258069282895, "grad_norm": 0.331529438495636, "learning_rate": 1.6469651967491173e-07, "loss": 0.0066, "step": 219390 }, { "epoch": 1.8526102467754533, "grad_norm": 0.016885045915842056, "learning_rate": 1.645090019371831e-07, "loss": 0.0063, "step": 219400 }, { "epoch": 1.852694686622617, "grad_norm": 0.3511061668395996, "learning_rate": 1.6432158922532805e-07, "loss": 0.0057, "step": 219410 }, { "epoch": 1.852779126469781, "grad_norm": 0.36151379346847534, "learning_rate": 1.641342815434177e-07, "loss": 0.0031, "step": 219420 }, { "epoch": 1.852863566316945, "grad_norm": 0.006365749053657055, "learning_rate": 1.6394707889551886e-07, "loss": 0.0076, "step": 219430 }, { "epoch": 1.8529480061641088, "grad_norm": 0.14578814804553986, "learning_rate": 1.6375998128569936e-07, "loss": 0.0053, "step": 219440 }, { "epoch": 1.8530324460112726, "grad_norm": 0.745496392250061, "learning_rate": 1.6357298871802098e-07, "loss": 0.0132, "step": 219450 }, { "epoch": 1.8531168858584366, "grad_norm": 0.27141886949539185, "learning_rate": 1.633861011965471e-07, "loss": 0.005, "step": 219460 }, { "epoch": 1.8532013257056006, "grad_norm": 0.2559909522533417, "learning_rate": 1.6319931872533612e-07, "loss": 0.006, "step": 219470 }, { "epoch": 1.8532857655527644, "grad_norm": 0.29079440236091614, "learning_rate": 1.630126413084443e-07, "loss": 0.0024, "step": 219480 }, { "epoch": 1.8533702053999281, "grad_norm": 0.17270910739898682, "learning_rate": 1.6282606894992668e-07, "loss": 0.0028, "step": 219490 }, { "epoch": 1.8534546452470921, "grad_norm": 0.49234873056411743, "learning_rate": 1.6263960165383563e-07, "loss": 0.0068, "step": 219500 }, { "epoch": 1.8535390850942561, "grad_norm": 0.014094292186200619, "learning_rate": 1.624532394242212e-07, "loss": 0.0043, "step": 219510 }, { "epoch": 1.85362352494142, "grad_norm": 0.014059633016586304, "learning_rate": 1.6226698226513126e-07, "loss": 0.0048, "step": 219520 }, { "epoch": 1.8537079647885837, "grad_norm": 1.3686354160308838, "learning_rate": 1.6208083018061093e-07, "loss": 0.0186, "step": 219530 }, { "epoch": 1.8537924046357475, "grad_norm": 0.23631282150745392, "learning_rate": 1.6189478317470419e-07, "loss": 0.0044, "step": 219540 }, { "epoch": 1.8538768444829115, "grad_norm": 0.06000027060508728, "learning_rate": 1.6170884125145114e-07, "loss": 0.0251, "step": 219550 }, { "epoch": 1.8539612843300755, "grad_norm": 0.22713792324066162, "learning_rate": 1.6152300441488965e-07, "loss": 0.0078, "step": 219560 }, { "epoch": 1.8540457241772392, "grad_norm": 0.3350580036640167, "learning_rate": 1.613372726690582e-07, "loss": 0.0039, "step": 219570 }, { "epoch": 1.854130164024403, "grad_norm": 0.308676153421402, "learning_rate": 1.611516460179885e-07, "loss": 0.0061, "step": 219580 }, { "epoch": 1.854214603871567, "grad_norm": 0.4514080882072449, "learning_rate": 1.6096612446571458e-07, "loss": 0.006, "step": 219590 }, { "epoch": 1.854299043718731, "grad_norm": 0.03720539063215256, "learning_rate": 1.607807080162638e-07, "loss": 0.0055, "step": 219600 }, { "epoch": 1.8543834835658948, "grad_norm": 0.028454232960939407, "learning_rate": 1.6059539667366507e-07, "loss": 0.0092, "step": 219610 }, { "epoch": 1.8544679234130585, "grad_norm": 0.6188083291053772, "learning_rate": 1.604101904419425e-07, "loss": 0.0068, "step": 219620 }, { "epoch": 1.8545523632602225, "grad_norm": 0.2526944577693939, "learning_rate": 1.6022508932511893e-07, "loss": 0.0049, "step": 219630 }, { "epoch": 1.8546368031073863, "grad_norm": 0.08440542221069336, "learning_rate": 1.6004009332721448e-07, "loss": 0.0064, "step": 219640 }, { "epoch": 1.8547212429545503, "grad_norm": 0.33314189314842224, "learning_rate": 1.598552024522476e-07, "loss": 0.0074, "step": 219650 }, { "epoch": 1.854805682801714, "grad_norm": 0.18144552409648895, "learning_rate": 1.59670416704234e-07, "loss": 0.0039, "step": 219660 }, { "epoch": 1.8548901226488779, "grad_norm": 0.38025596737861633, "learning_rate": 1.5948573608718653e-07, "loss": 0.0054, "step": 219670 }, { "epoch": 1.8549745624960419, "grad_norm": 0.4072250425815582, "learning_rate": 1.593011606051176e-07, "loss": 0.0031, "step": 219680 }, { "epoch": 1.8550590023432059, "grad_norm": 0.2870437204837799, "learning_rate": 1.5911669026203445e-07, "loss": 0.0031, "step": 219690 }, { "epoch": 1.8551434421903696, "grad_norm": 0.01970915123820305, "learning_rate": 1.5893232506194622e-07, "loss": 0.0081, "step": 219700 }, { "epoch": 1.8552278820375334, "grad_norm": 0.0010167175205424428, "learning_rate": 1.587480650088552e-07, "loss": 0.0074, "step": 219710 }, { "epoch": 1.8553123218846974, "grad_norm": 0.2562394142150879, "learning_rate": 1.585639101067643e-07, "loss": 0.0048, "step": 219720 }, { "epoch": 1.8553967617318614, "grad_norm": 0.22123946249485016, "learning_rate": 1.5837986035967257e-07, "loss": 0.0035, "step": 219730 }, { "epoch": 1.8554812015790252, "grad_norm": 0.3344797194004059, "learning_rate": 1.5819591577157844e-07, "loss": 0.0086, "step": 219740 }, { "epoch": 1.855565641426189, "grad_norm": 0.26883071660995483, "learning_rate": 1.580120763464771e-07, "loss": 0.0069, "step": 219750 }, { "epoch": 1.8556500812733527, "grad_norm": 1.04225492477417, "learning_rate": 1.5782834208836083e-07, "loss": 0.0125, "step": 219760 }, { "epoch": 1.8557345211205167, "grad_norm": 0.5583747029304504, "learning_rate": 1.5764471300122153e-07, "loss": 0.0055, "step": 219770 }, { "epoch": 1.8558189609676807, "grad_norm": 0.3750056326389313, "learning_rate": 1.5746118908904596e-07, "loss": 0.0053, "step": 219780 }, { "epoch": 1.8559034008148445, "grad_norm": 0.4369261860847473, "learning_rate": 1.5727777035582147e-07, "loss": 0.0059, "step": 219790 }, { "epoch": 1.8559878406620083, "grad_norm": 0.10147463530302048, "learning_rate": 1.5709445680553104e-07, "loss": 0.0048, "step": 219800 }, { "epoch": 1.8560722805091723, "grad_norm": 0.1517479121685028, "learning_rate": 1.56911248442157e-07, "loss": 0.0034, "step": 219810 }, { "epoch": 1.8561567203563363, "grad_norm": 0.005421113688498735, "learning_rate": 1.5672814526967784e-07, "loss": 0.0021, "step": 219820 }, { "epoch": 1.8562411602035, "grad_norm": 0.1958141028881073, "learning_rate": 1.565451472920715e-07, "loss": 0.0051, "step": 219830 }, { "epoch": 1.8563256000506638, "grad_norm": 0.1271160989999771, "learning_rate": 1.563622545133109e-07, "loss": 0.005, "step": 219840 }, { "epoch": 1.8564100398978278, "grad_norm": 0.22995121777057648, "learning_rate": 1.5617946693737063e-07, "loss": 0.003, "step": 219850 }, { "epoch": 1.8564944797449918, "grad_norm": 0.16904880106449127, "learning_rate": 1.5599678456821977e-07, "loss": 0.0044, "step": 219860 }, { "epoch": 1.8565789195921556, "grad_norm": 0.23335053026676178, "learning_rate": 1.5581420740982567e-07, "loss": 0.0053, "step": 219870 }, { "epoch": 1.8566633594393194, "grad_norm": 0.19449178874492645, "learning_rate": 1.5563173546615462e-07, "loss": 0.0111, "step": 219880 }, { "epoch": 1.8567477992864831, "grad_norm": 0.3485009968280792, "learning_rate": 1.55449368741169e-07, "loss": 0.0043, "step": 219890 }, { "epoch": 1.8568322391336471, "grad_norm": 0.3921482264995575, "learning_rate": 1.5526710723883065e-07, "loss": 0.0035, "step": 219900 }, { "epoch": 1.8569166789808111, "grad_norm": 0.13649657368659973, "learning_rate": 1.5508495096309805e-07, "loss": 0.01, "step": 219910 }, { "epoch": 1.857001118827975, "grad_norm": 0.08699371665716171, "learning_rate": 1.549028999179275e-07, "loss": 0.0046, "step": 219920 }, { "epoch": 1.8570855586751387, "grad_norm": 0.28130748867988586, "learning_rate": 1.547209541072725e-07, "loss": 0.0041, "step": 219930 }, { "epoch": 1.8571699985223027, "grad_norm": 0.07221588492393494, "learning_rate": 1.5453911353508656e-07, "loss": 0.0035, "step": 219940 }, { "epoch": 1.8572544383694667, "grad_norm": 0.18294815719127655, "learning_rate": 1.543573782053176e-07, "loss": 0.0054, "step": 219950 }, { "epoch": 1.8573388782166305, "grad_norm": 0.1610564887523651, "learning_rate": 1.5417574812191361e-07, "loss": 0.005, "step": 219960 }, { "epoch": 1.8574233180637942, "grad_norm": 0.023931952193379402, "learning_rate": 1.5399422328881863e-07, "loss": 0.002, "step": 219970 }, { "epoch": 1.8575077579109582, "grad_norm": 0.391080766916275, "learning_rate": 1.5381280370997675e-07, "loss": 0.0058, "step": 219980 }, { "epoch": 1.857592197758122, "grad_norm": 0.2934572100639343, "learning_rate": 1.5363148938932758e-07, "loss": 0.0038, "step": 219990 }, { "epoch": 1.857676637605286, "grad_norm": 0.1666223108768463, "learning_rate": 1.5345028033080967e-07, "loss": 0.0066, "step": 220000 }, { "epoch": 1.8577610774524498, "grad_norm": 0.2718411386013031, "learning_rate": 1.5326917653835815e-07, "loss": 0.0086, "step": 220010 }, { "epoch": 1.8578455172996136, "grad_norm": 0.09990305453538895, "learning_rate": 1.5308817801590658e-07, "loss": 0.0067, "step": 220020 }, { "epoch": 1.8579299571467776, "grad_norm": 0.4244624972343445, "learning_rate": 1.5290728476738737e-07, "loss": 0.0081, "step": 220030 }, { "epoch": 1.8580143969939416, "grad_norm": 0.334441602230072, "learning_rate": 1.5272649679672847e-07, "loss": 0.0055, "step": 220040 }, { "epoch": 1.8580988368411053, "grad_norm": 0.25676706433296204, "learning_rate": 1.5254581410785674e-07, "loss": 0.0031, "step": 220050 }, { "epoch": 1.858183276688269, "grad_norm": 0.14647434651851654, "learning_rate": 1.5236523670469571e-07, "loss": 0.0089, "step": 220060 }, { "epoch": 1.858267716535433, "grad_norm": 0.06420246511697769, "learning_rate": 1.521847645911695e-07, "loss": 0.0029, "step": 220070 }, { "epoch": 1.858352156382597, "grad_norm": 0.12136182188987732, "learning_rate": 1.5200439777119602e-07, "loss": 0.0074, "step": 220080 }, { "epoch": 1.8584365962297609, "grad_norm": 1.6697540283203125, "learning_rate": 1.5182413624869386e-07, "loss": 0.0093, "step": 220090 }, { "epoch": 1.8585210360769246, "grad_norm": 0.22721119225025177, "learning_rate": 1.5164398002757763e-07, "loss": 0.0064, "step": 220100 }, { "epoch": 1.8586054759240884, "grad_norm": 0.13818512856960297, "learning_rate": 1.51463929111762e-07, "loss": 0.0036, "step": 220110 }, { "epoch": 1.8586899157712524, "grad_norm": 0.09955161064863205, "learning_rate": 1.512839835051555e-07, "loss": 0.0079, "step": 220120 }, { "epoch": 1.8587743556184164, "grad_norm": 0.2217644304037094, "learning_rate": 1.5110414321166667e-07, "loss": 0.0037, "step": 220130 }, { "epoch": 1.8588587954655802, "grad_norm": 0.01045155804604292, "learning_rate": 1.5092440823520237e-07, "loss": 0.006, "step": 220140 }, { "epoch": 1.858943235312744, "grad_norm": 0.16662201285362244, "learning_rate": 1.5074477857966618e-07, "loss": 0.0058, "step": 220150 }, { "epoch": 1.859027675159908, "grad_norm": 0.31451019644737244, "learning_rate": 1.505652542489605e-07, "loss": 0.0096, "step": 220160 }, { "epoch": 1.859112115007072, "grad_norm": 0.07690814137458801, "learning_rate": 1.503858352469828e-07, "loss": 0.0063, "step": 220170 }, { "epoch": 1.8591965548542357, "grad_norm": 0.29709118604660034, "learning_rate": 1.502065215776316e-07, "loss": 0.0091, "step": 220180 }, { "epoch": 1.8592809947013995, "grad_norm": 0.3751344382762909, "learning_rate": 1.50027313244801e-07, "loss": 0.0051, "step": 220190 }, { "epoch": 1.8593654345485635, "grad_norm": 0.3099175691604614, "learning_rate": 1.498482102523835e-07, "loss": 0.008, "step": 220200 }, { "epoch": 1.8594498743957275, "grad_norm": 0.17406320571899414, "learning_rate": 1.4966921260426813e-07, "loss": 0.0052, "step": 220210 }, { "epoch": 1.8595343142428913, "grad_norm": 0.1722194403409958, "learning_rate": 1.4949032030434407e-07, "loss": 0.0037, "step": 220220 }, { "epoch": 1.859618754090055, "grad_norm": 0.2589118480682373, "learning_rate": 1.4931153335649595e-07, "loss": 0.0037, "step": 220230 }, { "epoch": 1.8597031939372188, "grad_norm": 0.3242325782775879, "learning_rate": 1.4913285176460736e-07, "loss": 0.0042, "step": 220240 }, { "epoch": 1.8597876337843828, "grad_norm": 0.27575981616973877, "learning_rate": 1.489542755325596e-07, "loss": 0.0113, "step": 220250 }, { "epoch": 1.8598720736315468, "grad_norm": 0.03725181519985199, "learning_rate": 1.487758046642307e-07, "loss": 0.002, "step": 220260 }, { "epoch": 1.8599565134787106, "grad_norm": 0.0006555567379109561, "learning_rate": 1.48597439163497e-07, "loss": 0.0041, "step": 220270 }, { "epoch": 1.8600409533258744, "grad_norm": 0.14289215207099915, "learning_rate": 1.484191790342332e-07, "loss": 0.0049, "step": 220280 }, { "epoch": 1.8601253931730384, "grad_norm": 0.1749039739370346, "learning_rate": 1.4824102428031062e-07, "loss": 0.0052, "step": 220290 }, { "epoch": 1.8602098330202024, "grad_norm": 0.1924365609884262, "learning_rate": 1.480629749055984e-07, "loss": 0.0056, "step": 220300 }, { "epoch": 1.8602942728673661, "grad_norm": 0.45643144845962524, "learning_rate": 1.4788503091396454e-07, "loss": 0.0053, "step": 220310 }, { "epoch": 1.86037871271453, "grad_norm": 0.12980219721794128, "learning_rate": 1.4770719230927266e-07, "loss": 0.0102, "step": 220320 }, { "epoch": 1.8604631525616937, "grad_norm": 0.05566044896841049, "learning_rate": 1.4752945909538686e-07, "loss": 0.0065, "step": 220330 }, { "epoch": 1.8605475924088577, "grad_norm": 0.1588132679462433, "learning_rate": 1.473518312761668e-07, "loss": 0.0048, "step": 220340 }, { "epoch": 1.8606320322560217, "grad_norm": 0.5849225521087646, "learning_rate": 1.4717430885547058e-07, "loss": 0.0075, "step": 220350 }, { "epoch": 1.8607164721031855, "grad_norm": 0.10675458610057831, "learning_rate": 1.4699689183715448e-07, "loss": 0.004, "step": 220360 }, { "epoch": 1.8608009119503492, "grad_norm": 0.4527025818824768, "learning_rate": 1.4681958022507103e-07, "loss": 0.0124, "step": 220370 }, { "epoch": 1.8608853517975132, "grad_norm": 0.14857615530490875, "learning_rate": 1.4664237402307157e-07, "loss": 0.005, "step": 220380 }, { "epoch": 1.8609697916446772, "grad_norm": 0.10797733813524246, "learning_rate": 1.464652732350047e-07, "loss": 0.0044, "step": 220390 }, { "epoch": 1.861054231491841, "grad_norm": 0.5339713096618652, "learning_rate": 1.4628827786471843e-07, "loss": 0.0065, "step": 220400 }, { "epoch": 1.8611386713390048, "grad_norm": 0.6134420037269592, "learning_rate": 1.4611138791605528e-07, "loss": 0.0126, "step": 220410 }, { "epoch": 1.8612231111861688, "grad_norm": 0.4934011399745941, "learning_rate": 1.459346033928588e-07, "loss": 0.0046, "step": 220420 }, { "epoch": 1.8613075510333328, "grad_norm": 0.14625860750675201, "learning_rate": 1.4575792429896818e-07, "loss": 0.0059, "step": 220430 }, { "epoch": 1.8613919908804966, "grad_norm": 0.36407315731048584, "learning_rate": 1.455813506382209e-07, "loss": 0.0073, "step": 220440 }, { "epoch": 1.8614764307276603, "grad_norm": 0.31731271743774414, "learning_rate": 1.4540488241445104e-07, "loss": 0.0052, "step": 220450 }, { "epoch": 1.8615608705748241, "grad_norm": 0.03604138270020485, "learning_rate": 1.4522851963149288e-07, "loss": 0.0046, "step": 220460 }, { "epoch": 1.861645310421988, "grad_norm": 0.24406354129314423, "learning_rate": 1.4505226229317604e-07, "loss": 0.0056, "step": 220470 }, { "epoch": 1.861729750269152, "grad_norm": 0.17602810263633728, "learning_rate": 1.4487611040332917e-07, "loss": 0.0059, "step": 220480 }, { "epoch": 1.8618141901163159, "grad_norm": 0.3502714931964874, "learning_rate": 1.4470006396577864e-07, "loss": 0.0084, "step": 220490 }, { "epoch": 1.8618986299634797, "grad_norm": 0.07851371169090271, "learning_rate": 1.445241229843475e-07, "loss": 0.0039, "step": 220500 }, { "epoch": 1.8619830698106437, "grad_norm": 0.2836335599422455, "learning_rate": 1.4434828746285769e-07, "loss": 0.0058, "step": 220510 }, { "epoch": 1.8620675096578077, "grad_norm": 0.42388978600502014, "learning_rate": 1.4417255740512838e-07, "loss": 0.0163, "step": 220520 }, { "epoch": 1.8621519495049714, "grad_norm": 0.06398522108793259, "learning_rate": 1.4399693281497596e-07, "loss": 0.0036, "step": 220530 }, { "epoch": 1.8622363893521352, "grad_norm": 0.3615942597389221, "learning_rate": 1.4382141369621406e-07, "loss": 0.0051, "step": 220540 }, { "epoch": 1.8623208291992992, "grad_norm": 0.28722184896469116, "learning_rate": 1.4364600005265682e-07, "loss": 0.0062, "step": 220550 }, { "epoch": 1.862405269046463, "grad_norm": 0.010461952537298203, "learning_rate": 1.434706918881129e-07, "loss": 0.0027, "step": 220560 }, { "epoch": 1.862489708893627, "grad_norm": 0.2932230234146118, "learning_rate": 1.4329548920639092e-07, "loss": 0.0063, "step": 220570 }, { "epoch": 1.8625741487407907, "grad_norm": 0.07665096968412399, "learning_rate": 1.4312039201129446e-07, "loss": 0.0042, "step": 220580 }, { "epoch": 1.8626585885879545, "grad_norm": 0.1892724186182022, "learning_rate": 1.4294540030662885e-07, "loss": 0.0042, "step": 220590 }, { "epoch": 1.8627430284351185, "grad_norm": 0.2442592978477478, "learning_rate": 1.4277051409619437e-07, "loss": 0.0034, "step": 220600 }, { "epoch": 1.8628274682822825, "grad_norm": 0.320670485496521, "learning_rate": 1.4259573338378795e-07, "loss": 0.0064, "step": 220610 }, { "epoch": 1.8629119081294463, "grad_norm": 0.22498098015785217, "learning_rate": 1.4242105817320773e-07, "loss": 0.0053, "step": 220620 }, { "epoch": 1.86299634797661, "grad_norm": 0.39608055353164673, "learning_rate": 1.4224648846824562e-07, "loss": 0.0061, "step": 220630 }, { "epoch": 1.863080787823774, "grad_norm": 0.012477018870413303, "learning_rate": 1.4207202427269528e-07, "loss": 0.0087, "step": 220640 }, { "epoch": 1.863165227670938, "grad_norm": 0.24846181273460388, "learning_rate": 1.4189766559034424e-07, "loss": 0.0043, "step": 220650 }, { "epoch": 1.8632496675181018, "grad_norm": 0.11804146319627762, "learning_rate": 1.4172341242498056e-07, "loss": 0.0038, "step": 220660 }, { "epoch": 1.8633341073652656, "grad_norm": 0.17204658687114716, "learning_rate": 1.41549264780389e-07, "loss": 0.0053, "step": 220670 }, { "epoch": 1.8634185472124294, "grad_norm": 0.0685531347990036, "learning_rate": 1.413752226603521e-07, "loss": 0.0078, "step": 220680 }, { "epoch": 1.8635029870595934, "grad_norm": 0.07297814637422562, "learning_rate": 1.4120128606864959e-07, "loss": 0.0094, "step": 220690 }, { "epoch": 1.8635874269067574, "grad_norm": 0.08303613215684891, "learning_rate": 1.4102745500905957e-07, "loss": 0.0058, "step": 220700 }, { "epoch": 1.8636718667539212, "grad_norm": 0.09222517907619476, "learning_rate": 1.4085372948535736e-07, "loss": 0.0071, "step": 220710 }, { "epoch": 1.863756306601085, "grad_norm": 0.18553371727466583, "learning_rate": 1.4068010950131606e-07, "loss": 0.0054, "step": 220720 }, { "epoch": 1.863840746448249, "grad_norm": 0.044745251536369324, "learning_rate": 1.4050659506070763e-07, "loss": 0.0085, "step": 220730 }, { "epoch": 1.863925186295413, "grad_norm": 0.39936938881874084, "learning_rate": 1.4033318616729962e-07, "loss": 0.0076, "step": 220740 }, { "epoch": 1.8640096261425767, "grad_norm": 0.11719249933958054, "learning_rate": 1.4015988282485958e-07, "loss": 0.0091, "step": 220750 }, { "epoch": 1.8640940659897405, "grad_norm": 0.11491497606039047, "learning_rate": 1.3998668503715062e-07, "loss": 0.0053, "step": 220760 }, { "epoch": 1.8641785058369045, "grad_norm": 0.33536699414253235, "learning_rate": 1.3981359280793527e-07, "loss": 0.0057, "step": 220770 }, { "epoch": 1.8642629456840685, "grad_norm": 0.3080991208553314, "learning_rate": 1.396406061409722e-07, "loss": 0.0063, "step": 220780 }, { "epoch": 1.8643473855312322, "grad_norm": 0.5308783054351807, "learning_rate": 1.394677250400195e-07, "loss": 0.0053, "step": 220790 }, { "epoch": 1.864431825378396, "grad_norm": 0.45794740319252014, "learning_rate": 1.3929494950883138e-07, "loss": 0.011, "step": 220800 }, { "epoch": 1.8645162652255598, "grad_norm": 0.2622171640396118, "learning_rate": 1.3912227955116153e-07, "loss": 0.0053, "step": 220810 }, { "epoch": 1.8646007050727238, "grad_norm": 0.0022495544981211424, "learning_rate": 1.3894971517075918e-07, "loss": 0.0043, "step": 220820 }, { "epoch": 1.8646851449198878, "grad_norm": 0.3035699427127838, "learning_rate": 1.3877725637137297e-07, "loss": 0.0049, "step": 220830 }, { "epoch": 1.8647695847670516, "grad_norm": 0.05613984167575836, "learning_rate": 1.386049031567488e-07, "loss": 0.0051, "step": 220840 }, { "epoch": 1.8648540246142153, "grad_norm": 0.6672411561012268, "learning_rate": 1.3843265553063035e-07, "loss": 0.0049, "step": 220850 }, { "epoch": 1.8649384644613793, "grad_norm": 0.1221388578414917, "learning_rate": 1.3826051349675796e-07, "loss": 0.0038, "step": 220860 }, { "epoch": 1.8650229043085433, "grad_norm": 0.21231709420681, "learning_rate": 1.3808847705887085e-07, "loss": 0.0052, "step": 220870 }, { "epoch": 1.8651073441557071, "grad_norm": 0.013196907937526703, "learning_rate": 1.379165462207055e-07, "loss": 0.0034, "step": 220880 }, { "epoch": 1.865191784002871, "grad_norm": 0.19405119121074677, "learning_rate": 1.3774472098599668e-07, "loss": 0.015, "step": 220890 }, { "epoch": 1.8652762238500349, "grad_norm": 0.2541571855545044, "learning_rate": 1.375730013584764e-07, "loss": 0.0057, "step": 220900 }, { "epoch": 1.8653606636971987, "grad_norm": 0.26303476095199585, "learning_rate": 1.3740138734187335e-07, "loss": 0.0024, "step": 220910 }, { "epoch": 1.8654451035443627, "grad_norm": 0.2859123945236206, "learning_rate": 1.372298789399168e-07, "loss": 0.0054, "step": 220920 }, { "epoch": 1.8655295433915264, "grad_norm": 0.18032248318195343, "learning_rate": 1.370584761563304e-07, "loss": 0.0067, "step": 220930 }, { "epoch": 1.8656139832386902, "grad_norm": 0.03580015152692795, "learning_rate": 1.3688717899483727e-07, "loss": 0.0083, "step": 220940 }, { "epoch": 1.8656984230858542, "grad_norm": 0.8745840787887573, "learning_rate": 1.3671598745915837e-07, "loss": 0.0127, "step": 220950 }, { "epoch": 1.8657828629330182, "grad_norm": 0.28202950954437256, "learning_rate": 1.365449015530107e-07, "loss": 0.011, "step": 220960 }, { "epoch": 1.865867302780182, "grad_norm": 0.12520422041416168, "learning_rate": 1.363739212801124e-07, "loss": 0.004, "step": 220970 }, { "epoch": 1.8659517426273458, "grad_norm": 0.46006670594215393, "learning_rate": 1.3620304664417494e-07, "loss": 0.005, "step": 220980 }, { "epoch": 1.8660361824745098, "grad_norm": 0.1608394980430603, "learning_rate": 1.3603227764891092e-07, "loss": 0.0084, "step": 220990 }, { "epoch": 1.8661206223216737, "grad_norm": 0.4031074345111847, "learning_rate": 1.3586161429802902e-07, "loss": 0.0074, "step": 221000 }, { "epoch": 1.8662050621688375, "grad_norm": 0.10011495649814606, "learning_rate": 1.3569105659523685e-07, "loss": 0.0022, "step": 221010 }, { "epoch": 1.8662895020160013, "grad_norm": 0.08170448988676071, "learning_rate": 1.35520604544237e-07, "loss": 0.0038, "step": 221020 }, { "epoch": 1.866373941863165, "grad_norm": 0.029621172696352005, "learning_rate": 1.353502581487337e-07, "loss": 0.0036, "step": 221030 }, { "epoch": 1.866458381710329, "grad_norm": 0.288853257894516, "learning_rate": 1.3518001741242514e-07, "loss": 0.0081, "step": 221040 }, { "epoch": 1.866542821557493, "grad_norm": 0.14028115570545197, "learning_rate": 1.3500988233901002e-07, "loss": 0.0034, "step": 221050 }, { "epoch": 1.8666272614046568, "grad_norm": 0.21754296123981476, "learning_rate": 1.3483985293218316e-07, "loss": 0.0027, "step": 221060 }, { "epoch": 1.8667117012518206, "grad_norm": 0.07259116321802139, "learning_rate": 1.3466992919563825e-07, "loss": 0.0121, "step": 221070 }, { "epoch": 1.8667961410989846, "grad_norm": 0.20363084971904755, "learning_rate": 1.3450011113306517e-07, "loss": 0.0073, "step": 221080 }, { "epoch": 1.8668805809461486, "grad_norm": 0.7217728495597839, "learning_rate": 1.3433039874815313e-07, "loss": 0.0071, "step": 221090 }, { "epoch": 1.8669650207933124, "grad_norm": 0.23133213818073273, "learning_rate": 1.3416079204458699e-07, "loss": 0.006, "step": 221100 }, { "epoch": 1.8670494606404762, "grad_norm": 0.22725039720535278, "learning_rate": 1.3399129102605158e-07, "loss": 0.0057, "step": 221110 }, { "epoch": 1.8671339004876402, "grad_norm": 0.16628070175647736, "learning_rate": 1.338218956962284e-07, "loss": 0.0056, "step": 221120 }, { "epoch": 1.867218340334804, "grad_norm": 0.3029763996601105, "learning_rate": 1.3365260605879614e-07, "loss": 0.0071, "step": 221130 }, { "epoch": 1.867302780181968, "grad_norm": 0.33948376774787903, "learning_rate": 1.334834221174325e-07, "loss": 0.0086, "step": 221140 }, { "epoch": 1.8673872200291317, "grad_norm": 0.2649664282798767, "learning_rate": 1.333143438758111e-07, "loss": 0.0033, "step": 221150 }, { "epoch": 1.8674716598762955, "grad_norm": 0.114251047372818, "learning_rate": 1.331453713376052e-07, "loss": 0.0072, "step": 221160 }, { "epoch": 1.8675560997234595, "grad_norm": 0.3803688883781433, "learning_rate": 1.3297650450648458e-07, "loss": 0.0046, "step": 221170 }, { "epoch": 1.8676405395706235, "grad_norm": 0.34581515192985535, "learning_rate": 1.328077433861169e-07, "loss": 0.0199, "step": 221180 }, { "epoch": 1.8677249794177873, "grad_norm": 0.09130079299211502, "learning_rate": 1.326390879801681e-07, "loss": 0.0062, "step": 221190 }, { "epoch": 1.867809419264951, "grad_norm": 0.11819500476121902, "learning_rate": 1.3247053829230028e-07, "loss": 0.0067, "step": 221200 }, { "epoch": 1.867893859112115, "grad_norm": 0.14322441816329956, "learning_rate": 1.323020943261749e-07, "loss": 0.0064, "step": 221210 }, { "epoch": 1.867978298959279, "grad_norm": 0.12883412837982178, "learning_rate": 1.3213375608545075e-07, "loss": 0.0064, "step": 221220 }, { "epoch": 1.8680627388064428, "grad_norm": 0.12916885316371918, "learning_rate": 1.319655235737838e-07, "loss": 0.0032, "step": 221230 }, { "epoch": 1.8681471786536066, "grad_norm": 0.37614870071411133, "learning_rate": 1.3179739679482772e-07, "loss": 0.0072, "step": 221240 }, { "epoch": 1.8682316185007704, "grad_norm": 0.2886533737182617, "learning_rate": 1.3162937575223522e-07, "loss": 0.0056, "step": 221250 }, { "epoch": 1.8683160583479343, "grad_norm": 0.19174431264400482, "learning_rate": 1.3146146044965503e-07, "loss": 0.0082, "step": 221260 }, { "epoch": 1.8684004981950983, "grad_norm": 0.013618916273117065, "learning_rate": 1.312936508907342e-07, "loss": 0.0043, "step": 221270 }, { "epoch": 1.8684849380422621, "grad_norm": 0.38466909527778625, "learning_rate": 1.3112594707911764e-07, "loss": 0.0067, "step": 221280 }, { "epoch": 1.868569377889426, "grad_norm": 0.5956031084060669, "learning_rate": 1.309583490184474e-07, "loss": 0.0075, "step": 221290 }, { "epoch": 1.86865381773659, "grad_norm": 0.14357393980026245, "learning_rate": 1.3079085671236448e-07, "loss": 0.0021, "step": 221300 }, { "epoch": 1.868738257583754, "grad_norm": 0.10263153910636902, "learning_rate": 1.3062347016450595e-07, "loss": 0.0094, "step": 221310 }, { "epoch": 1.8688226974309177, "grad_norm": 0.14488404989242554, "learning_rate": 1.3045618937850836e-07, "loss": 0.0112, "step": 221320 }, { "epoch": 1.8689071372780814, "grad_norm": 0.3872363567352295, "learning_rate": 1.3028901435800433e-07, "loss": 0.0068, "step": 221330 }, { "epoch": 1.8689915771252454, "grad_norm": 0.18671464920043945, "learning_rate": 1.301219451066249e-07, "loss": 0.006, "step": 221340 }, { "epoch": 1.8690760169724094, "grad_norm": 0.2523171901702881, "learning_rate": 1.299549816279988e-07, "loss": 0.0047, "step": 221350 }, { "epoch": 1.8691604568195732, "grad_norm": 0.0928531140089035, "learning_rate": 1.2978812392575258e-07, "loss": 0.006, "step": 221360 }, { "epoch": 1.869244896666737, "grad_norm": 0.19004391133785248, "learning_rate": 1.2962137200351e-07, "loss": 0.0076, "step": 221370 }, { "epoch": 1.8693293365139008, "grad_norm": 0.3520011603832245, "learning_rate": 1.2945472586489372e-07, "loss": 0.004, "step": 221380 }, { "epoch": 1.8694137763610648, "grad_norm": 0.5633141398429871, "learning_rate": 1.2928818551352195e-07, "loss": 0.0059, "step": 221390 }, { "epoch": 1.8694982162082288, "grad_norm": 0.5936441421508789, "learning_rate": 1.291217509530135e-07, "loss": 0.0052, "step": 221400 }, { "epoch": 1.8695826560553925, "grad_norm": 0.1168987825512886, "learning_rate": 1.289554221869821e-07, "loss": 0.0033, "step": 221410 }, { "epoch": 1.8696670959025563, "grad_norm": 0.2851458787918091, "learning_rate": 1.2878919921904098e-07, "loss": 0.008, "step": 221420 }, { "epoch": 1.8697515357497203, "grad_norm": 0.19517968595027924, "learning_rate": 1.286230820528006e-07, "loss": 0.0048, "step": 221430 }, { "epoch": 1.8698359755968843, "grad_norm": 0.3671623468399048, "learning_rate": 1.2845707069186754e-07, "loss": 0.0067, "step": 221440 }, { "epoch": 1.869920415444048, "grad_norm": 0.32213085889816284, "learning_rate": 1.282911651398494e-07, "loss": 0.0036, "step": 221450 }, { "epoch": 1.8700048552912119, "grad_norm": 0.21579407155513763, "learning_rate": 1.2812536540034837e-07, "loss": 0.0048, "step": 221460 }, { "epoch": 1.8700892951383759, "grad_norm": 0.2963508367538452, "learning_rate": 1.2795967147696654e-07, "loss": 0.0063, "step": 221470 }, { "epoch": 1.8701737349855396, "grad_norm": 0.10645181685686111, "learning_rate": 1.2779408337330213e-07, "loss": 0.0058, "step": 221480 }, { "epoch": 1.8702581748327036, "grad_norm": 0.887019693851471, "learning_rate": 1.2762860109295172e-07, "loss": 0.0072, "step": 221490 }, { "epoch": 1.8703426146798674, "grad_norm": 0.22128407657146454, "learning_rate": 1.2746322463951022e-07, "loss": 0.0044, "step": 221500 }, { "epoch": 1.8704270545270312, "grad_norm": 0.01163400150835514, "learning_rate": 1.2729795401656862e-07, "loss": 0.0099, "step": 221510 }, { "epoch": 1.8705114943741952, "grad_norm": 0.3738817572593689, "learning_rate": 1.2713278922771632e-07, "loss": 0.0053, "step": 221520 }, { "epoch": 1.8705959342213592, "grad_norm": 0.4044921398162842, "learning_rate": 1.2696773027654207e-07, "loss": 0.0033, "step": 221530 }, { "epoch": 1.870680374068523, "grad_norm": 0.5679775476455688, "learning_rate": 1.2680277716663027e-07, "loss": 0.0067, "step": 221540 }, { "epoch": 1.8707648139156867, "grad_norm": 0.09190017729997635, "learning_rate": 1.2663792990156242e-07, "loss": 0.0059, "step": 221550 }, { "epoch": 1.8708492537628507, "grad_norm": 0.42909860610961914, "learning_rate": 1.264731884849213e-07, "loss": 0.0051, "step": 221560 }, { "epoch": 1.8709336936100147, "grad_norm": 0.6890518665313721, "learning_rate": 1.2630855292028289e-07, "loss": 0.0077, "step": 221570 }, { "epoch": 1.8710181334571785, "grad_norm": 0.15199227631092072, "learning_rate": 1.2614402321122487e-07, "loss": 0.0035, "step": 221580 }, { "epoch": 1.8711025733043423, "grad_norm": 0.21211114525794983, "learning_rate": 1.2597959936131942e-07, "loss": 0.012, "step": 221590 }, { "epoch": 1.871187013151506, "grad_norm": 0.33345091342926025, "learning_rate": 1.2581528137413867e-07, "loss": 0.0111, "step": 221600 }, { "epoch": 1.87127145299867, "grad_norm": 0.24250032007694244, "learning_rate": 1.2565106925325033e-07, "loss": 0.006, "step": 221610 }, { "epoch": 1.871355892845834, "grad_norm": 0.12898530066013336, "learning_rate": 1.2548696300222263e-07, "loss": 0.0054, "step": 221620 }, { "epoch": 1.8714403326929978, "grad_norm": 0.2665720283985138, "learning_rate": 1.2532296262461886e-07, "loss": 0.0049, "step": 221630 }, { "epoch": 1.8715247725401616, "grad_norm": 0.37094712257385254, "learning_rate": 1.251590681240017e-07, "loss": 0.0062, "step": 221640 }, { "epoch": 1.8716092123873256, "grad_norm": 0.14342613518238068, "learning_rate": 1.2499527950393108e-07, "loss": 0.006, "step": 221650 }, { "epoch": 1.8716936522344896, "grad_norm": 0.23629851639270782, "learning_rate": 1.2483159676796365e-07, "loss": 0.0063, "step": 221660 }, { "epoch": 1.8717780920816534, "grad_norm": 0.10271436721086502, "learning_rate": 1.2466801991965483e-07, "loss": 0.0039, "step": 221670 }, { "epoch": 1.8718625319288171, "grad_norm": 0.43665003776550293, "learning_rate": 1.2450454896255738e-07, "loss": 0.0092, "step": 221680 }, { "epoch": 1.8719469717759811, "grad_norm": 0.09838758409023285, "learning_rate": 1.2434118390022232e-07, "loss": 0.007, "step": 221690 }, { "epoch": 1.8720314116231451, "grad_norm": 0.5393162965774536, "learning_rate": 1.2417792473619684e-07, "loss": 0.0027, "step": 221700 }, { "epoch": 1.872115851470309, "grad_norm": 0.6011455655097961, "learning_rate": 1.2401477147402863e-07, "loss": 0.0071, "step": 221710 }, { "epoch": 1.8722002913174727, "grad_norm": 0.03560381010174751, "learning_rate": 1.2385172411725987e-07, "loss": 0.0056, "step": 221720 }, { "epoch": 1.8722847311646365, "grad_norm": 0.35113948583602905, "learning_rate": 1.2368878266943274e-07, "loss": 0.0101, "step": 221730 }, { "epoch": 1.8723691710118004, "grad_norm": 0.1404293328523636, "learning_rate": 1.2352594713408607e-07, "loss": 0.0052, "step": 221740 }, { "epoch": 1.8724536108589644, "grad_norm": 0.17010855674743652, "learning_rate": 1.2336321751475644e-07, "loss": 0.0087, "step": 221750 }, { "epoch": 1.8725380507061282, "grad_norm": 0.43518027663230896, "learning_rate": 1.2320059381497828e-07, "loss": 0.0211, "step": 221760 }, { "epoch": 1.872622490553292, "grad_norm": 0.652131199836731, "learning_rate": 1.2303807603828433e-07, "loss": 0.0067, "step": 221770 }, { "epoch": 1.872706930400456, "grad_norm": 0.34321001172065735, "learning_rate": 1.228756641882034e-07, "loss": 0.0064, "step": 221780 }, { "epoch": 1.87279137024762, "grad_norm": 0.24644814431667328, "learning_rate": 1.227133582682638e-07, "loss": 0.0074, "step": 221790 }, { "epoch": 1.8728758100947838, "grad_norm": 0.36678531765937805, "learning_rate": 1.225511582819905e-07, "loss": 0.0076, "step": 221800 }, { "epoch": 1.8729602499419475, "grad_norm": 0.052679501473903656, "learning_rate": 1.2238906423290676e-07, "loss": 0.0078, "step": 221810 }, { "epoch": 1.8730446897891113, "grad_norm": 0.18444840610027313, "learning_rate": 1.222270761245331e-07, "loss": 0.0071, "step": 221820 }, { "epoch": 1.8731291296362753, "grad_norm": 0.38432714343070984, "learning_rate": 1.2206519396038786e-07, "loss": 0.0057, "step": 221830 }, { "epoch": 1.8732135694834393, "grad_norm": 0.4540195167064667, "learning_rate": 1.2190341774398706e-07, "loss": 0.0043, "step": 221840 }, { "epoch": 1.873298009330603, "grad_norm": 0.1405949592590332, "learning_rate": 1.217417474788435e-07, "loss": 0.0058, "step": 221850 }, { "epoch": 1.8733824491777669, "grad_norm": 0.13354167342185974, "learning_rate": 1.2158018316847042e-07, "loss": 0.0054, "step": 221860 }, { "epoch": 1.8734668890249309, "grad_norm": 0.5675475001335144, "learning_rate": 1.214187248163756e-07, "loss": 0.0049, "step": 221870 }, { "epoch": 1.8735513288720949, "grad_norm": 0.0038828446995466948, "learning_rate": 1.2125737242606682e-07, "loss": 0.0042, "step": 221880 }, { "epoch": 1.8736357687192586, "grad_norm": 0.36708566546440125, "learning_rate": 1.210961260010479e-07, "loss": 0.0099, "step": 221890 }, { "epoch": 1.8737202085664224, "grad_norm": 0.3729441165924072, "learning_rate": 1.2093498554482164e-07, "loss": 0.0024, "step": 221900 }, { "epoch": 1.8738046484135864, "grad_norm": 0.34825399518013, "learning_rate": 1.2077395106088796e-07, "loss": 0.0052, "step": 221910 }, { "epoch": 1.8738890882607504, "grad_norm": 0.067608542740345, "learning_rate": 1.2061302255274355e-07, "loss": 0.0058, "step": 221920 }, { "epoch": 1.8739735281079142, "grad_norm": 0.24223710596561432, "learning_rate": 1.2045220002388446e-07, "loss": 0.0065, "step": 221930 }, { "epoch": 1.874057967955078, "grad_norm": 0.300944447517395, "learning_rate": 1.202914834778035e-07, "loss": 0.0064, "step": 221940 }, { "epoch": 1.8741424078022417, "grad_norm": 0.19417621195316315, "learning_rate": 1.2013087291799175e-07, "loss": 0.0076, "step": 221950 }, { "epoch": 1.8742268476494057, "grad_norm": 0.46774083375930786, "learning_rate": 1.1997036834793752e-07, "loss": 0.0054, "step": 221960 }, { "epoch": 1.8743112874965697, "grad_norm": 0.5402678847312927, "learning_rate": 1.1980996977112692e-07, "loss": 0.009, "step": 221970 }, { "epoch": 1.8743957273437335, "grad_norm": 0.1948252022266388, "learning_rate": 1.1964967719104382e-07, "loss": 0.0057, "step": 221980 }, { "epoch": 1.8744801671908973, "grad_norm": 0.23836086690425873, "learning_rate": 1.1948949061116987e-07, "loss": 0.0039, "step": 221990 }, { "epoch": 1.8745646070380613, "grad_norm": 0.15536196529865265, "learning_rate": 1.1932941003498344e-07, "loss": 0.0084, "step": 222000 }, { "epoch": 1.8746490468852253, "grad_norm": 0.0631244033575058, "learning_rate": 1.1916943546596282e-07, "loss": 0.003, "step": 222010 }, { "epoch": 1.874733486732389, "grad_norm": 0.18220871686935425, "learning_rate": 1.1900956690758137e-07, "loss": 0.0055, "step": 222020 }, { "epoch": 1.8748179265795528, "grad_norm": 0.4911886155605316, "learning_rate": 1.1884980436331183e-07, "loss": 0.0048, "step": 222030 }, { "epoch": 1.8749023664267168, "grad_norm": 0.07047495990991592, "learning_rate": 1.1869014783662481e-07, "loss": 0.0052, "step": 222040 }, { "epoch": 1.8749868062738806, "grad_norm": 0.14426149427890778, "learning_rate": 1.1853059733098692e-07, "loss": 0.0131, "step": 222050 }, { "epoch": 1.8750712461210446, "grad_norm": 0.3604700565338135, "learning_rate": 1.1837115284986433e-07, "loss": 0.007, "step": 222060 }, { "epoch": 1.8751556859682084, "grad_norm": 0.3060392141342163, "learning_rate": 1.1821181439672036e-07, "loss": 0.0037, "step": 222070 }, { "epoch": 1.8752401258153721, "grad_norm": 0.28497761487960815, "learning_rate": 1.1805258197501501e-07, "loss": 0.0068, "step": 222080 }, { "epoch": 1.8753245656625361, "grad_norm": 0.2725183069705963, "learning_rate": 1.178934555882072e-07, "loss": 0.0071, "step": 222090 }, { "epoch": 1.8754090055097001, "grad_norm": 0.07862521708011627, "learning_rate": 1.1773443523975304e-07, "loss": 0.0085, "step": 222100 }, { "epoch": 1.875493445356864, "grad_norm": 0.48628419637680054, "learning_rate": 1.1757552093310642e-07, "loss": 0.0085, "step": 222110 }, { "epoch": 1.8755778852040277, "grad_norm": 0.8678957223892212, "learning_rate": 1.1741671267171906e-07, "loss": 0.0081, "step": 222120 }, { "epoch": 1.8756623250511917, "grad_norm": 0.0647621750831604, "learning_rate": 1.172580104590404e-07, "loss": 0.0061, "step": 222130 }, { "epoch": 1.8757467648983557, "grad_norm": 0.16979879140853882, "learning_rate": 1.1709941429851712e-07, "loss": 0.0038, "step": 222140 }, { "epoch": 1.8758312047455195, "grad_norm": 0.05267943814396858, "learning_rate": 1.1694092419359371e-07, "loss": 0.0036, "step": 222150 }, { "epoch": 1.8759156445926832, "grad_norm": 0.19757631421089172, "learning_rate": 1.1678254014771296e-07, "loss": 0.0052, "step": 222160 }, { "epoch": 1.876000084439847, "grad_norm": 0.3335161507129669, "learning_rate": 1.1662426216431489e-07, "loss": 0.0054, "step": 222170 }, { "epoch": 1.876084524287011, "grad_norm": 0.13167983293533325, "learning_rate": 1.1646609024683674e-07, "loss": 0.0047, "step": 222180 }, { "epoch": 1.876168964134175, "grad_norm": 0.10348795354366302, "learning_rate": 1.1630802439871525e-07, "loss": 0.0135, "step": 222190 }, { "epoch": 1.8762534039813388, "grad_norm": 0.03634968772530556, "learning_rate": 1.1615006462338152e-07, "loss": 0.0072, "step": 222200 }, { "epoch": 1.8763378438285025, "grad_norm": 0.23237664997577667, "learning_rate": 1.1599221092426838e-07, "loss": 0.0094, "step": 222210 }, { "epoch": 1.8764222836756665, "grad_norm": 0.1232333853840828, "learning_rate": 1.1583446330480307e-07, "loss": 0.0048, "step": 222220 }, { "epoch": 1.8765067235228305, "grad_norm": 0.09232670068740845, "learning_rate": 1.1567682176841344e-07, "loss": 0.0028, "step": 222230 }, { "epoch": 1.8765911633699943, "grad_norm": 0.6146718263626099, "learning_rate": 1.1551928631852116e-07, "loss": 0.0092, "step": 222240 }, { "epoch": 1.876675603217158, "grad_norm": 0.12017927318811417, "learning_rate": 1.1536185695854962e-07, "loss": 0.0041, "step": 222250 }, { "epoch": 1.876760043064322, "grad_norm": 0.2962813973426819, "learning_rate": 1.1520453369191775e-07, "loss": 0.0043, "step": 222260 }, { "epoch": 1.876844482911486, "grad_norm": 0.00372563349083066, "learning_rate": 1.1504731652204171e-07, "loss": 0.0113, "step": 222270 }, { "epoch": 1.8769289227586499, "grad_norm": 0.13546092808246613, "learning_rate": 1.1489020545233765e-07, "loss": 0.0071, "step": 222280 }, { "epoch": 1.8770133626058136, "grad_norm": 0.8102201223373413, "learning_rate": 1.1473320048621672e-07, "loss": 0.0065, "step": 222290 }, { "epoch": 1.8770978024529774, "grad_norm": 0.15671823918819427, "learning_rate": 1.1457630162709011e-07, "loss": 0.0115, "step": 222300 }, { "epoch": 1.8771822423001414, "grad_norm": 0.3367360830307007, "learning_rate": 1.144195088783645e-07, "loss": 0.0058, "step": 222310 }, { "epoch": 1.8772666821473054, "grad_norm": 0.04523376747965813, "learning_rate": 1.1426282224344665e-07, "loss": 0.0084, "step": 222320 }, { "epoch": 1.8773511219944692, "grad_norm": 0.4013042747974396, "learning_rate": 1.1410624172573881e-07, "loss": 0.0046, "step": 222330 }, { "epoch": 1.877435561841633, "grad_norm": 0.4441497027873993, "learning_rate": 1.1394976732864215e-07, "loss": 0.0067, "step": 222340 }, { "epoch": 1.877520001688797, "grad_norm": 0.5304726958274841, "learning_rate": 1.1379339905555509e-07, "loss": 0.0059, "step": 222350 }, { "epoch": 1.877604441535961, "grad_norm": 0.1916642189025879, "learning_rate": 1.1363713690987433e-07, "loss": 0.005, "step": 222360 }, { "epoch": 1.8776888813831247, "grad_norm": 0.6399948000907898, "learning_rate": 1.1348098089499382e-07, "loss": 0.006, "step": 222370 }, { "epoch": 1.8777733212302885, "grad_norm": 0.23339678347110748, "learning_rate": 1.1332493101430475e-07, "loss": 0.0059, "step": 222380 }, { "epoch": 1.8778577610774525, "grad_norm": 0.3028106689453125, "learning_rate": 1.1316898727119718e-07, "loss": 0.0048, "step": 222390 }, { "epoch": 1.8779422009246163, "grad_norm": 0.263395220041275, "learning_rate": 1.1301314966905786e-07, "loss": 0.0038, "step": 222400 }, { "epoch": 1.8780266407717803, "grad_norm": 0.38829106092453003, "learning_rate": 1.1285741821127127e-07, "loss": 0.0095, "step": 222410 }, { "epoch": 1.878111080618944, "grad_norm": 0.7036386728286743, "learning_rate": 1.1270179290121975e-07, "loss": 0.0082, "step": 222420 }, { "epoch": 1.8781955204661078, "grad_norm": 0.009644142352044582, "learning_rate": 1.1254627374228444e-07, "loss": 0.0071, "step": 222430 }, { "epoch": 1.8782799603132718, "grad_norm": 0.2306050956249237, "learning_rate": 1.1239086073784156e-07, "loss": 0.0073, "step": 222440 }, { "epoch": 1.8783644001604358, "grad_norm": 0.6997414231300354, "learning_rate": 1.1223555389126839e-07, "loss": 0.0072, "step": 222450 }, { "epoch": 1.8784488400075996, "grad_norm": 0.2677671015262604, "learning_rate": 1.1208035320593725e-07, "loss": 0.0122, "step": 222460 }, { "epoch": 1.8785332798547634, "grad_norm": 0.36779990792274475, "learning_rate": 1.1192525868521931e-07, "loss": 0.0054, "step": 222470 }, { "epoch": 1.8786177197019274, "grad_norm": 0.3375107944011688, "learning_rate": 1.11770270332483e-07, "loss": 0.0049, "step": 222480 }, { "epoch": 1.8787021595490914, "grad_norm": 0.15303468704223633, "learning_rate": 1.1161538815109508e-07, "loss": 0.0134, "step": 222490 }, { "epoch": 1.8787865993962551, "grad_norm": 0.3920714557170868, "learning_rate": 1.1146061214441895e-07, "loss": 0.0094, "step": 222500 }, { "epoch": 1.878871039243419, "grad_norm": 0.28561583161354065, "learning_rate": 1.1130594231581582e-07, "loss": 0.0064, "step": 222510 }, { "epoch": 1.8789554790905827, "grad_norm": 0.309167742729187, "learning_rate": 1.1115137866864689e-07, "loss": 0.0054, "step": 222520 }, { "epoch": 1.8790399189377467, "grad_norm": 0.2288440316915512, "learning_rate": 1.1099692120626726e-07, "loss": 0.0067, "step": 222530 }, { "epoch": 1.8791243587849107, "grad_norm": 0.0029076626524329185, "learning_rate": 1.1084256993203313e-07, "loss": 0.0036, "step": 222540 }, { "epoch": 1.8792087986320745, "grad_norm": 0.03836839646100998, "learning_rate": 1.1068832484929682e-07, "loss": 0.0096, "step": 222550 }, { "epoch": 1.8792932384792382, "grad_norm": 0.18317914009094238, "learning_rate": 1.1053418596140786e-07, "loss": 0.008, "step": 222560 }, { "epoch": 1.8793776783264022, "grad_norm": 0.13254962861537933, "learning_rate": 1.1038015327171414e-07, "loss": 0.0034, "step": 222570 }, { "epoch": 1.8794621181735662, "grad_norm": 0.2506031095981598, "learning_rate": 1.1022622678356242e-07, "loss": 0.0054, "step": 222580 }, { "epoch": 1.87954655802073, "grad_norm": 0.3904290199279785, "learning_rate": 1.1007240650029393e-07, "loss": 0.0061, "step": 222590 }, { "epoch": 1.8796309978678938, "grad_norm": 0.002649193163961172, "learning_rate": 1.0991869242525156e-07, "loss": 0.0062, "step": 222600 }, { "epoch": 1.8797154377150578, "grad_norm": 0.3077785074710846, "learning_rate": 1.097650845617726e-07, "loss": 0.0029, "step": 222610 }, { "epoch": 1.8797998775622218, "grad_norm": 0.12419185787439346, "learning_rate": 1.0961158291319385e-07, "loss": 0.0042, "step": 222620 }, { "epoch": 1.8798843174093856, "grad_norm": 0.06269937753677368, "learning_rate": 1.0945818748284986e-07, "loss": 0.0069, "step": 222630 }, { "epoch": 1.8799687572565493, "grad_norm": 0.39905115962028503, "learning_rate": 1.0930489827407187e-07, "loss": 0.0055, "step": 222640 }, { "epoch": 1.880053197103713, "grad_norm": 0.5255591869354248, "learning_rate": 1.0915171529018942e-07, "loss": 0.0062, "step": 222650 }, { "epoch": 1.880137636950877, "grad_norm": 0.4391011893749237, "learning_rate": 1.0899863853452875e-07, "loss": 0.0063, "step": 222660 }, { "epoch": 1.880222076798041, "grad_norm": 0.06661201268434525, "learning_rate": 1.0884566801041552e-07, "loss": 0.0031, "step": 222670 }, { "epoch": 1.8803065166452049, "grad_norm": 0.3236903250217438, "learning_rate": 1.0869280372117208e-07, "loss": 0.0066, "step": 222680 }, { "epoch": 1.8803909564923686, "grad_norm": 0.18141204118728638, "learning_rate": 1.0854004567011911e-07, "loss": 0.0056, "step": 222690 }, { "epoch": 1.8804753963395326, "grad_norm": 0.19330444931983948, "learning_rate": 1.083873938605734e-07, "loss": 0.0064, "step": 222700 }, { "epoch": 1.8805598361866966, "grad_norm": 0.17119550704956055, "learning_rate": 1.0823484829585174e-07, "loss": 0.0038, "step": 222710 }, { "epoch": 1.8806442760338604, "grad_norm": 0.14026318490505219, "learning_rate": 1.0808240897926648e-07, "loss": 0.0106, "step": 222720 }, { "epoch": 1.8807287158810242, "grad_norm": 0.1808875948190689, "learning_rate": 1.079300759141294e-07, "loss": 0.0047, "step": 222730 }, { "epoch": 1.880813155728188, "grad_norm": 0.13207045197486877, "learning_rate": 1.0777784910374789e-07, "loss": 0.0104, "step": 222740 }, { "epoch": 1.880897595575352, "grad_norm": 0.01650899089872837, "learning_rate": 1.0762572855142873e-07, "loss": 0.0042, "step": 222750 }, { "epoch": 1.880982035422516, "grad_norm": 0.12717227637767792, "learning_rate": 1.0747371426047704e-07, "loss": 0.0053, "step": 222760 }, { "epoch": 1.8810664752696797, "grad_norm": 0.9458240866661072, "learning_rate": 1.0732180623419297e-07, "loss": 0.0064, "step": 222770 }, { "epoch": 1.8811509151168435, "grad_norm": 0.7838173508644104, "learning_rate": 1.0717000447587722e-07, "loss": 0.0104, "step": 222780 }, { "epoch": 1.8812353549640075, "grad_norm": 0.1688358187675476, "learning_rate": 1.0701830898882549e-07, "loss": 0.0072, "step": 222790 }, { "epoch": 1.8813197948111715, "grad_norm": 0.18238377571105957, "learning_rate": 1.0686671977633456e-07, "loss": 0.0067, "step": 222800 }, { "epoch": 1.8814042346583353, "grad_norm": 0.25076740980148315, "learning_rate": 1.0671523684169515e-07, "loss": 0.0049, "step": 222810 }, { "epoch": 1.881488674505499, "grad_norm": 0.0005012597539462149, "learning_rate": 1.0656386018819853e-07, "loss": 0.0057, "step": 222820 }, { "epoch": 1.881573114352663, "grad_norm": 0.32626351714134216, "learning_rate": 1.0641258981913149e-07, "loss": 0.0071, "step": 222830 }, { "epoch": 1.881657554199827, "grad_norm": 0.45941296219825745, "learning_rate": 1.0626142573778032e-07, "loss": 0.0051, "step": 222840 }, { "epoch": 1.8817419940469908, "grad_norm": 0.2686847746372223, "learning_rate": 1.0611036794742846e-07, "loss": 0.0048, "step": 222850 }, { "epoch": 1.8818264338941546, "grad_norm": 0.12389251589775085, "learning_rate": 1.0595941645135611e-07, "loss": 0.009, "step": 222860 }, { "epoch": 1.8819108737413184, "grad_norm": 0.01889813132584095, "learning_rate": 1.0580857125284283e-07, "loss": 0.0046, "step": 222870 }, { "epoch": 1.8819953135884824, "grad_norm": 0.725959837436676, "learning_rate": 1.0565783235516436e-07, "loss": 0.0092, "step": 222880 }, { "epoch": 1.8820797534356464, "grad_norm": 0.35158661007881165, "learning_rate": 1.0550719976159473e-07, "loss": 0.0059, "step": 222890 }, { "epoch": 1.8821641932828101, "grad_norm": 0.17175056040287018, "learning_rate": 1.0535667347540579e-07, "loss": 0.0078, "step": 222900 }, { "epoch": 1.882248633129974, "grad_norm": 0.07948342710733414, "learning_rate": 1.0520625349986657e-07, "loss": 0.0055, "step": 222910 }, { "epoch": 1.882333072977138, "grad_norm": 0.00024157042207662016, "learning_rate": 1.0505593983824447e-07, "loss": 0.0062, "step": 222920 }, { "epoch": 1.882417512824302, "grad_norm": 0.3200988173484802, "learning_rate": 1.0490573249380521e-07, "loss": 0.0085, "step": 222930 }, { "epoch": 1.8825019526714657, "grad_norm": 0.36734890937805176, "learning_rate": 1.0475563146980894e-07, "loss": 0.0144, "step": 222940 }, { "epoch": 1.8825863925186295, "grad_norm": 0.3130761682987213, "learning_rate": 1.0460563676951807e-07, "loss": 0.0074, "step": 222950 }, { "epoch": 1.8826708323657935, "grad_norm": 0.14409737288951874, "learning_rate": 1.0445574839618999e-07, "loss": 0.0034, "step": 222960 }, { "epoch": 1.8827552722129572, "grad_norm": 0.4889093041419983, "learning_rate": 1.0430596635307932e-07, "loss": 0.0062, "step": 222970 }, { "epoch": 1.8828397120601212, "grad_norm": 0.22321467101573944, "learning_rate": 1.0415629064344012e-07, "loss": 0.0046, "step": 222980 }, { "epoch": 1.882924151907285, "grad_norm": 0.10393583029508591, "learning_rate": 1.0400672127052258e-07, "loss": 0.0053, "step": 222990 }, { "epoch": 1.8830085917544488, "grad_norm": 0.11873914301395416, "learning_rate": 1.0385725823757631e-07, "loss": 0.003, "step": 223000 }, { "epoch": 1.8830930316016128, "grad_norm": 0.020368652418255806, "learning_rate": 1.0370790154784649e-07, "loss": 0.0044, "step": 223010 }, { "epoch": 1.8831774714487768, "grad_norm": 0.1556715965270996, "learning_rate": 1.0355865120457831e-07, "loss": 0.0057, "step": 223020 }, { "epoch": 1.8832619112959406, "grad_norm": 0.23233245313167572, "learning_rate": 1.0340950721101194e-07, "loss": 0.0069, "step": 223030 }, { "epoch": 1.8833463511431043, "grad_norm": 4.413228089106269e-05, "learning_rate": 1.0326046957038871e-07, "loss": 0.006, "step": 223040 }, { "epoch": 1.8834307909902683, "grad_norm": 0.15713956952095032, "learning_rate": 1.0311153828594433e-07, "loss": 0.0076, "step": 223050 }, { "epoch": 1.8835152308374323, "grad_norm": 0.2112254947423935, "learning_rate": 1.0296271336091346e-07, "loss": 0.0025, "step": 223060 }, { "epoch": 1.883599670684596, "grad_norm": 0.012433293275535107, "learning_rate": 1.028139947985296e-07, "loss": 0.0028, "step": 223070 }, { "epoch": 1.8836841105317599, "grad_norm": 0.3212989270687103, "learning_rate": 1.0266538260202186e-07, "loss": 0.0049, "step": 223080 }, { "epoch": 1.8837685503789237, "grad_norm": 0.31543731689453125, "learning_rate": 1.0251687677461819e-07, "loss": 0.0092, "step": 223090 }, { "epoch": 1.8838529902260877, "grad_norm": 0.3740937411785126, "learning_rate": 1.0236847731954436e-07, "loss": 0.009, "step": 223100 }, { "epoch": 1.8839374300732517, "grad_norm": 0.439430832862854, "learning_rate": 1.0222018424002444e-07, "loss": 0.0079, "step": 223110 }, { "epoch": 1.8840218699204154, "grad_norm": 0.3841720223426819, "learning_rate": 1.020719975392781e-07, "loss": 0.0057, "step": 223120 }, { "epoch": 1.8841063097675792, "grad_norm": 0.13516369462013245, "learning_rate": 1.0192391722052386e-07, "loss": 0.0042, "step": 223130 }, { "epoch": 1.8841907496147432, "grad_norm": 0.3745998442173004, "learning_rate": 1.0177594328697859e-07, "loss": 0.0106, "step": 223140 }, { "epoch": 1.8842751894619072, "grad_norm": 0.21510177850723267, "learning_rate": 1.016280757418564e-07, "loss": 0.0049, "step": 223150 }, { "epoch": 1.884359629309071, "grad_norm": 0.20113223791122437, "learning_rate": 1.0148031458836804e-07, "loss": 0.0044, "step": 223160 }, { "epoch": 1.8844440691562347, "grad_norm": 0.23053394258022308, "learning_rate": 1.0133265982972373e-07, "loss": 0.0094, "step": 223170 }, { "epoch": 1.8845285090033987, "grad_norm": 0.47373461723327637, "learning_rate": 1.0118511146913034e-07, "loss": 0.007, "step": 223180 }, { "epoch": 1.8846129488505627, "grad_norm": 0.43781229853630066, "learning_rate": 1.0103766950979255e-07, "loss": 0.0038, "step": 223190 }, { "epoch": 1.8846973886977265, "grad_norm": 0.02194776013493538, "learning_rate": 1.0089033395491222e-07, "loss": 0.0063, "step": 223200 }, { "epoch": 1.8847818285448903, "grad_norm": 0.2546241879463196, "learning_rate": 1.0074310480769067e-07, "loss": 0.0084, "step": 223210 }, { "epoch": 1.884866268392054, "grad_norm": 0.2911582589149475, "learning_rate": 1.0059598207132482e-07, "loss": 0.006, "step": 223220 }, { "epoch": 1.884950708239218, "grad_norm": 0.18874651193618774, "learning_rate": 1.0044896574900931e-07, "loss": 0.0067, "step": 223230 }, { "epoch": 1.885035148086382, "grad_norm": 0.26733940839767456, "learning_rate": 1.0030205584393937e-07, "loss": 0.0031, "step": 223240 }, { "epoch": 1.8851195879335458, "grad_norm": 0.34333372116088867, "learning_rate": 1.0015525235930357e-07, "loss": 0.0084, "step": 223250 }, { "epoch": 1.8852040277807096, "grad_norm": 0.259321004152298, "learning_rate": 1.0000855529829268e-07, "loss": 0.0046, "step": 223260 }, { "epoch": 1.8852884676278736, "grad_norm": 0.20686689019203186, "learning_rate": 9.986196466409137e-08, "loss": 0.0058, "step": 223270 }, { "epoch": 1.8853729074750376, "grad_norm": 0.16189531981945038, "learning_rate": 9.971548045988378e-08, "loss": 0.0058, "step": 223280 }, { "epoch": 1.8854573473222014, "grad_norm": 0.31311842799186707, "learning_rate": 9.956910268885233e-08, "loss": 0.0043, "step": 223290 }, { "epoch": 1.8855417871693652, "grad_norm": 0.1980939656496048, "learning_rate": 9.942283135417618e-08, "loss": 0.0058, "step": 223300 }, { "epoch": 1.8856262270165292, "grad_norm": 0.6274896860122681, "learning_rate": 9.927666645903111e-08, "loss": 0.0065, "step": 223310 }, { "epoch": 1.885710666863693, "grad_norm": 0.0905800610780716, "learning_rate": 9.91306080065929e-08, "loss": 0.0019, "step": 223320 }, { "epoch": 1.885795106710857, "grad_norm": 0.4555840492248535, "learning_rate": 9.898465600003349e-08, "loss": 0.0069, "step": 223330 }, { "epoch": 1.8858795465580207, "grad_norm": 0.09569308906793594, "learning_rate": 9.883881044252253e-08, "loss": 0.0067, "step": 223340 }, { "epoch": 1.8859639864051845, "grad_norm": 0.08253642916679382, "learning_rate": 9.869307133722916e-08, "loss": 0.0031, "step": 223350 }, { "epoch": 1.8860484262523485, "grad_norm": 0.1278536319732666, "learning_rate": 9.854743868731753e-08, "loss": 0.0092, "step": 223360 }, { "epoch": 1.8861328660995125, "grad_norm": 0.4407059848308563, "learning_rate": 9.840191249595122e-08, "loss": 0.0061, "step": 223370 }, { "epoch": 1.8862173059466762, "grad_norm": 0.24257682263851166, "learning_rate": 9.825649276629102e-08, "loss": 0.0058, "step": 223380 }, { "epoch": 1.88630174579384, "grad_norm": 0.3377611041069031, "learning_rate": 9.811117950149496e-08, "loss": 0.0044, "step": 223390 }, { "epoch": 1.886386185641004, "grad_norm": 0.23375025391578674, "learning_rate": 9.796597270471997e-08, "loss": 0.0071, "step": 223400 }, { "epoch": 1.886470625488168, "grad_norm": 0.011040654964745045, "learning_rate": 9.782087237911908e-08, "loss": 0.0094, "step": 223410 }, { "epoch": 1.8865550653353318, "grad_norm": 0.351688951253891, "learning_rate": 9.767587852784477e-08, "loss": 0.0062, "step": 223420 }, { "epoch": 1.8866395051824956, "grad_norm": 0.23581543564796448, "learning_rate": 9.753099115404563e-08, "loss": 0.0061, "step": 223430 }, { "epoch": 1.8867239450296593, "grad_norm": 0.31137728691101074, "learning_rate": 9.738621026086859e-08, "loss": 0.007, "step": 223440 }, { "epoch": 1.8868083848768233, "grad_norm": 0.11100766807794571, "learning_rate": 9.72415358514589e-08, "loss": 0.0084, "step": 223450 }, { "epoch": 1.8868928247239873, "grad_norm": 0.20876196026802063, "learning_rate": 9.709696792895795e-08, "loss": 0.0051, "step": 223460 }, { "epoch": 1.8869772645711511, "grad_norm": 0.3483195900917053, "learning_rate": 9.695250649650601e-08, "loss": 0.0093, "step": 223470 }, { "epoch": 1.887061704418315, "grad_norm": 0.18957725167274475, "learning_rate": 9.680815155724166e-08, "loss": 0.003, "step": 223480 }, { "epoch": 1.8871461442654789, "grad_norm": 0.22941362857818604, "learning_rate": 9.666390311429852e-08, "loss": 0.0068, "step": 223490 }, { "epoch": 1.8872305841126429, "grad_norm": 0.06900011003017426, "learning_rate": 9.65197611708113e-08, "loss": 0.0102, "step": 223500 }, { "epoch": 1.8873150239598067, "grad_norm": 0.14151635766029358, "learning_rate": 9.637572572990916e-08, "loss": 0.0075, "step": 223510 }, { "epoch": 1.8873994638069704, "grad_norm": 0.0007713421946391463, "learning_rate": 9.623179679472239e-08, "loss": 0.0044, "step": 223520 }, { "epoch": 1.8874839036541344, "grad_norm": 0.5821493268013, "learning_rate": 9.608797436837625e-08, "loss": 0.008, "step": 223530 }, { "epoch": 1.8875683435012984, "grad_norm": 0.03819672390818596, "learning_rate": 9.59442584539938e-08, "loss": 0.0061, "step": 223540 }, { "epoch": 1.8876527833484622, "grad_norm": 0.3701383173465729, "learning_rate": 9.580064905469755e-08, "loss": 0.0105, "step": 223550 }, { "epoch": 1.887737223195626, "grad_norm": 0.46333739161491394, "learning_rate": 9.565714617360555e-08, "loss": 0.0024, "step": 223560 }, { "epoch": 1.8878216630427898, "grad_norm": 0.465046226978302, "learning_rate": 9.551374981383588e-08, "loss": 0.0031, "step": 223570 }, { "epoch": 1.8879061028899538, "grad_norm": 0.2821422815322876, "learning_rate": 9.537045997850213e-08, "loss": 0.0043, "step": 223580 }, { "epoch": 1.8879905427371177, "grad_norm": 0.22447583079338074, "learning_rate": 9.52272766707174e-08, "loss": 0.0049, "step": 223590 }, { "epoch": 1.8880749825842815, "grad_norm": 0.1414734274148941, "learning_rate": 9.508419989359085e-08, "loss": 0.0047, "step": 223600 }, { "epoch": 1.8881594224314453, "grad_norm": 0.0656561553478241, "learning_rate": 9.494122965023056e-08, "loss": 0.0118, "step": 223610 }, { "epoch": 1.8882438622786093, "grad_norm": 0.003005732549354434, "learning_rate": 9.479836594374181e-08, "loss": 0.0048, "step": 223620 }, { "epoch": 1.8883283021257733, "grad_norm": 0.3762318193912506, "learning_rate": 9.465560877722713e-08, "loss": 0.0051, "step": 223630 }, { "epoch": 1.888412741972937, "grad_norm": 0.4945761561393738, "learning_rate": 9.45129581537868e-08, "loss": 0.0095, "step": 223640 }, { "epoch": 1.8884971818201008, "grad_norm": 0.42987361550331116, "learning_rate": 9.437041407652058e-08, "loss": 0.0072, "step": 223650 }, { "epoch": 1.8885816216672646, "grad_norm": 0.28219661116600037, "learning_rate": 9.422797654852323e-08, "loss": 0.0076, "step": 223660 }, { "epoch": 1.8886660615144286, "grad_norm": 0.19079576432704926, "learning_rate": 9.408564557288946e-08, "loss": 0.0043, "step": 223670 }, { "epoch": 1.8887505013615926, "grad_norm": 0.33446839451789856, "learning_rate": 9.394342115271016e-08, "loss": 0.006, "step": 223680 }, { "epoch": 1.8888349412087564, "grad_norm": 0.15830324590206146, "learning_rate": 9.380130329107395e-08, "loss": 0.0048, "step": 223690 }, { "epoch": 1.8889193810559202, "grad_norm": 0.6850807070732117, "learning_rate": 9.365929199106837e-08, "loss": 0.0059, "step": 223700 }, { "epoch": 1.8890038209030842, "grad_norm": 0.42111843824386597, "learning_rate": 9.351738725577764e-08, "loss": 0.0035, "step": 223710 }, { "epoch": 1.8890882607502482, "grad_norm": 0.32080814242362976, "learning_rate": 9.33755890882837e-08, "loss": 0.0059, "step": 223720 }, { "epoch": 1.889172700597412, "grad_norm": 0.0908123254776001, "learning_rate": 9.323389749166578e-08, "loss": 0.0041, "step": 223730 }, { "epoch": 1.8892571404445757, "grad_norm": 0.11614549905061722, "learning_rate": 9.309231246900307e-08, "loss": 0.0038, "step": 223740 }, { "epoch": 1.8893415802917397, "grad_norm": 0.5736641883850098, "learning_rate": 9.295083402336924e-08, "loss": 0.0049, "step": 223750 }, { "epoch": 1.8894260201389037, "grad_norm": 0.3432045876979828, "learning_rate": 9.280946215783848e-08, "loss": 0.0034, "step": 223760 }, { "epoch": 1.8895104599860675, "grad_norm": 0.35432517528533936, "learning_rate": 9.266819687548001e-08, "loss": 0.0072, "step": 223770 }, { "epoch": 1.8895948998332313, "grad_norm": 0.39062967896461487, "learning_rate": 9.252703817936304e-08, "loss": 0.0082, "step": 223780 }, { "epoch": 1.889679339680395, "grad_norm": 0.10830796509981155, "learning_rate": 9.238598607255289e-08, "loss": 0.0037, "step": 223790 }, { "epoch": 1.889763779527559, "grad_norm": 0.27600914239883423, "learning_rate": 9.224504055811323e-08, "loss": 0.0093, "step": 223800 }, { "epoch": 1.889848219374723, "grad_norm": 0.7466633915901184, "learning_rate": 9.210420163910606e-08, "loss": 0.0044, "step": 223810 }, { "epoch": 1.8899326592218868, "grad_norm": 0.2187625616788864, "learning_rate": 9.196346931858945e-08, "loss": 0.0074, "step": 223820 }, { "epoch": 1.8900170990690506, "grad_norm": 0.3978255093097687, "learning_rate": 9.182284359962102e-08, "loss": 0.0085, "step": 223830 }, { "epoch": 1.8901015389162146, "grad_norm": 0.3067258596420288, "learning_rate": 9.168232448525439e-08, "loss": 0.0058, "step": 223840 }, { "epoch": 1.8901859787633786, "grad_norm": 0.144697368144989, "learning_rate": 9.154191197854213e-08, "loss": 0.008, "step": 223850 }, { "epoch": 1.8902704186105423, "grad_norm": 0.8244286775588989, "learning_rate": 9.140160608253346e-08, "loss": 0.0089, "step": 223860 }, { "epoch": 1.8903548584577061, "grad_norm": 0.0025776971597224474, "learning_rate": 9.12614068002765e-08, "loss": 0.0039, "step": 223870 }, { "epoch": 1.8904392983048701, "grad_norm": 0.2885606586933136, "learning_rate": 9.112131413481496e-08, "loss": 0.0072, "step": 223880 }, { "epoch": 1.890523738152034, "grad_norm": 0.4720590114593506, "learning_rate": 9.098132808919357e-08, "loss": 0.0128, "step": 223890 }, { "epoch": 1.890608177999198, "grad_norm": 0.3095211088657379, "learning_rate": 9.084144866645106e-08, "loss": 0.0065, "step": 223900 }, { "epoch": 1.8906926178463617, "grad_norm": 0.42029640078544617, "learning_rate": 9.070167586962664e-08, "loss": 0.0053, "step": 223910 }, { "epoch": 1.8907770576935254, "grad_norm": 0.07859983295202255, "learning_rate": 9.056200970175566e-08, "loss": 0.0059, "step": 223920 }, { "epoch": 1.8908614975406894, "grad_norm": 0.06303762644529343, "learning_rate": 9.042245016587125e-08, "loss": 0.0061, "step": 223930 }, { "epoch": 1.8909459373878534, "grad_norm": 0.5582675337791443, "learning_rate": 9.0282997265006e-08, "loss": 0.0085, "step": 223940 }, { "epoch": 1.8910303772350172, "grad_norm": 0.2681729793548584, "learning_rate": 9.014365100218803e-08, "loss": 0.0075, "step": 223950 }, { "epoch": 1.891114817082181, "grad_norm": 0.3056519627571106, "learning_rate": 9.000441138044325e-08, "loss": 0.0084, "step": 223960 }, { "epoch": 1.891199256929345, "grad_norm": 0.11472781747579575, "learning_rate": 8.986527840279647e-08, "loss": 0.0031, "step": 223970 }, { "epoch": 1.891283696776509, "grad_norm": 0.6198348999023438, "learning_rate": 8.972625207227026e-08, "loss": 0.0057, "step": 223980 }, { "epoch": 1.8913681366236728, "grad_norm": 0.3029929995536804, "learning_rate": 8.958733239188333e-08, "loss": 0.0055, "step": 223990 }, { "epoch": 1.8914525764708365, "grad_norm": 0.41018980741500854, "learning_rate": 8.944851936465327e-08, "loss": 0.0042, "step": 224000 }, { "epoch": 1.8915370163180003, "grad_norm": 0.09965027868747711, "learning_rate": 8.930981299359486e-08, "loss": 0.0031, "step": 224010 }, { "epoch": 1.8916214561651643, "grad_norm": 0.31610870361328125, "learning_rate": 8.917121328172185e-08, "loss": 0.0065, "step": 224020 }, { "epoch": 1.8917058960123283, "grad_norm": 0.03439716622233391, "learning_rate": 8.90327202320429e-08, "loss": 0.0053, "step": 224030 }, { "epoch": 1.891790335859492, "grad_norm": 0.210985466837883, "learning_rate": 8.88943338475673e-08, "loss": 0.0057, "step": 224040 }, { "epoch": 1.8918747757066559, "grad_norm": 0.322680801153183, "learning_rate": 8.875605413130039e-08, "loss": 0.005, "step": 224050 }, { "epoch": 1.8919592155538199, "grad_norm": 0.2321581393480301, "learning_rate": 8.861788108624536e-08, "loss": 0.005, "step": 224060 }, { "epoch": 1.8920436554009838, "grad_norm": 0.05338351055979729, "learning_rate": 8.847981471540368e-08, "loss": 0.0078, "step": 224070 }, { "epoch": 1.8921280952481476, "grad_norm": 0.24314796924591064, "learning_rate": 8.83418550217735e-08, "loss": 0.0064, "step": 224080 }, { "epoch": 1.8922125350953114, "grad_norm": 0.29119113087654114, "learning_rate": 8.820400200835244e-08, "loss": 0.0062, "step": 224090 }, { "epoch": 1.8922969749424754, "grad_norm": 0.3300741910934448, "learning_rate": 8.806625567813365e-08, "loss": 0.0067, "step": 224100 }, { "epoch": 1.8923814147896394, "grad_norm": 0.005444289650768042, "learning_rate": 8.792861603410919e-08, "loss": 0.0035, "step": 224110 }, { "epoch": 1.8924658546368032, "grad_norm": 0.013775455765426159, "learning_rate": 8.779108307926831e-08, "loss": 0.0118, "step": 224120 }, { "epoch": 1.892550294483967, "grad_norm": 0.44229361414909363, "learning_rate": 8.765365681659921e-08, "loss": 0.0039, "step": 224130 }, { "epoch": 1.8926347343311307, "grad_norm": 0.10827692598104477, "learning_rate": 8.75163372490856e-08, "loss": 0.0075, "step": 224140 }, { "epoch": 1.8927191741782947, "grad_norm": 0.2774149179458618, "learning_rate": 8.737912437971008e-08, "loss": 0.0072, "step": 224150 }, { "epoch": 1.8928036140254587, "grad_norm": 0.0999106764793396, "learning_rate": 8.724201821145417e-08, "loss": 0.0093, "step": 224160 }, { "epoch": 1.8928880538726225, "grad_norm": 0.35483792424201965, "learning_rate": 8.710501874729438e-08, "loss": 0.0037, "step": 224170 }, { "epoch": 1.8929724937197863, "grad_norm": 0.06540852040052414, "learning_rate": 8.696812599020665e-08, "loss": 0.0069, "step": 224180 }, { "epoch": 1.8930569335669503, "grad_norm": 0.2217724770307541, "learning_rate": 8.683133994316529e-08, "loss": 0.0098, "step": 224190 }, { "epoch": 1.8931413734141143, "grad_norm": 0.01947169378399849, "learning_rate": 8.669466060914011e-08, "loss": 0.004, "step": 224200 }, { "epoch": 1.893225813261278, "grad_norm": 0.020813388749957085, "learning_rate": 8.655808799109933e-08, "loss": 0.0094, "step": 224210 }, { "epoch": 1.8933102531084418, "grad_norm": 0.15564991533756256, "learning_rate": 8.642162209201055e-08, "loss": 0.0084, "step": 224220 }, { "epoch": 1.8933946929556056, "grad_norm": 0.2877540588378906, "learning_rate": 8.628526291483696e-08, "loss": 0.0113, "step": 224230 }, { "epoch": 1.8934791328027696, "grad_norm": 0.41472646594047546, "learning_rate": 8.614901046254121e-08, "loss": 0.0084, "step": 224240 }, { "epoch": 1.8935635726499336, "grad_norm": 0.2687359154224396, "learning_rate": 8.60128647380809e-08, "loss": 0.0066, "step": 224250 }, { "epoch": 1.8936480124970974, "grad_norm": 0.2854887843132019, "learning_rate": 8.587682574441536e-08, "loss": 0.0064, "step": 224260 }, { "epoch": 1.8937324523442611, "grad_norm": 0.36186957359313965, "learning_rate": 8.574089348449777e-08, "loss": 0.0049, "step": 224270 }, { "epoch": 1.8938168921914251, "grad_norm": 0.09313452243804932, "learning_rate": 8.560506796128076e-08, "loss": 0.0051, "step": 224280 }, { "epoch": 1.8939013320385891, "grad_norm": 0.28946244716644287, "learning_rate": 8.546934917771533e-08, "loss": 0.0048, "step": 224290 }, { "epoch": 1.893985771885753, "grad_norm": 0.14112220704555511, "learning_rate": 8.533373713674742e-08, "loss": 0.0032, "step": 224300 }, { "epoch": 1.8940702117329167, "grad_norm": 0.32562729716300964, "learning_rate": 8.519823184132414e-08, "loss": 0.0034, "step": 224310 }, { "epoch": 1.8941546515800807, "grad_norm": 0.12023473531007767, "learning_rate": 8.506283329438814e-08, "loss": 0.0022, "step": 224320 }, { "epoch": 1.8942390914272447, "grad_norm": 0.14197885990142822, "learning_rate": 8.492754149888039e-08, "loss": 0.007, "step": 224330 }, { "epoch": 1.8943235312744084, "grad_norm": 0.49074798822402954, "learning_rate": 8.479235645773908e-08, "loss": 0.0081, "step": 224340 }, { "epoch": 1.8944079711215722, "grad_norm": 0.532743513584137, "learning_rate": 8.465727817390079e-08, "loss": 0.0042, "step": 224350 }, { "epoch": 1.894492410968736, "grad_norm": 0.19606637954711914, "learning_rate": 8.45223066502987e-08, "loss": 0.0077, "step": 224360 }, { "epoch": 1.8945768508159, "grad_norm": 0.276546448469162, "learning_rate": 8.438744188986547e-08, "loss": 0.0044, "step": 224370 }, { "epoch": 1.894661290663064, "grad_norm": 0.14402836561203003, "learning_rate": 8.425268389552932e-08, "loss": 0.0033, "step": 224380 }, { "epoch": 1.8947457305102278, "grad_norm": 0.7281991243362427, "learning_rate": 8.411803267021734e-08, "loss": 0.0108, "step": 224390 }, { "epoch": 1.8948301703573915, "grad_norm": 0.3173103332519531, "learning_rate": 8.398348821685443e-08, "loss": 0.0075, "step": 224400 }, { "epoch": 1.8949146102045555, "grad_norm": 0.3870507478713989, "learning_rate": 8.384905053836267e-08, "loss": 0.0044, "step": 224410 }, { "epoch": 1.8949990500517195, "grad_norm": 0.008414031937718391, "learning_rate": 8.371471963766253e-08, "loss": 0.0083, "step": 224420 }, { "epoch": 1.8950834898988833, "grad_norm": 0.28717660903930664, "learning_rate": 8.358049551767111e-08, "loss": 0.0037, "step": 224430 }, { "epoch": 1.895167929746047, "grad_norm": 0.07015980780124664, "learning_rate": 8.344637818130386e-08, "loss": 0.0055, "step": 224440 }, { "epoch": 1.895252369593211, "grad_norm": 0.26779088377952576, "learning_rate": 8.331236763147343e-08, "loss": 0.0057, "step": 224450 }, { "epoch": 1.8953368094403749, "grad_norm": 0.2541082203388214, "learning_rate": 8.317846387109141e-08, "loss": 0.0052, "step": 224460 }, { "epoch": 1.8954212492875389, "grad_norm": 0.19092679023742676, "learning_rate": 8.30446669030649e-08, "loss": 0.0045, "step": 224470 }, { "epoch": 1.8955056891347026, "grad_norm": 0.18908759951591492, "learning_rate": 8.291097673030158e-08, "loss": 0.0053, "step": 224480 }, { "epoch": 1.8955901289818664, "grad_norm": 0.3020869493484497, "learning_rate": 8.277739335570356e-08, "loss": 0.007, "step": 224490 }, { "epoch": 1.8956745688290304, "grad_norm": 0.29466763138771057, "learning_rate": 8.264391678217298e-08, "loss": 0.0047, "step": 224500 }, { "epoch": 1.8957590086761944, "grad_norm": 0.06347712129354477, "learning_rate": 8.251054701260919e-08, "loss": 0.0143, "step": 224510 }, { "epoch": 1.8958434485233582, "grad_norm": 0.3344748914241791, "learning_rate": 8.23772840499082e-08, "loss": 0.0074, "step": 224520 }, { "epoch": 1.895927888370522, "grad_norm": 0.11600226908922195, "learning_rate": 8.224412789696545e-08, "loss": 0.0063, "step": 224530 }, { "epoch": 1.896012328217686, "grad_norm": 0.18157821893692017, "learning_rate": 8.2111078556672e-08, "loss": 0.0073, "step": 224540 }, { "epoch": 1.89609676806485, "grad_norm": 0.041485849767923355, "learning_rate": 8.197813603191829e-08, "loss": 0.0047, "step": 224550 }, { "epoch": 1.8961812079120137, "grad_norm": 0.3094521760940552, "learning_rate": 8.184530032559146e-08, "loss": 0.007, "step": 224560 }, { "epoch": 1.8962656477591775, "grad_norm": 0.1898365616798401, "learning_rate": 8.171257144057698e-08, "loss": 0.0048, "step": 224570 }, { "epoch": 1.8963500876063413, "grad_norm": 0.5008957386016846, "learning_rate": 8.157994937975755e-08, "loss": 0.0081, "step": 224580 }, { "epoch": 1.8964345274535053, "grad_norm": 0.11995203793048859, "learning_rate": 8.144743414601419e-08, "loss": 0.0061, "step": 224590 }, { "epoch": 1.8965189673006693, "grad_norm": 0.01821008138358593, "learning_rate": 8.131502574222405e-08, "loss": 0.0044, "step": 224600 }, { "epoch": 1.896603407147833, "grad_norm": 0.7152616381645203, "learning_rate": 8.118272417126428e-08, "loss": 0.0043, "step": 224610 }, { "epoch": 1.8966878469949968, "grad_norm": 0.176534041762352, "learning_rate": 8.105052943600755e-08, "loss": 0.0064, "step": 224620 }, { "epoch": 1.8967722868421608, "grad_norm": 0.09367094933986664, "learning_rate": 8.091844153932493e-08, "loss": 0.0086, "step": 224630 }, { "epoch": 1.8968567266893248, "grad_norm": 0.4876099228858948, "learning_rate": 8.078646048408634e-08, "loss": 0.008, "step": 224640 }, { "epoch": 1.8969411665364886, "grad_norm": 0.38722124695777893, "learning_rate": 8.065458627315726e-08, "loss": 0.0078, "step": 224650 }, { "epoch": 1.8970256063836524, "grad_norm": 0.40429604053497314, "learning_rate": 8.052281890940262e-08, "loss": 0.0081, "step": 224660 }, { "epoch": 1.8971100462308164, "grad_norm": 0.4833073914051056, "learning_rate": 8.039115839568457e-08, "loss": 0.0042, "step": 224670 }, { "epoch": 1.8971944860779804, "grad_norm": 0.05298496037721634, "learning_rate": 8.025960473486249e-08, "loss": 0.004, "step": 224680 }, { "epoch": 1.8972789259251441, "grad_norm": 0.005412060301750898, "learning_rate": 8.0128157929793e-08, "loss": 0.0078, "step": 224690 }, { "epoch": 1.897363365772308, "grad_norm": 0.3354645073413849, "learning_rate": 7.999681798333214e-08, "loss": 0.0091, "step": 224700 }, { "epoch": 1.8974478056194717, "grad_norm": 0.1430133432149887, "learning_rate": 7.986558489833208e-08, "loss": 0.0065, "step": 224710 }, { "epoch": 1.8975322454666357, "grad_norm": 0.45800426602363586, "learning_rate": 7.973445867764328e-08, "loss": 0.0075, "step": 224720 }, { "epoch": 1.8976166853137997, "grad_norm": 0.4027343988418579, "learning_rate": 7.960343932411408e-08, "loss": 0.0089, "step": 224730 }, { "epoch": 1.8977011251609635, "grad_norm": 0.10882627964019775, "learning_rate": 7.947252684058937e-08, "loss": 0.0051, "step": 224740 }, { "epoch": 1.8977855650081272, "grad_norm": 0.392803430557251, "learning_rate": 7.934172122991357e-08, "loss": 0.0062, "step": 224750 }, { "epoch": 1.8978700048552912, "grad_norm": 0.1546006053686142, "learning_rate": 7.921102249492718e-08, "loss": 0.0074, "step": 224760 }, { "epoch": 1.8979544447024552, "grad_norm": 0.3040260076522827, "learning_rate": 7.908043063846905e-08, "loss": 0.0093, "step": 224770 }, { "epoch": 1.898038884549619, "grad_norm": 0.13280457258224487, "learning_rate": 7.894994566337466e-08, "loss": 0.0023, "step": 224780 }, { "epoch": 1.8981233243967828, "grad_norm": 0.13007740676403046, "learning_rate": 7.88195675724801e-08, "loss": 0.004, "step": 224790 }, { "epoch": 1.8982077642439468, "grad_norm": 0.24422630667686462, "learning_rate": 7.868929636861533e-08, "loss": 0.0029, "step": 224800 }, { "epoch": 1.8982922040911105, "grad_norm": 0.016309257596731186, "learning_rate": 7.855913205461141e-08, "loss": 0.0052, "step": 224810 }, { "epoch": 1.8983766439382745, "grad_norm": 0.305645227432251, "learning_rate": 7.842907463329386e-08, "loss": 0.0067, "step": 224820 }, { "epoch": 1.8984610837854383, "grad_norm": 0.0033119108993560076, "learning_rate": 7.829912410748875e-08, "loss": 0.0146, "step": 224830 }, { "epoch": 1.898545523632602, "grad_norm": 0.11006800830364227, "learning_rate": 7.816928048001826e-08, "loss": 0.0069, "step": 224840 }, { "epoch": 1.898629963479766, "grad_norm": 0.4652932584285736, "learning_rate": 7.803954375370237e-08, "loss": 0.0075, "step": 224850 }, { "epoch": 1.89871440332693, "grad_norm": 0.07831128686666489, "learning_rate": 7.790991393135883e-08, "loss": 0.0114, "step": 224860 }, { "epoch": 1.8987988431740939, "grad_norm": 0.32504650950431824, "learning_rate": 7.778039101580315e-08, "loss": 0.0062, "step": 224870 }, { "epoch": 1.8988832830212576, "grad_norm": 1.3296231031417847, "learning_rate": 7.765097500984864e-08, "loss": 0.0089, "step": 224880 }, { "epoch": 1.8989677228684216, "grad_norm": 0.22866015136241913, "learning_rate": 7.752166591630639e-08, "loss": 0.007, "step": 224890 }, { "epoch": 1.8990521627155856, "grad_norm": 0.15689100325107574, "learning_rate": 7.739246373798526e-08, "loss": 0.0058, "step": 224900 }, { "epoch": 1.8991366025627494, "grad_norm": 0.21291787922382355, "learning_rate": 7.726336847769023e-08, "loss": 0.0056, "step": 224910 }, { "epoch": 1.8992210424099132, "grad_norm": 0.3660289943218231, "learning_rate": 7.713438013822738e-08, "loss": 0.0088, "step": 224920 }, { "epoch": 1.899305482257077, "grad_norm": 0.7904077768325806, "learning_rate": 7.70054987223956e-08, "loss": 0.0092, "step": 224930 }, { "epoch": 1.899389922104241, "grad_norm": 0.17852023243904114, "learning_rate": 7.687672423299653e-08, "loss": 0.0065, "step": 224940 }, { "epoch": 1.899474361951405, "grad_norm": 0.36438965797424316, "learning_rate": 7.67480566728257e-08, "loss": 0.0084, "step": 224950 }, { "epoch": 1.8995588017985687, "grad_norm": 0.3827907145023346, "learning_rate": 7.661949604467811e-08, "loss": 0.0075, "step": 224960 }, { "epoch": 1.8996432416457325, "grad_norm": 0.1885407567024231, "learning_rate": 7.649104235134653e-08, "loss": 0.0063, "step": 224970 }, { "epoch": 1.8997276814928965, "grad_norm": 0.18633197247982025, "learning_rate": 7.636269559562037e-08, "loss": 0.0048, "step": 224980 }, { "epoch": 1.8998121213400605, "grad_norm": 0.4054788053035736, "learning_rate": 7.623445578028798e-08, "loss": 0.0027, "step": 224990 }, { "epoch": 1.8998965611872243, "grad_norm": 0.18424782156944275, "learning_rate": 7.61063229081338e-08, "loss": 0.0036, "step": 225000 }, { "epoch": 1.899981001034388, "grad_norm": 0.14136239886283875, "learning_rate": 7.597829698194115e-08, "loss": 0.0104, "step": 225010 }, { "epoch": 1.900065440881552, "grad_norm": 0.3931455612182617, "learning_rate": 7.585037800449113e-08, "loss": 0.0105, "step": 225020 }, { "epoch": 1.900149880728716, "grad_norm": 0.17985942959785461, "learning_rate": 7.572256597856154e-08, "loss": 0.0033, "step": 225030 }, { "epoch": 1.9002343205758798, "grad_norm": 0.2000722587108612, "learning_rate": 7.559486090692902e-08, "loss": 0.0024, "step": 225040 }, { "epoch": 1.9003187604230436, "grad_norm": 0.014549979008734226, "learning_rate": 7.546726279236693e-08, "loss": 0.0056, "step": 225050 }, { "epoch": 1.9004032002702074, "grad_norm": 0.24399243295192719, "learning_rate": 7.533977163764639e-08, "loss": 0.0038, "step": 225060 }, { "epoch": 1.9004876401173714, "grad_norm": 0.1927587389945984, "learning_rate": 7.521238744553738e-08, "loss": 0.0056, "step": 225070 }, { "epoch": 1.9005720799645354, "grad_norm": 0.6654924154281616, "learning_rate": 7.50851102188066e-08, "loss": 0.0112, "step": 225080 }, { "epoch": 1.9006565198116991, "grad_norm": 0.2556864023208618, "learning_rate": 7.495793996021739e-08, "loss": 0.0085, "step": 225090 }, { "epoch": 1.900740959658863, "grad_norm": 0.4579785466194153, "learning_rate": 7.483087667253253e-08, "loss": 0.0052, "step": 225100 }, { "epoch": 1.900825399506027, "grad_norm": 0.9777542948722839, "learning_rate": 7.470392035851148e-08, "loss": 0.0042, "step": 225110 }, { "epoch": 1.900909839353191, "grad_norm": 0.13175702095031738, "learning_rate": 7.457707102091261e-08, "loss": 0.0061, "step": 225120 }, { "epoch": 1.9009942792003547, "grad_norm": 0.41104528307914734, "learning_rate": 7.44503286624898e-08, "loss": 0.0077, "step": 225130 }, { "epoch": 1.9010787190475185, "grad_norm": 0.862572193145752, "learning_rate": 7.432369328599698e-08, "loss": 0.0089, "step": 225140 }, { "epoch": 1.9011631588946822, "grad_norm": 0.3887938857078552, "learning_rate": 7.41971648941836e-08, "loss": 0.0038, "step": 225150 }, { "epoch": 1.9012475987418462, "grad_norm": 0.18717466294765472, "learning_rate": 7.407074348979858e-08, "loss": 0.01, "step": 225160 }, { "epoch": 1.9013320385890102, "grad_norm": 0.251858651638031, "learning_rate": 7.394442907558808e-08, "loss": 0.0044, "step": 225170 }, { "epoch": 1.901416478436174, "grad_norm": 0.11324875801801682, "learning_rate": 7.381822165429486e-08, "loss": 0.0046, "step": 225180 }, { "epoch": 1.9015009182833378, "grad_norm": 0.06025709956884384, "learning_rate": 7.36921212286601e-08, "loss": 0.0031, "step": 225190 }, { "epoch": 1.9015853581305018, "grad_norm": 0.3151080310344696, "learning_rate": 7.35661278014238e-08, "loss": 0.0063, "step": 225200 }, { "epoch": 1.9016697979776658, "grad_norm": 0.18490639328956604, "learning_rate": 7.344024137532102e-08, "loss": 0.0035, "step": 225210 }, { "epoch": 1.9017542378248296, "grad_norm": 0.22604238986968994, "learning_rate": 7.331446195308677e-08, "loss": 0.0054, "step": 225220 }, { "epoch": 1.9018386776719933, "grad_norm": 0.29603323340415955, "learning_rate": 7.318878953745279e-08, "loss": 0.0081, "step": 225230 }, { "epoch": 1.9019231175191573, "grad_norm": 0.5491458177566528, "learning_rate": 7.306322413114908e-08, "loss": 0.0071, "step": 225240 }, { "epoch": 1.9020075573663213, "grad_norm": 0.6448717713356018, "learning_rate": 7.293776573690237e-08, "loss": 0.0061, "step": 225250 }, { "epoch": 1.902091997213485, "grad_norm": 0.1921858936548233, "learning_rate": 7.281241435743713e-08, "loss": 0.0054, "step": 225260 }, { "epoch": 1.9021764370606489, "grad_norm": 0.518885612487793, "learning_rate": 7.268716999547731e-08, "loss": 0.0066, "step": 225270 }, { "epoch": 1.9022608769078126, "grad_norm": 0.2775355577468872, "learning_rate": 7.256203265374184e-08, "loss": 0.0055, "step": 225280 }, { "epoch": 1.9023453167549766, "grad_norm": 0.3965979814529419, "learning_rate": 7.243700233494966e-08, "loss": 0.007, "step": 225290 }, { "epoch": 1.9024297566021406, "grad_norm": 0.10936971753835678, "learning_rate": 7.231207904181581e-08, "loss": 0.0089, "step": 225300 }, { "epoch": 1.9025141964493044, "grad_norm": 0.38059312105178833, "learning_rate": 7.218726277705367e-08, "loss": 0.0041, "step": 225310 }, { "epoch": 1.9025986362964682, "grad_norm": 0.00118854152970016, "learning_rate": 7.206255354337499e-08, "loss": 0.0053, "step": 225320 }, { "epoch": 1.9026830761436322, "grad_norm": 0.030683279037475586, "learning_rate": 7.193795134348758e-08, "loss": 0.0042, "step": 225330 }, { "epoch": 1.9027675159907962, "grad_norm": 0.2801552414894104, "learning_rate": 7.181345618009817e-08, "loss": 0.0054, "step": 225340 }, { "epoch": 1.90285195583796, "grad_norm": 0.03056168556213379, "learning_rate": 7.168906805591014e-08, "loss": 0.0069, "step": 225350 }, { "epoch": 1.9029363956851237, "grad_norm": 0.041359830647706985, "learning_rate": 7.156478697362635e-08, "loss": 0.0017, "step": 225360 }, { "epoch": 1.9030208355322877, "grad_norm": 0.20239466428756714, "learning_rate": 7.144061293594463e-08, "loss": 0.0048, "step": 225370 }, { "epoch": 1.9031052753794515, "grad_norm": 0.21737629175186157, "learning_rate": 7.131654594556336e-08, "loss": 0.0068, "step": 225380 }, { "epoch": 1.9031897152266155, "grad_norm": 0.16548830270767212, "learning_rate": 7.119258600517598e-08, "loss": 0.0079, "step": 225390 }, { "epoch": 1.9032741550737793, "grad_norm": 0.15654918551445007, "learning_rate": 7.106873311747641e-08, "loss": 0.0049, "step": 225400 }, { "epoch": 1.903358594920943, "grad_norm": 0.18638747930526733, "learning_rate": 7.09449872851542e-08, "loss": 0.0054, "step": 225410 }, { "epoch": 1.903443034768107, "grad_norm": 0.8247725367546082, "learning_rate": 7.082134851089606e-08, "loss": 0.0092, "step": 225420 }, { "epoch": 1.903527474615271, "grad_norm": 0.17433512210845947, "learning_rate": 7.069781679738818e-08, "loss": 0.0077, "step": 225430 }, { "epoch": 1.9036119144624348, "grad_norm": 0.07178734987974167, "learning_rate": 7.057439214731398e-08, "loss": 0.0058, "step": 225440 }, { "epoch": 1.9036963543095986, "grad_norm": 0.1010279729962349, "learning_rate": 7.04510745633541e-08, "loss": 0.0102, "step": 225450 }, { "epoch": 1.9037807941567626, "grad_norm": 0.3271322548389435, "learning_rate": 7.032786404818637e-08, "loss": 0.004, "step": 225460 }, { "epoch": 1.9038652340039266, "grad_norm": 0.33883136510849, "learning_rate": 7.0204760604487e-08, "loss": 0.0036, "step": 225470 }, { "epoch": 1.9039496738510904, "grad_norm": 0.08235493302345276, "learning_rate": 7.008176423492996e-08, "loss": 0.0037, "step": 225480 }, { "epoch": 1.9040341136982541, "grad_norm": 0.404865562915802, "learning_rate": 6.995887494218756e-08, "loss": 0.0056, "step": 225490 }, { "epoch": 1.904118553545418, "grad_norm": 0.3249688744544983, "learning_rate": 6.983609272892766e-08, "loss": 0.0111, "step": 225500 }, { "epoch": 1.904202993392582, "grad_norm": 0.8079228401184082, "learning_rate": 6.971341759781758e-08, "loss": 0.0073, "step": 225510 }, { "epoch": 1.904287433239746, "grad_norm": 0.11213476955890656, "learning_rate": 6.959084955152128e-08, "loss": 0.0043, "step": 225520 }, { "epoch": 1.9043718730869097, "grad_norm": 0.09237197786569595, "learning_rate": 6.946838859270221e-08, "loss": 0.0092, "step": 225530 }, { "epoch": 1.9044563129340735, "grad_norm": 0.408352255821228, "learning_rate": 6.934603472401824e-08, "loss": 0.0042, "step": 225540 }, { "epoch": 1.9045407527812375, "grad_norm": 0.14654310047626495, "learning_rate": 6.922378794812889e-08, "loss": 0.0033, "step": 225550 }, { "epoch": 1.9046251926284015, "grad_norm": 0.0005516682285815477, "learning_rate": 6.910164826768872e-08, "loss": 0.0063, "step": 225560 }, { "epoch": 1.9047096324755652, "grad_norm": 0.241676926612854, "learning_rate": 6.897961568535006e-08, "loss": 0.0064, "step": 225570 }, { "epoch": 1.904794072322729, "grad_norm": 0.27962708473205566, "learning_rate": 6.885769020376354e-08, "loss": 0.0068, "step": 225580 }, { "epoch": 1.904878512169893, "grad_norm": 0.009804980829358101, "learning_rate": 6.873587182557706e-08, "loss": 0.0025, "step": 225590 }, { "epoch": 1.904962952017057, "grad_norm": 0.12363113462924957, "learning_rate": 6.86141605534374e-08, "loss": 0.0061, "step": 225600 }, { "epoch": 1.9050473918642208, "grad_norm": 0.18813194334506989, "learning_rate": 6.849255638998742e-08, "loss": 0.0057, "step": 225610 }, { "epoch": 1.9051318317113846, "grad_norm": 0.2865023910999298, "learning_rate": 6.837105933786892e-08, "loss": 0.0055, "step": 225620 }, { "epoch": 1.9052162715585483, "grad_norm": 0.1309717893600464, "learning_rate": 6.824966939971978e-08, "loss": 0.0048, "step": 225630 }, { "epoch": 1.9053007114057123, "grad_norm": 0.33950215578079224, "learning_rate": 6.812838657817788e-08, "loss": 0.0067, "step": 225640 }, { "epoch": 1.9053851512528763, "grad_norm": 0.19603703916072845, "learning_rate": 6.80072108758767e-08, "loss": 0.0024, "step": 225650 }, { "epoch": 1.90546959110004, "grad_norm": 0.062252502888441086, "learning_rate": 6.788614229544799e-08, "loss": 0.0049, "step": 225660 }, { "epoch": 1.9055540309472039, "grad_norm": 0.1793002337217331, "learning_rate": 6.776518083952189e-08, "loss": 0.0087, "step": 225670 }, { "epoch": 1.9056384707943679, "grad_norm": 0.29082533717155457, "learning_rate": 6.764432651072517e-08, "loss": 0.0082, "step": 225680 }, { "epoch": 1.9057229106415319, "grad_norm": 0.30007442831993103, "learning_rate": 6.752357931168296e-08, "loss": 0.005, "step": 225690 }, { "epoch": 1.9058073504886957, "grad_norm": 0.27235889434814453, "learning_rate": 6.740293924501762e-08, "loss": 0.0066, "step": 225700 }, { "epoch": 1.9058917903358594, "grad_norm": 0.3819112181663513, "learning_rate": 6.728240631334982e-08, "loss": 0.0061, "step": 225710 }, { "epoch": 1.9059762301830234, "grad_norm": 0.22510451078414917, "learning_rate": 6.716198051929746e-08, "loss": 0.0084, "step": 225720 }, { "epoch": 1.9060606700301872, "grad_norm": 0.2185976207256317, "learning_rate": 6.704166186547567e-08, "loss": 0.0028, "step": 225730 }, { "epoch": 1.9061451098773512, "grad_norm": 0.18557021021842957, "learning_rate": 6.692145035449848e-08, "loss": 0.0073, "step": 225740 }, { "epoch": 1.906229549724515, "grad_norm": 0.032299503684043884, "learning_rate": 6.680134598897658e-08, "loss": 0.0043, "step": 225750 }, { "epoch": 1.9063139895716787, "grad_norm": 0.22014938294887543, "learning_rate": 6.668134877151844e-08, "loss": 0.0029, "step": 225760 }, { "epoch": 1.9063984294188427, "grad_norm": 0.04722791910171509, "learning_rate": 6.65614587047303e-08, "loss": 0.0042, "step": 225770 }, { "epoch": 1.9064828692660067, "grad_norm": 0.3452630043029785, "learning_rate": 6.644167579121618e-08, "loss": 0.0044, "step": 225780 }, { "epoch": 1.9065673091131705, "grad_norm": 0.13000640273094177, "learning_rate": 6.632200003357847e-08, "loss": 0.0095, "step": 225790 }, { "epoch": 1.9066517489603343, "grad_norm": 0.16648288071155548, "learning_rate": 6.620243143441563e-08, "loss": 0.0042, "step": 225800 }, { "epoch": 1.9067361888074983, "grad_norm": 0.23704136908054352, "learning_rate": 6.608296999632447e-08, "loss": 0.0075, "step": 225810 }, { "epoch": 1.9068206286546623, "grad_norm": 0.15906855463981628, "learning_rate": 6.596361572190125e-08, "loss": 0.0063, "step": 225820 }, { "epoch": 1.906905068501826, "grad_norm": 0.0004306399787310511, "learning_rate": 6.584436861373611e-08, "loss": 0.0031, "step": 225830 }, { "epoch": 1.9069895083489898, "grad_norm": 0.24972346425056458, "learning_rate": 6.572522867442089e-08, "loss": 0.0059, "step": 225840 }, { "epoch": 1.9070739481961536, "grad_norm": 0.3205699920654297, "learning_rate": 6.560619590654238e-08, "loss": 0.0057, "step": 225850 }, { "epoch": 1.9071583880433176, "grad_norm": 0.3391035497188568, "learning_rate": 6.548727031268576e-08, "loss": 0.0085, "step": 225860 }, { "epoch": 1.9072428278904816, "grad_norm": 0.6437113285064697, "learning_rate": 6.53684518954345e-08, "loss": 0.0105, "step": 225870 }, { "epoch": 1.9073272677376454, "grad_norm": 0.1523909717798233, "learning_rate": 6.524974065736988e-08, "loss": 0.0043, "step": 225880 }, { "epoch": 1.9074117075848092, "grad_norm": 0.5549122095108032, "learning_rate": 6.513113660106929e-08, "loss": 0.0055, "step": 225890 }, { "epoch": 1.9074961474319732, "grad_norm": 0.2878403663635254, "learning_rate": 6.501263972910954e-08, "loss": 0.008, "step": 225900 }, { "epoch": 1.9075805872791372, "grad_norm": 0.26063621044158936, "learning_rate": 6.489425004406357e-08, "loss": 0.0027, "step": 225910 }, { "epoch": 1.907665027126301, "grad_norm": 0.003984363283962011, "learning_rate": 6.477596754850268e-08, "loss": 0.0055, "step": 225920 }, { "epoch": 1.9077494669734647, "grad_norm": 0.18599089980125427, "learning_rate": 6.465779224499758e-08, "loss": 0.0035, "step": 225930 }, { "epoch": 1.9078339068206287, "grad_norm": 0.1293463110923767, "learning_rate": 6.453972413611287e-08, "loss": 0.0042, "step": 225940 }, { "epoch": 1.9079183466677927, "grad_norm": 0.18616339564323425, "learning_rate": 6.442176322441429e-08, "loss": 0.0072, "step": 225950 }, { "epoch": 1.9080027865149565, "grad_norm": 0.05236416310071945, "learning_rate": 6.430390951246369e-08, "loss": 0.0044, "step": 225960 }, { "epoch": 1.9080872263621202, "grad_norm": 0.2863905429840088, "learning_rate": 6.418616300282121e-08, "loss": 0.0052, "step": 225970 }, { "epoch": 1.908171666209284, "grad_norm": 0.3733927309513092, "learning_rate": 6.406852369804317e-08, "loss": 0.0072, "step": 225980 }, { "epoch": 1.908256106056448, "grad_norm": 0.3276865482330322, "learning_rate": 6.395099160068585e-08, "loss": 0.0081, "step": 225990 }, { "epoch": 1.908340545903612, "grad_norm": 0.28291434049606323, "learning_rate": 6.38335667133011e-08, "loss": 0.0096, "step": 226000 }, { "epoch": 1.9084249857507758, "grad_norm": 0.6632104516029358, "learning_rate": 6.371624903843964e-08, "loss": 0.0113, "step": 226010 }, { "epoch": 1.9085094255979396, "grad_norm": 0.2876826524734497, "learning_rate": 6.359903857865002e-08, "loss": 0.0065, "step": 226020 }, { "epoch": 1.9085938654451036, "grad_norm": 0.2649083435535431, "learning_rate": 6.348193533647795e-08, "loss": 0.0034, "step": 226030 }, { "epoch": 1.9086783052922676, "grad_norm": 0.27291521430015564, "learning_rate": 6.336493931446641e-08, "loss": 0.0045, "step": 226040 }, { "epoch": 1.9087627451394313, "grad_norm": 0.17747069895267487, "learning_rate": 6.324805051515614e-08, "loss": 0.0039, "step": 226050 }, { "epoch": 1.9088471849865951, "grad_norm": 0.48831406235694885, "learning_rate": 6.313126894108734e-08, "loss": 0.0086, "step": 226060 }, { "epoch": 1.9089316248337589, "grad_norm": 0.769463300704956, "learning_rate": 6.301459459479576e-08, "loss": 0.0089, "step": 226070 }, { "epoch": 1.9090160646809229, "grad_norm": 0.2160925567150116, "learning_rate": 6.289802747881546e-08, "loss": 0.0062, "step": 226080 }, { "epoch": 1.9091005045280869, "grad_norm": 0.14814041554927826, "learning_rate": 6.278156759567778e-08, "loss": 0.0079, "step": 226090 }, { "epoch": 1.9091849443752507, "grad_norm": 0.047434400767087936, "learning_rate": 6.266521494791344e-08, "loss": 0.0107, "step": 226100 }, { "epoch": 1.9092693842224144, "grad_norm": 0.5250747203826904, "learning_rate": 6.254896953804824e-08, "loss": 0.0049, "step": 226110 }, { "epoch": 1.9093538240695784, "grad_norm": 0.39425262808799744, "learning_rate": 6.243283136860789e-08, "loss": 0.0083, "step": 226120 }, { "epoch": 1.9094382639167424, "grad_norm": 0.00121596222743392, "learning_rate": 6.23168004421143e-08, "loss": 0.0069, "step": 226130 }, { "epoch": 1.9095227037639062, "grad_norm": 0.3175096809864044, "learning_rate": 6.220087676108876e-08, "loss": 0.0139, "step": 226140 }, { "epoch": 1.90960714361107, "grad_norm": 0.07887767255306244, "learning_rate": 6.208506032804818e-08, "loss": 0.0058, "step": 226150 }, { "epoch": 1.909691583458234, "grad_norm": 0.23395909368991852, "learning_rate": 6.196935114550773e-08, "loss": 0.0093, "step": 226160 }, { "epoch": 1.909776023305398, "grad_norm": 0.4212575852870941, "learning_rate": 6.185374921598153e-08, "loss": 0.0034, "step": 226170 }, { "epoch": 1.9098604631525617, "grad_norm": 0.33937302231788635, "learning_rate": 6.173825454197923e-08, "loss": 0.0048, "step": 226180 }, { "epoch": 1.9099449029997255, "grad_norm": 0.40510258078575134, "learning_rate": 6.162286712601107e-08, "loss": 0.0065, "step": 226190 }, { "epoch": 1.9100293428468893, "grad_norm": 0.020881401374936104, "learning_rate": 6.150758697058112e-08, "loss": 0.0066, "step": 226200 }, { "epoch": 1.9101137826940533, "grad_norm": 0.24750831723213196, "learning_rate": 6.139241407819519e-08, "loss": 0.0049, "step": 226210 }, { "epoch": 1.9101982225412173, "grad_norm": 0.4882330000400543, "learning_rate": 6.127734845135402e-08, "loss": 0.0058, "step": 226220 }, { "epoch": 1.910282662388381, "grad_norm": 0.21475662291049957, "learning_rate": 6.116239009255676e-08, "loss": 0.0052, "step": 226230 }, { "epoch": 1.9103671022355448, "grad_norm": 0.3057439625263214, "learning_rate": 6.104753900430027e-08, "loss": 0.0099, "step": 226240 }, { "epoch": 1.9104515420827088, "grad_norm": 0.23988761007785797, "learning_rate": 6.093279518907924e-08, "loss": 0.0073, "step": 226250 }, { "epoch": 1.9105359819298728, "grad_norm": 0.2361653745174408, "learning_rate": 6.081815864938501e-08, "loss": 0.0049, "step": 226260 }, { "epoch": 1.9106204217770366, "grad_norm": 0.6516200304031372, "learning_rate": 6.070362938770891e-08, "loss": 0.0092, "step": 226270 }, { "epoch": 1.9107048616242004, "grad_norm": 0.519891619682312, "learning_rate": 6.05892074065384e-08, "loss": 0.0078, "step": 226280 }, { "epoch": 1.9107893014713644, "grad_norm": 0.09741218388080597, "learning_rate": 6.047489270835705e-08, "loss": 0.0084, "step": 226290 }, { "epoch": 1.9108737413185282, "grad_norm": 0.07547377794981003, "learning_rate": 6.036068529564953e-08, "loss": 0.0091, "step": 226300 }, { "epoch": 1.9109581811656922, "grad_norm": 0.36692386865615845, "learning_rate": 6.024658517089554e-08, "loss": 0.0046, "step": 226310 }, { "epoch": 1.911042621012856, "grad_norm": 0.08758803457021713, "learning_rate": 6.01325923365731e-08, "loss": 0.0057, "step": 226320 }, { "epoch": 1.9111270608600197, "grad_norm": 0.9441208243370056, "learning_rate": 6.001870679515853e-08, "loss": 0.0067, "step": 226330 }, { "epoch": 1.9112115007071837, "grad_norm": 0.25551092624664307, "learning_rate": 5.990492854912599e-08, "loss": 0.0044, "step": 226340 }, { "epoch": 1.9112959405543477, "grad_norm": 0.2085283249616623, "learning_rate": 5.979125760094517e-08, "loss": 0.0021, "step": 226350 }, { "epoch": 1.9113803804015115, "grad_norm": 0.07693032920360565, "learning_rate": 5.967769395308687e-08, "loss": 0.0065, "step": 226360 }, { "epoch": 1.9114648202486753, "grad_norm": 0.3560539186000824, "learning_rate": 5.9564237608015777e-08, "loss": 0.0067, "step": 226370 }, { "epoch": 1.9115492600958393, "grad_norm": 0.26676398515701294, "learning_rate": 5.945088856819769e-08, "loss": 0.0069, "step": 226380 }, { "epoch": 1.9116336999430033, "grad_norm": 0.1498601883649826, "learning_rate": 5.933764683609344e-08, "loss": 0.0069, "step": 226390 }, { "epoch": 1.911718139790167, "grad_norm": 0.4252423048019409, "learning_rate": 5.9224512414163826e-08, "loss": 0.0117, "step": 226400 }, { "epoch": 1.9118025796373308, "grad_norm": 0.1329757422208786, "learning_rate": 5.911148530486466e-08, "loss": 0.0143, "step": 226410 }, { "epoch": 1.9118870194844946, "grad_norm": 0.12395310401916504, "learning_rate": 5.8998565510651754e-08, "loss": 0.0076, "step": 226420 }, { "epoch": 1.9119714593316586, "grad_norm": 0.17141595482826233, "learning_rate": 5.888575303397759e-08, "loss": 0.0051, "step": 226430 }, { "epoch": 1.9120558991788226, "grad_norm": 0.11844361573457718, "learning_rate": 5.877304787729188e-08, "loss": 0.0035, "step": 226440 }, { "epoch": 1.9121403390259863, "grad_norm": 0.06601590663194656, "learning_rate": 5.866045004304322e-08, "loss": 0.0048, "step": 226450 }, { "epoch": 1.9122247788731501, "grad_norm": 0.10606760531663895, "learning_rate": 5.854795953367742e-08, "loss": 0.0051, "step": 226460 }, { "epoch": 1.9123092187203141, "grad_norm": 0.23254790902137756, "learning_rate": 5.843557635163699e-08, "loss": 0.0068, "step": 226470 }, { "epoch": 1.9123936585674781, "grad_norm": 0.5919749140739441, "learning_rate": 5.83233004993633e-08, "loss": 0.0063, "step": 226480 }, { "epoch": 1.912478098414642, "grad_norm": 0.026491565629839897, "learning_rate": 5.8211131979294955e-08, "loss": 0.0071, "step": 226490 }, { "epoch": 1.9125625382618057, "grad_norm": 0.07359576970338821, "learning_rate": 5.8099070793867785e-08, "loss": 0.0087, "step": 226500 }, { "epoch": 1.9126469781089697, "grad_norm": 0.10459685325622559, "learning_rate": 5.798711694551651e-08, "loss": 0.0082, "step": 226510 }, { "epoch": 1.9127314179561337, "grad_norm": 0.05450589209794998, "learning_rate": 5.787527043667196e-08, "loss": 0.0069, "step": 226520 }, { "epoch": 1.9128158578032974, "grad_norm": 0.21642568707466125, "learning_rate": 5.776353126976386e-08, "loss": 0.006, "step": 226530 }, { "epoch": 1.9129002976504612, "grad_norm": 0.11429725587368011, "learning_rate": 5.7651899447219714e-08, "loss": 0.0116, "step": 226540 }, { "epoch": 1.912984737497625, "grad_norm": 0.29249995946884155, "learning_rate": 5.7540374971463144e-08, "loss": 0.0068, "step": 226550 }, { "epoch": 1.913069177344789, "grad_norm": 0.16095714271068573, "learning_rate": 5.742895784491664e-08, "loss": 0.0028, "step": 226560 }, { "epoch": 1.913153617191953, "grad_norm": 0.2512541115283966, "learning_rate": 5.731764807000051e-08, "loss": 0.0049, "step": 226570 }, { "epoch": 1.9132380570391168, "grad_norm": 0.013409717939794064, "learning_rate": 5.7206445649132245e-08, "loss": 0.0088, "step": 226580 }, { "epoch": 1.9133224968862805, "grad_norm": 0.1472811996936798, "learning_rate": 5.709535058472659e-08, "loss": 0.0063, "step": 226590 }, { "epoch": 1.9134069367334445, "grad_norm": 0.12604118883609772, "learning_rate": 5.698436287919773e-08, "loss": 0.0034, "step": 226600 }, { "epoch": 1.9134913765806085, "grad_norm": 0.8125227689743042, "learning_rate": 5.687348253495484e-08, "loss": 0.0139, "step": 226610 }, { "epoch": 1.9135758164277723, "grad_norm": 0.07699697464704514, "learning_rate": 5.676270955440766e-08, "loss": 0.0046, "step": 226620 }, { "epoch": 1.913660256274936, "grad_norm": 0.056902118027210236, "learning_rate": 5.665204393996149e-08, "loss": 0.006, "step": 226630 }, { "epoch": 1.9137446961220999, "grad_norm": 0.25561410188674927, "learning_rate": 5.654148569401996e-08, "loss": 0.0056, "step": 226640 }, { "epoch": 1.9138291359692639, "grad_norm": 0.11033398658037186, "learning_rate": 5.643103481898393e-08, "loss": 0.0054, "step": 226650 }, { "epoch": 1.9139135758164278, "grad_norm": 0.12612634897232056, "learning_rate": 5.632069131725259e-08, "loss": 0.0154, "step": 226660 }, { "epoch": 1.9139980156635916, "grad_norm": 0.3230104446411133, "learning_rate": 5.621045519122348e-08, "loss": 0.0068, "step": 226670 }, { "epoch": 1.9140824555107554, "grad_norm": 0.251948744058609, "learning_rate": 5.610032644328967e-08, "loss": 0.0074, "step": 226680 }, { "epoch": 1.9141668953579194, "grad_norm": 0.252266526222229, "learning_rate": 5.599030507584369e-08, "loss": 0.0061, "step": 226690 }, { "epoch": 1.9142513352050834, "grad_norm": 0.2931865155696869, "learning_rate": 5.58803910912753e-08, "loss": 0.0039, "step": 226700 }, { "epoch": 1.9143357750522472, "grad_norm": 0.001960062887519598, "learning_rate": 5.577058449197203e-08, "loss": 0.0088, "step": 226710 }, { "epoch": 1.914420214899411, "grad_norm": 0.4577837288379669, "learning_rate": 5.56608852803181e-08, "loss": 0.0136, "step": 226720 }, { "epoch": 1.914504654746575, "grad_norm": 0.47096243500709534, "learning_rate": 5.555129345869659e-08, "loss": 0.0154, "step": 226730 }, { "epoch": 1.914589094593739, "grad_norm": 0.08149727433919907, "learning_rate": 5.5441809029487814e-08, "loss": 0.0054, "step": 226740 }, { "epoch": 1.9146735344409027, "grad_norm": 0.09203334897756577, "learning_rate": 5.533243199506988e-08, "loss": 0.0068, "step": 226750 }, { "epoch": 1.9147579742880665, "grad_norm": 0.406417578458786, "learning_rate": 5.5223162357818106e-08, "loss": 0.0042, "step": 226760 }, { "epoch": 1.9148424141352303, "grad_norm": 0.22173263132572174, "learning_rate": 5.511400012010559e-08, "loss": 0.0072, "step": 226770 }, { "epoch": 1.9149268539823943, "grad_norm": 0.2607041299343109, "learning_rate": 5.500494528430433e-08, "loss": 0.0046, "step": 226780 }, { "epoch": 1.9150112938295583, "grad_norm": 0.2089405506849289, "learning_rate": 5.489599785278188e-08, "loss": 0.0063, "step": 226790 }, { "epoch": 1.915095733676722, "grad_norm": 0.39040446281433105, "learning_rate": 5.478715782790522e-08, "loss": 0.0073, "step": 226800 }, { "epoch": 1.9151801735238858, "grad_norm": 0.30098459124565125, "learning_rate": 5.467842521203803e-08, "loss": 0.0033, "step": 226810 }, { "epoch": 1.9152646133710498, "grad_norm": 0.10559961199760437, "learning_rate": 5.4569800007542306e-08, "loss": 0.0058, "step": 226820 }, { "epoch": 1.9153490532182138, "grad_norm": 0.25419074296951294, "learning_rate": 5.446128221677671e-08, "loss": 0.0059, "step": 226830 }, { "epoch": 1.9154334930653776, "grad_norm": 0.10516060143709183, "learning_rate": 5.435287184209881e-08, "loss": 0.0057, "step": 226840 }, { "epoch": 1.9155179329125414, "grad_norm": 0.23445738852024078, "learning_rate": 5.4244568885862825e-08, "loss": 0.0034, "step": 226850 }, { "epoch": 1.9156023727597054, "grad_norm": 0.2051437795162201, "learning_rate": 5.413637335042188e-08, "loss": 0.0031, "step": 226860 }, { "epoch": 1.9156868126068691, "grad_norm": 0.35203632712364197, "learning_rate": 5.40282852381252e-08, "loss": 0.0053, "step": 226870 }, { "epoch": 1.9157712524540331, "grad_norm": 0.09684418886899948, "learning_rate": 5.392030455132036e-08, "loss": 0.0046, "step": 226880 }, { "epoch": 1.915855692301197, "grad_norm": 0.15518403053283691, "learning_rate": 5.381243129235325e-08, "loss": 0.0046, "step": 226890 }, { "epoch": 1.9159401321483607, "grad_norm": 0.12520495057106018, "learning_rate": 5.3704665463566454e-08, "loss": 0.0033, "step": 226900 }, { "epoch": 1.9160245719955247, "grad_norm": 0.24175235629081726, "learning_rate": 5.359700706730086e-08, "loss": 0.0054, "step": 226910 }, { "epoch": 1.9161090118426887, "grad_norm": 0.1348254382610321, "learning_rate": 5.3489456105894064e-08, "loss": 0.006, "step": 226920 }, { "epoch": 1.9161934516898524, "grad_norm": 0.17909204959869385, "learning_rate": 5.338201258168307e-08, "loss": 0.0055, "step": 226930 }, { "epoch": 1.9162778915370162, "grad_norm": 0.35793620347976685, "learning_rate": 5.3274676497001024e-08, "loss": 0.0048, "step": 226940 }, { "epoch": 1.9163623313841802, "grad_norm": 0.3718341588973999, "learning_rate": 5.316744785417938e-08, "loss": 0.0029, "step": 226950 }, { "epoch": 1.9164467712313442, "grad_norm": 0.17691102623939514, "learning_rate": 5.306032665554739e-08, "loss": 0.0093, "step": 226960 }, { "epoch": 1.916531211078508, "grad_norm": 0.27253541350364685, "learning_rate": 5.295331290343153e-08, "loss": 0.0107, "step": 226970 }, { "epoch": 1.9166156509256718, "grad_norm": 0.02611682191491127, "learning_rate": 5.28464066001555e-08, "loss": 0.0065, "step": 226980 }, { "epoch": 1.9167000907728355, "grad_norm": 0.3013070225715637, "learning_rate": 5.273960774804132e-08, "loss": 0.0049, "step": 226990 }, { "epoch": 1.9167845306199995, "grad_norm": 0.13962982594966888, "learning_rate": 5.2632916349409924e-08, "loss": 0.007, "step": 227000 }, { "epoch": 1.9168689704671635, "grad_norm": 0.02832469530403614, "learning_rate": 5.252633240657723e-08, "loss": 0.0085, "step": 227010 }, { "epoch": 1.9169534103143273, "grad_norm": 0.08785637468099594, "learning_rate": 5.241985592185916e-08, "loss": 0.007, "step": 227020 }, { "epoch": 1.917037850161491, "grad_norm": 0.1299142688512802, "learning_rate": 5.2313486897567214e-08, "loss": 0.0063, "step": 227030 }, { "epoch": 1.917122290008655, "grad_norm": 0.20901960134506226, "learning_rate": 5.220722533601341e-08, "loss": 0.0038, "step": 227040 }, { "epoch": 1.917206729855819, "grad_norm": 0.17014211416244507, "learning_rate": 5.2101071239504254e-08, "loss": 0.0051, "step": 227050 }, { "epoch": 1.9172911697029829, "grad_norm": 0.19013682007789612, "learning_rate": 5.1995024610346224e-08, "loss": 0.0038, "step": 227060 }, { "epoch": 1.9173756095501466, "grad_norm": 0.20338034629821777, "learning_rate": 5.188908545084193e-08, "loss": 0.0089, "step": 227070 }, { "epoch": 1.9174600493973106, "grad_norm": 0.21308757364749908, "learning_rate": 5.1783253763292854e-08, "loss": 0.005, "step": 227080 }, { "epoch": 1.9175444892444746, "grad_norm": 0.6052597165107727, "learning_rate": 5.1677529549997716e-08, "loss": 0.008, "step": 227090 }, { "epoch": 1.9176289290916384, "grad_norm": 0.2855571508407593, "learning_rate": 5.157191281325191e-08, "loss": 0.0063, "step": 227100 }, { "epoch": 1.9177133689388022, "grad_norm": 0.008589601144194603, "learning_rate": 5.146640355535082e-08, "loss": 0.0068, "step": 227110 }, { "epoch": 1.917797808785966, "grad_norm": 0.08688801527023315, "learning_rate": 5.136100177858538e-08, "loss": 0.0059, "step": 227120 }, { "epoch": 1.91788224863313, "grad_norm": 0.1153729259967804, "learning_rate": 5.125570748524489e-08, "loss": 0.0065, "step": 227130 }, { "epoch": 1.917966688480294, "grad_norm": 0.23418371379375458, "learning_rate": 5.115052067761639e-08, "loss": 0.0069, "step": 227140 }, { "epoch": 1.9180511283274577, "grad_norm": 0.21638891100883484, "learning_rate": 5.104544135798417e-08, "loss": 0.0043, "step": 227150 }, { "epoch": 1.9181355681746215, "grad_norm": 0.11379428952932358, "learning_rate": 5.094046952863085e-08, "loss": 0.0052, "step": 227160 }, { "epoch": 1.9182200080217855, "grad_norm": 0.20509713888168335, "learning_rate": 5.083560519183683e-08, "loss": 0.0075, "step": 227170 }, { "epoch": 1.9183044478689495, "grad_norm": 0.04405948892235756, "learning_rate": 5.073084834987862e-08, "loss": 0.0042, "step": 227180 }, { "epoch": 1.9183888877161133, "grad_norm": 0.3502923250198364, "learning_rate": 5.062619900503274e-08, "loss": 0.0068, "step": 227190 }, { "epoch": 1.918473327563277, "grad_norm": 0.1457347571849823, "learning_rate": 5.052165715957125e-08, "loss": 0.0081, "step": 227200 }, { "epoch": 1.918557767410441, "grad_norm": 0.7658582329750061, "learning_rate": 5.0417222815765664e-08, "loss": 0.0039, "step": 227210 }, { "epoch": 1.9186422072576048, "grad_norm": 0.5146938562393188, "learning_rate": 5.031289597588307e-08, "loss": 0.0042, "step": 227220 }, { "epoch": 1.9187266471047688, "grad_norm": 0.19316305220127106, "learning_rate": 5.020867664218998e-08, "loss": 0.007, "step": 227230 }, { "epoch": 1.9188110869519326, "grad_norm": 0.16640183329582214, "learning_rate": 5.0104564816950696e-08, "loss": 0.0058, "step": 227240 }, { "epoch": 1.9188955267990964, "grad_norm": 0.1457521766424179, "learning_rate": 5.000056050242563e-08, "loss": 0.0041, "step": 227250 }, { "epoch": 1.9189799666462604, "grad_norm": 0.35210803151130676, "learning_rate": 4.989666370087409e-08, "loss": 0.0056, "step": 227260 }, { "epoch": 1.9190644064934244, "grad_norm": 0.4049127697944641, "learning_rate": 4.97928744145526e-08, "loss": 0.0042, "step": 227270 }, { "epoch": 1.9191488463405881, "grad_norm": 0.2665548622608185, "learning_rate": 4.968919264571548e-08, "loss": 0.005, "step": 227280 }, { "epoch": 1.919233286187752, "grad_norm": 0.3352099657058716, "learning_rate": 4.9585618396614795e-08, "loss": 0.0057, "step": 227290 }, { "epoch": 1.919317726034916, "grad_norm": 0.15574558079242706, "learning_rate": 4.948215166950043e-08, "loss": 0.0038, "step": 227300 }, { "epoch": 1.91940216588208, "grad_norm": 0.3277156949043274, "learning_rate": 4.937879246661836e-08, "loss": 0.0039, "step": 227310 }, { "epoch": 1.9194866057292437, "grad_norm": 0.4515138566493988, "learning_rate": 4.9275540790215125e-08, "loss": 0.0075, "step": 227320 }, { "epoch": 1.9195710455764075, "grad_norm": 0.1914934366941452, "learning_rate": 4.9172396642532816e-08, "loss": 0.0061, "step": 227330 }, { "epoch": 1.9196554854235712, "grad_norm": 0.3486239016056061, "learning_rate": 4.906936002581075e-08, "loss": 0.006, "step": 227340 }, { "epoch": 1.9197399252707352, "grad_norm": 0.5752688646316528, "learning_rate": 4.8966430942288255e-08, "loss": 0.0065, "step": 227350 }, { "epoch": 1.9198243651178992, "grad_norm": 0.3320223391056061, "learning_rate": 4.8863609394200205e-08, "loss": 0.0113, "step": 227360 }, { "epoch": 1.919908804965063, "grad_norm": 0.16939161717891693, "learning_rate": 4.8760895383779814e-08, "loss": 0.0025, "step": 227370 }, { "epoch": 1.9199932448122268, "grad_norm": 0.36721983551979065, "learning_rate": 4.8658288913258076e-08, "loss": 0.0068, "step": 227380 }, { "epoch": 1.9200776846593908, "grad_norm": 0.13497589528560638, "learning_rate": 4.855578998486377e-08, "loss": 0.0109, "step": 227390 }, { "epoch": 1.9201621245065548, "grad_norm": 0.045580990612506866, "learning_rate": 4.845339860082343e-08, "loss": 0.0066, "step": 227400 }, { "epoch": 1.9202465643537185, "grad_norm": 0.2287340611219406, "learning_rate": 4.83511147633603e-08, "loss": 0.005, "step": 227410 }, { "epoch": 1.9203310042008823, "grad_norm": 0.3198543190956116, "learning_rate": 4.8248938474695916e-08, "loss": 0.0084, "step": 227420 }, { "epoch": 1.9204154440480463, "grad_norm": 0.11612551659345627, "learning_rate": 4.814686973705018e-08, "loss": 0.0085, "step": 227430 }, { "epoch": 1.9204998838952103, "grad_norm": 0.11021290719509125, "learning_rate": 4.8044908552640216e-08, "loss": 0.0075, "step": 227440 }, { "epoch": 1.920584323742374, "grad_norm": 0.08694673329591751, "learning_rate": 4.7943054923679786e-08, "loss": 0.0048, "step": 227450 }, { "epoch": 1.9206687635895379, "grad_norm": 0.2105581909418106, "learning_rate": 4.784130885238103e-08, "loss": 0.0081, "step": 227460 }, { "epoch": 1.9207532034367016, "grad_norm": 0.21332691609859467, "learning_rate": 4.773967034095439e-08, "loss": 0.0037, "step": 227470 }, { "epoch": 1.9208376432838656, "grad_norm": 0.2171107679605484, "learning_rate": 4.763813939160755e-08, "loss": 0.0052, "step": 227480 }, { "epoch": 1.9209220831310296, "grad_norm": 0.3710320293903351, "learning_rate": 4.753671600654486e-08, "loss": 0.0088, "step": 227490 }, { "epoch": 1.9210065229781934, "grad_norm": 0.3297215700149536, "learning_rate": 4.743540018797066e-08, "loss": 0.0038, "step": 227500 }, { "epoch": 1.9210909628253572, "grad_norm": 0.008298737928271294, "learning_rate": 4.733419193808431e-08, "loss": 0.0048, "step": 227510 }, { "epoch": 1.9211754026725212, "grad_norm": 0.5274357795715332, "learning_rate": 4.72330912590846e-08, "loss": 0.0089, "step": 227520 }, { "epoch": 1.9212598425196852, "grad_norm": 0.24723432958126068, "learning_rate": 4.7132098153166995e-08, "loss": 0.0151, "step": 227530 }, { "epoch": 1.921344282366849, "grad_norm": 0.37176036834716797, "learning_rate": 4.7031212622525304e-08, "loss": 0.0063, "step": 227540 }, { "epoch": 1.9214287222140127, "grad_norm": 0.10232467204332352, "learning_rate": 4.6930434669350545e-08, "loss": 0.0057, "step": 227550 }, { "epoch": 1.9215131620611765, "grad_norm": 0.31301984190940857, "learning_rate": 4.682976429583208e-08, "loss": 0.0052, "step": 227560 }, { "epoch": 1.9215976019083405, "grad_norm": 0.14467650651931763, "learning_rate": 4.6729201504156494e-08, "loss": 0.0068, "step": 227570 }, { "epoch": 1.9216820417555045, "grad_norm": 0.3363402485847473, "learning_rate": 4.6628746296507045e-08, "loss": 0.0083, "step": 227580 }, { "epoch": 1.9217664816026683, "grad_norm": 0.42187008261680603, "learning_rate": 4.6528398675066425e-08, "loss": 0.0066, "step": 227590 }, { "epoch": 1.921850921449832, "grad_norm": 0.08111193031072617, "learning_rate": 4.642815864201344e-08, "loss": 0.0071, "step": 227600 }, { "epoch": 1.921935361296996, "grad_norm": 0.39989688992500305, "learning_rate": 4.6328026199526364e-08, "loss": 0.0065, "step": 227610 }, { "epoch": 1.92201980114416, "grad_norm": 0.3667726516723633, "learning_rate": 4.6228001349779005e-08, "loss": 0.0055, "step": 227620 }, { "epoch": 1.9221042409913238, "grad_norm": 0.21495169401168823, "learning_rate": 4.6128084094944626e-08, "loss": 0.0056, "step": 227630 }, { "epoch": 1.9221886808384876, "grad_norm": 0.23689782619476318, "learning_rate": 4.602827443719315e-08, "loss": 0.0036, "step": 227640 }, { "epoch": 1.9222731206856516, "grad_norm": 0.06274304538965225, "learning_rate": 4.592857237869231e-08, "loss": 0.0034, "step": 227650 }, { "epoch": 1.9223575605328156, "grad_norm": 0.8220347762107849, "learning_rate": 4.582897792160756e-08, "loss": 0.0074, "step": 227660 }, { "epoch": 1.9224420003799794, "grad_norm": 0.32132217288017273, "learning_rate": 4.57294910681022e-08, "loss": 0.0088, "step": 227670 }, { "epoch": 1.9225264402271431, "grad_norm": 0.1928189992904663, "learning_rate": 4.563011182033727e-08, "loss": 0.0035, "step": 227680 }, { "epoch": 1.922610880074307, "grad_norm": 0.25113987922668457, "learning_rate": 4.5530840180471044e-08, "loss": 0.0053, "step": 227690 }, { "epoch": 1.922695319921471, "grad_norm": 0.2822715640068054, "learning_rate": 4.543167615066013e-08, "loss": 0.0021, "step": 227700 }, { "epoch": 1.922779759768635, "grad_norm": 0.04602190852165222, "learning_rate": 4.533261973305725e-08, "loss": 0.0066, "step": 227710 }, { "epoch": 1.9228641996157987, "grad_norm": 0.14424794912338257, "learning_rate": 4.523367092981456e-08, "loss": 0.0064, "step": 227720 }, { "epoch": 1.9229486394629625, "grad_norm": 0.4172336757183075, "learning_rate": 4.513482974308148e-08, "loss": 0.0082, "step": 227730 }, { "epoch": 1.9230330793101265, "grad_norm": 0.030445272102952003, "learning_rate": 4.5036096175004595e-08, "loss": 0.0036, "step": 227740 }, { "epoch": 1.9231175191572905, "grad_norm": 0.15438312292099, "learning_rate": 4.4937470227727764e-08, "loss": 0.0051, "step": 227750 }, { "epoch": 1.9232019590044542, "grad_norm": 0.3889531195163727, "learning_rate": 4.483895190339427e-08, "loss": 0.008, "step": 227760 }, { "epoch": 1.923286398851618, "grad_norm": 0.256925493478775, "learning_rate": 4.474054120414351e-08, "loss": 0.0078, "step": 227770 }, { "epoch": 1.923370838698782, "grad_norm": 0.14865003526210785, "learning_rate": 4.4642238132112106e-08, "loss": 0.0069, "step": 227780 }, { "epoch": 1.9234552785459458, "grad_norm": 0.2345270812511444, "learning_rate": 4.4544042689436126e-08, "loss": 0.0068, "step": 227790 }, { "epoch": 1.9235397183931098, "grad_norm": 0.5029885768890381, "learning_rate": 4.444595487824832e-08, "loss": 0.0049, "step": 227800 }, { "epoch": 1.9236241582402736, "grad_norm": 0.17347005009651184, "learning_rate": 4.4347974700678643e-08, "loss": 0.0045, "step": 227810 }, { "epoch": 1.9237085980874373, "grad_norm": 0.0788511112332344, "learning_rate": 4.425010215885539e-08, "loss": 0.0044, "step": 227820 }, { "epoch": 1.9237930379346013, "grad_norm": 0.1564953774213791, "learning_rate": 4.415233725490464e-08, "loss": 0.0083, "step": 227830 }, { "epoch": 1.9238774777817653, "grad_norm": 0.3298541307449341, "learning_rate": 4.4054679990949146e-08, "loss": 0.007, "step": 227840 }, { "epoch": 1.923961917628929, "grad_norm": 0.0013718566624447703, "learning_rate": 4.395713036911109e-08, "loss": 0.0043, "step": 227850 }, { "epoch": 1.9240463574760929, "grad_norm": 0.08022339642047882, "learning_rate": 4.385968839150823e-08, "loss": 0.0033, "step": 227860 }, { "epoch": 1.9241307973232569, "grad_norm": 0.31430694460868835, "learning_rate": 4.3762354060257195e-08, "loss": 0.0079, "step": 227870 }, { "epoch": 1.9242152371704209, "grad_norm": 0.46716776490211487, "learning_rate": 4.366512737747186e-08, "loss": 0.0031, "step": 227880 }, { "epoch": 1.9242996770175846, "grad_norm": 0.08185118436813354, "learning_rate": 4.3568008345264975e-08, "loss": 0.0042, "step": 227890 }, { "epoch": 1.9243841168647484, "grad_norm": 0.02741219289600849, "learning_rate": 4.347099696574486e-08, "loss": 0.0037, "step": 227900 }, { "epoch": 1.9244685567119122, "grad_norm": 0.22028490900993347, "learning_rate": 4.337409324101926e-08, "loss": 0.0059, "step": 227910 }, { "epoch": 1.9245529965590762, "grad_norm": 0.34299883246421814, "learning_rate": 4.3277297173192625e-08, "loss": 0.0074, "step": 227920 }, { "epoch": 1.9246374364062402, "grad_norm": 0.43781426548957825, "learning_rate": 4.318060876436714e-08, "loss": 0.0067, "step": 227930 }, { "epoch": 1.924721876253404, "grad_norm": 0.2797102630138397, "learning_rate": 4.3084028016643906e-08, "loss": 0.0098, "step": 227940 }, { "epoch": 1.9248063161005677, "grad_norm": 0.1559383124113083, "learning_rate": 4.298755493211903e-08, "loss": 0.0045, "step": 227950 }, { "epoch": 1.9248907559477317, "grad_norm": 0.22147060930728912, "learning_rate": 4.289118951288862e-08, "loss": 0.0049, "step": 227960 }, { "epoch": 1.9249751957948957, "grad_norm": 0.07873614877462387, "learning_rate": 4.279493176104599e-08, "loss": 0.0061, "step": 227970 }, { "epoch": 1.9250596356420595, "grad_norm": 0.314901739358902, "learning_rate": 4.269878167868169e-08, "loss": 0.0052, "step": 227980 }, { "epoch": 1.9251440754892233, "grad_norm": 0.3279353082180023, "learning_rate": 4.2602739267883494e-08, "loss": 0.006, "step": 227990 }, { "epoch": 1.9252285153363873, "grad_norm": 0.11919005960226059, "learning_rate": 4.250680453073863e-08, "loss": 0.0046, "step": 228000 }, { "epoch": 1.9253129551835513, "grad_norm": 0.27310922741889954, "learning_rate": 4.241097746932987e-08, "loss": 0.0085, "step": 228010 }, { "epoch": 1.925397395030715, "grad_norm": 0.003506877226755023, "learning_rate": 4.231525808573889e-08, "loss": 0.0056, "step": 228020 }, { "epoch": 1.9254818348778788, "grad_norm": 0.25517651438713074, "learning_rate": 4.221964638204401e-08, "loss": 0.0045, "step": 228030 }, { "epoch": 1.9255662747250426, "grad_norm": 0.05421469733119011, "learning_rate": 4.212414236032303e-08, "loss": 0.004, "step": 228040 }, { "epoch": 1.9256507145722066, "grad_norm": 0.18887078762054443, "learning_rate": 4.202874602264984e-08, "loss": 0.005, "step": 228050 }, { "epoch": 1.9257351544193706, "grad_norm": 0.24935179948806763, "learning_rate": 4.1933457371095556e-08, "loss": 0.0036, "step": 228060 }, { "epoch": 1.9258195942665344, "grad_norm": 0.24148212373256683, "learning_rate": 4.18382764077313e-08, "loss": 0.0041, "step": 228070 }, { "epoch": 1.9259040341136981, "grad_norm": 0.4453607201576233, "learning_rate": 4.174320313462321e-08, "loss": 0.0069, "step": 228080 }, { "epoch": 1.9259884739608621, "grad_norm": 0.07448235899209976, "learning_rate": 4.164823755383684e-08, "loss": 0.0045, "step": 228090 }, { "epoch": 1.9260729138080261, "grad_norm": 0.131569042801857, "learning_rate": 4.155337966743445e-08, "loss": 0.0043, "step": 228100 }, { "epoch": 1.92615735365519, "grad_norm": 0.27261337637901306, "learning_rate": 4.145862947747714e-08, "loss": 0.0078, "step": 228110 }, { "epoch": 1.9262417935023537, "grad_norm": 0.14187555015087128, "learning_rate": 4.136398698602162e-08, "loss": 0.0068, "step": 228120 }, { "epoch": 1.9263262333495177, "grad_norm": 0.0019934820011258125, "learning_rate": 4.126945219512457e-08, "loss": 0.002, "step": 228130 }, { "epoch": 1.9264106731966815, "grad_norm": 0.13307072222232819, "learning_rate": 4.1175025106838795e-08, "loss": 0.0042, "step": 228140 }, { "epoch": 1.9264951130438455, "grad_norm": 0.35396677255630493, "learning_rate": 4.108070572321543e-08, "loss": 0.0059, "step": 228150 }, { "epoch": 1.9265795528910092, "grad_norm": 0.2584422826766968, "learning_rate": 4.098649404630339e-08, "loss": 0.0075, "step": 228160 }, { "epoch": 1.926663992738173, "grad_norm": 0.3909796178340912, "learning_rate": 4.0892390078147715e-08, "loss": 0.0083, "step": 228170 }, { "epoch": 1.926748432585337, "grad_norm": 0.2196323722600937, "learning_rate": 4.0798393820793424e-08, "loss": 0.0099, "step": 228180 }, { "epoch": 1.926832872432501, "grad_norm": 0.2755897641181946, "learning_rate": 4.070450527628167e-08, "loss": 0.0037, "step": 228190 }, { "epoch": 1.9269173122796648, "grad_norm": 0.21846875548362732, "learning_rate": 4.0610724446651925e-08, "loss": 0.0039, "step": 228200 }, { "epoch": 1.9270017521268286, "grad_norm": 0.14104335010051727, "learning_rate": 4.0517051333940904e-08, "loss": 0.0073, "step": 228210 }, { "epoch": 1.9270861919739926, "grad_norm": 0.15706056356430054, "learning_rate": 4.042348594018364e-08, "loss": 0.0051, "step": 228220 }, { "epoch": 1.9271706318211566, "grad_norm": 0.11487626284360886, "learning_rate": 4.033002826741128e-08, "loss": 0.0033, "step": 228230 }, { "epoch": 1.9272550716683203, "grad_norm": 0.058751143515110016, "learning_rate": 4.023667831765443e-08, "loss": 0.0042, "step": 228240 }, { "epoch": 1.927339511515484, "grad_norm": 0.5317890048027039, "learning_rate": 4.014343609294091e-08, "loss": 0.0119, "step": 228250 }, { "epoch": 1.9274239513626479, "grad_norm": 0.0673193410038948, "learning_rate": 4.005030159529577e-08, "loss": 0.0036, "step": 228260 }, { "epoch": 1.9275083912098119, "grad_norm": 0.1171029731631279, "learning_rate": 3.995727482674128e-08, "loss": 0.0066, "step": 228270 }, { "epoch": 1.9275928310569759, "grad_norm": 0.2514127492904663, "learning_rate": 3.9864355789298595e-08, "loss": 0.0028, "step": 228280 }, { "epoch": 1.9276772709041397, "grad_norm": 0.29754045605659485, "learning_rate": 3.977154448498555e-08, "loss": 0.0071, "step": 228290 }, { "epoch": 1.9277617107513034, "grad_norm": 0.10205140709877014, "learning_rate": 3.9678840915818305e-08, "loss": 0.006, "step": 228300 }, { "epoch": 1.9278461505984674, "grad_norm": 0.2777486741542816, "learning_rate": 3.9586245083809705e-08, "loss": 0.0078, "step": 228310 }, { "epoch": 1.9279305904456314, "grad_norm": 0.247954860329628, "learning_rate": 3.9493756990971464e-08, "loss": 0.0064, "step": 228320 }, { "epoch": 1.9280150302927952, "grad_norm": 0.26966843008995056, "learning_rate": 3.940137663931254e-08, "loss": 0.0095, "step": 228330 }, { "epoch": 1.928099470139959, "grad_norm": 0.2063656896352768, "learning_rate": 3.9309104030839654e-08, "loss": 0.005, "step": 228340 }, { "epoch": 1.928183909987123, "grad_norm": 0.06647753715515137, "learning_rate": 3.92169391675562e-08, "loss": 0.0061, "step": 228350 }, { "epoch": 1.928268349834287, "grad_norm": 0.5877166986465454, "learning_rate": 3.912488205146392e-08, "loss": 0.0069, "step": 228360 }, { "epoch": 1.9283527896814507, "grad_norm": 0.6959238052368164, "learning_rate": 3.903293268456288e-08, "loss": 0.0112, "step": 228370 }, { "epoch": 1.9284372295286145, "grad_norm": 0.6108646392822266, "learning_rate": 3.8941091068849244e-08, "loss": 0.007, "step": 228380 }, { "epoch": 1.9285216693757783, "grad_norm": 0.37217971682548523, "learning_rate": 3.884935720631922e-08, "loss": 0.007, "step": 228390 }, { "epoch": 1.9286061092229423, "grad_norm": 0.40512290596961975, "learning_rate": 3.875773109896452e-08, "loss": 0.0065, "step": 228400 }, { "epoch": 1.9286905490701063, "grad_norm": 0.12329574674367905, "learning_rate": 3.8666212748775244e-08, "loss": 0.0058, "step": 228410 }, { "epoch": 1.92877498891727, "grad_norm": 0.2082187682390213, "learning_rate": 3.857480215773868e-08, "loss": 0.005, "step": 228420 }, { "epoch": 1.9288594287644338, "grad_norm": 0.08082786947488785, "learning_rate": 3.8483499327841566e-08, "loss": 0.0052, "step": 228430 }, { "epoch": 1.9289438686115978, "grad_norm": 0.16746769845485687, "learning_rate": 3.839230426106566e-08, "loss": 0.0038, "step": 228440 }, { "epoch": 1.9290283084587618, "grad_norm": 0.30152761936187744, "learning_rate": 3.830121695939215e-08, "loss": 0.0041, "step": 228450 }, { "epoch": 1.9291127483059256, "grad_norm": 0.23232464492321014, "learning_rate": 3.821023742479946e-08, "loss": 0.0035, "step": 228460 }, { "epoch": 1.9291971881530894, "grad_norm": 0.827794075012207, "learning_rate": 3.811936565926322e-08, "loss": 0.0142, "step": 228470 }, { "epoch": 1.9292816280002532, "grad_norm": 0.09642787277698517, "learning_rate": 3.8028601664757966e-08, "loss": 0.005, "step": 228480 }, { "epoch": 1.9293660678474172, "grad_norm": 1.2411231994628906, "learning_rate": 3.7937945443254907e-08, "loss": 0.0102, "step": 228490 }, { "epoch": 1.9294505076945812, "grad_norm": 0.0478825643658638, "learning_rate": 3.7847396996722464e-08, "loss": 0.01, "step": 228500 }, { "epoch": 1.929534947541745, "grad_norm": 0.2591250240802765, "learning_rate": 3.775695632712795e-08, "loss": 0.0072, "step": 228510 }, { "epoch": 1.9296193873889087, "grad_norm": 1.0884318351745605, "learning_rate": 3.76666234364359e-08, "loss": 0.0102, "step": 228520 }, { "epoch": 1.9297038272360727, "grad_norm": 0.08409477025270462, "learning_rate": 3.757639832660753e-08, "loss": 0.0047, "step": 228530 }, { "epoch": 1.9297882670832367, "grad_norm": 0.7923184633255005, "learning_rate": 3.748628099960239e-08, "loss": 0.0091, "step": 228540 }, { "epoch": 1.9298727069304005, "grad_norm": 0.3545224368572235, "learning_rate": 3.7396271457378895e-08, "loss": 0.0013, "step": 228550 }, { "epoch": 1.9299571467775642, "grad_norm": 0.11450465023517609, "learning_rate": 3.730636970189105e-08, "loss": 0.0053, "step": 228560 }, { "epoch": 1.9300415866247282, "grad_norm": 0.5567647218704224, "learning_rate": 3.72165757350923e-08, "loss": 0.0098, "step": 228570 }, { "epoch": 1.9301260264718922, "grad_norm": 0.21523691713809967, "learning_rate": 3.7126889558932736e-08, "loss": 0.0037, "step": 228580 }, { "epoch": 1.930210466319056, "grad_norm": 0.40601441264152527, "learning_rate": 3.703731117535969e-08, "loss": 0.0123, "step": 228590 }, { "epoch": 1.9302949061662198, "grad_norm": 0.24617145955562592, "learning_rate": 3.694784058631939e-08, "loss": 0.0066, "step": 228600 }, { "epoch": 1.9303793460133836, "grad_norm": 0.6071755886077881, "learning_rate": 3.6858477793754735e-08, "loss": 0.0095, "step": 228610 }, { "epoch": 1.9304637858605476, "grad_norm": 0.10247346013784409, "learning_rate": 3.676922279960693e-08, "loss": 0.0075, "step": 228620 }, { "epoch": 1.9305482257077116, "grad_norm": 0.07149253785610199, "learning_rate": 3.668007560581499e-08, "loss": 0.0065, "step": 228630 }, { "epoch": 1.9306326655548753, "grad_norm": 0.17457351088523865, "learning_rate": 3.659103621431459e-08, "loss": 0.0016, "step": 228640 }, { "epoch": 1.9307171054020391, "grad_norm": 0.22048072516918182, "learning_rate": 3.650210462703918e-08, "loss": 0.0066, "step": 228650 }, { "epoch": 1.930801545249203, "grad_norm": 0.3143162131309509, "learning_rate": 3.641328084592166e-08, "loss": 0.008, "step": 228660 }, { "epoch": 1.930885985096367, "grad_norm": 0.04070010036230087, "learning_rate": 3.632456487289049e-08, "loss": 0.0032, "step": 228670 }, { "epoch": 1.9309704249435309, "grad_norm": 0.1712469458580017, "learning_rate": 3.623595670987245e-08, "loss": 0.0048, "step": 228680 }, { "epoch": 1.9310548647906947, "grad_norm": 0.08792171627283096, "learning_rate": 3.6147456358791575e-08, "loss": 0.0047, "step": 228690 }, { "epoch": 1.9311393046378587, "grad_norm": 0.4856005907058716, "learning_rate": 3.6059063821571874e-08, "loss": 0.0048, "step": 228700 }, { "epoch": 1.9312237444850224, "grad_norm": 0.3329758942127228, "learning_rate": 3.597077910013125e-08, "loss": 0.0049, "step": 228710 }, { "epoch": 1.9313081843321864, "grad_norm": 0.2636755108833313, "learning_rate": 3.588260219638873e-08, "loss": 0.0029, "step": 228720 }, { "epoch": 1.9313926241793502, "grad_norm": 0.19722196459770203, "learning_rate": 3.579453311225833e-08, "loss": 0.0119, "step": 228730 }, { "epoch": 1.931477064026514, "grad_norm": 0.23835767805576324, "learning_rate": 3.570657184965354e-08, "loss": 0.0078, "step": 228740 }, { "epoch": 1.931561503873678, "grad_norm": 0.0989077091217041, "learning_rate": 3.561871841048503e-08, "loss": 0.0069, "step": 228750 }, { "epoch": 1.931645943720842, "grad_norm": 0.3413108289241791, "learning_rate": 3.553097279666073e-08, "loss": 0.0068, "step": 228760 }, { "epoch": 1.9317303835680057, "grad_norm": 0.06432978808879852, "learning_rate": 3.544333501008634e-08, "loss": 0.0126, "step": 228770 }, { "epoch": 1.9318148234151695, "grad_norm": 0.23176737129688263, "learning_rate": 3.535580505266478e-08, "loss": 0.0072, "step": 228780 }, { "epoch": 1.9318992632623335, "grad_norm": 0.014082776382565498, "learning_rate": 3.5268382926298414e-08, "loss": 0.006, "step": 228790 }, { "epoch": 1.9319837031094975, "grad_norm": 0.025269852951169014, "learning_rate": 3.518106863288462e-08, "loss": 0.0073, "step": 228800 }, { "epoch": 1.9320681429566613, "grad_norm": 0.1388293355703354, "learning_rate": 3.509386217432131e-08, "loss": 0.0071, "step": 228810 }, { "epoch": 1.932152582803825, "grad_norm": 0.1299666166305542, "learning_rate": 3.500676355250143e-08, "loss": 0.0078, "step": 228820 }, { "epoch": 1.9322370226509888, "grad_norm": 0.28916776180267334, "learning_rate": 3.4919772769317904e-08, "loss": 0.0076, "step": 228830 }, { "epoch": 1.9323214624981528, "grad_norm": 0.15831482410430908, "learning_rate": 3.4832889826659223e-08, "loss": 0.0046, "step": 228840 }, { "epoch": 1.9324059023453168, "grad_norm": 0.04303152114152908, "learning_rate": 3.474611472641276e-08, "loss": 0.0076, "step": 228850 }, { "epoch": 1.9324903421924806, "grad_norm": 0.10287712514400482, "learning_rate": 3.465944747046257e-08, "loss": 0.0037, "step": 228860 }, { "epoch": 1.9325747820396444, "grad_norm": 0.23786203563213348, "learning_rate": 3.4572888060692146e-08, "loss": 0.0054, "step": 228870 }, { "epoch": 1.9326592218868084, "grad_norm": 0.08302167057991028, "learning_rate": 3.4486436498981645e-08, "loss": 0.0026, "step": 228880 }, { "epoch": 1.9327436617339724, "grad_norm": 0.19114914536476135, "learning_rate": 3.4400092787207354e-08, "loss": 0.0055, "step": 228890 }, { "epoch": 1.9328281015811362, "grad_norm": 0.23498748242855072, "learning_rate": 3.431385692724609e-08, "loss": 0.0044, "step": 228900 }, { "epoch": 1.9329125414283, "grad_norm": 0.13057322800159454, "learning_rate": 3.4227728920969705e-08, "loss": 0.0041, "step": 228910 }, { "epoch": 1.932996981275464, "grad_norm": 0.10229771584272385, "learning_rate": 3.414170877025003e-08, "loss": 0.0072, "step": 228920 }, { "epoch": 1.933081421122628, "grad_norm": 0.07773087173700333, "learning_rate": 3.405579647695445e-08, "loss": 0.0064, "step": 228930 }, { "epoch": 1.9331658609697917, "grad_norm": 0.1980065405368805, "learning_rate": 3.3969992042949265e-08, "loss": 0.0066, "step": 228940 }, { "epoch": 1.9332503008169555, "grad_norm": 0.04492729529738426, "learning_rate": 3.388429547009853e-08, "loss": 0.0046, "step": 228950 }, { "epoch": 1.9333347406641193, "grad_norm": 0.31291335821151733, "learning_rate": 3.379870676026298e-08, "loss": 0.0043, "step": 228960 }, { "epoch": 1.9334191805112833, "grad_norm": 0.17994992434978485, "learning_rate": 3.371322591530224e-08, "loss": 0.0047, "step": 228970 }, { "epoch": 1.9335036203584473, "grad_norm": 0.09004063159227371, "learning_rate": 3.362785293707205e-08, "loss": 0.0042, "step": 228980 }, { "epoch": 1.933588060205611, "grad_norm": 0.0927920788526535, "learning_rate": 3.3542587827427584e-08, "loss": 0.0046, "step": 228990 }, { "epoch": 1.9336725000527748, "grad_norm": 0.385239839553833, "learning_rate": 3.3457430588220705e-08, "loss": 0.0075, "step": 229000 }, { "epoch": 1.9337569398999388, "grad_norm": 0.10973197966814041, "learning_rate": 3.337238122129993e-08, "loss": 0.0081, "step": 229010 }, { "epoch": 1.9338413797471028, "grad_norm": 0.009755044244229794, "learning_rate": 3.3287439728513784e-08, "loss": 0.0077, "step": 229020 }, { "epoch": 1.9339258195942666, "grad_norm": 0.42509740591049194, "learning_rate": 3.320260611170634e-08, "loss": 0.0072, "step": 229030 }, { "epoch": 1.9340102594414303, "grad_norm": 0.11480628699064255, "learning_rate": 3.311788037272057e-08, "loss": 0.0086, "step": 229040 }, { "epoch": 1.9340946992885941, "grad_norm": 0.37812361121177673, "learning_rate": 3.303326251339667e-08, "loss": 0.0055, "step": 229050 }, { "epoch": 1.9341791391357581, "grad_norm": 0.10042105615139008, "learning_rate": 3.294875253557261e-08, "loss": 0.0036, "step": 229060 }, { "epoch": 1.9342635789829221, "grad_norm": 0.1718887984752655, "learning_rate": 3.28643504410836e-08, "loss": 0.0039, "step": 229070 }, { "epoch": 1.934348018830086, "grad_norm": 0.2246813327074051, "learning_rate": 3.278005623176317e-08, "loss": 0.0056, "step": 229080 }, { "epoch": 1.9344324586772497, "grad_norm": 0.32597148418426514, "learning_rate": 3.269586990944207e-08, "loss": 0.0046, "step": 229090 }, { "epoch": 1.9345168985244137, "grad_norm": 0.06427199393510818, "learning_rate": 3.261179147594884e-08, "loss": 0.0045, "step": 229100 }, { "epoch": 1.9346013383715777, "grad_norm": 1.865478277206421, "learning_rate": 3.2527820933109243e-08, "loss": 0.0113, "step": 229110 }, { "epoch": 1.9346857782187414, "grad_norm": 0.19382601976394653, "learning_rate": 3.244395828274793e-08, "loss": 0.0071, "step": 229120 }, { "epoch": 1.9347702180659052, "grad_norm": 0.25489944219589233, "learning_rate": 3.236020352668512e-08, "loss": 0.0055, "step": 229130 }, { "epoch": 1.9348546579130692, "grad_norm": 0.2821994423866272, "learning_rate": 3.2276556666741563e-08, "loss": 0.0044, "step": 229140 }, { "epoch": 1.9349390977602332, "grad_norm": 0.41883760690689087, "learning_rate": 3.2193017704733045e-08, "loss": 0.0078, "step": 229150 }, { "epoch": 1.935023537607397, "grad_norm": 0.3718715310096741, "learning_rate": 3.2109586642474214e-08, "loss": 0.0036, "step": 229160 }, { "epoch": 1.9351079774545608, "grad_norm": 0.4432957172393799, "learning_rate": 3.20262634817764e-08, "loss": 0.0036, "step": 229170 }, { "epoch": 1.9351924173017245, "grad_norm": 0.1739301085472107, "learning_rate": 3.194304822445093e-08, "loss": 0.0069, "step": 229180 }, { "epoch": 1.9352768571488885, "grad_norm": 0.3689371943473816, "learning_rate": 3.185994087230415e-08, "loss": 0.0116, "step": 229190 }, { "epoch": 1.9353612969960525, "grad_norm": 0.21709121763706207, "learning_rate": 3.177694142714127e-08, "loss": 0.004, "step": 229200 }, { "epoch": 1.9354457368432163, "grad_norm": 0.41835451126098633, "learning_rate": 3.169404989076475e-08, "loss": 0.0062, "step": 229210 }, { "epoch": 1.93553017669038, "grad_norm": 0.40696829557418823, "learning_rate": 3.1611266264975904e-08, "loss": 0.0071, "step": 229220 }, { "epoch": 1.935614616537544, "grad_norm": 0.0005224660853855312, "learning_rate": 3.1528590551572204e-08, "loss": 0.0042, "step": 229230 }, { "epoch": 1.935699056384708, "grad_norm": 0.3448278605937958, "learning_rate": 3.1446022752349424e-08, "loss": 0.0098, "step": 229240 }, { "epoch": 1.9357834962318718, "grad_norm": 0.1730348765850067, "learning_rate": 3.136356286910114e-08, "loss": 0.0151, "step": 229250 }, { "epoch": 1.9358679360790356, "grad_norm": 0.8031905889511108, "learning_rate": 3.1281210903617575e-08, "loss": 0.0086, "step": 229260 }, { "epoch": 1.9359523759261996, "grad_norm": 0.10883601009845734, "learning_rate": 3.119896685768842e-08, "loss": 0.0024, "step": 229270 }, { "epoch": 1.9360368157733634, "grad_norm": 0.16486819088459015, "learning_rate": 3.11168307330989e-08, "loss": 0.0082, "step": 229280 }, { "epoch": 1.9361212556205274, "grad_norm": 0.15705116093158722, "learning_rate": 3.1034802531633714e-08, "loss": 0.0056, "step": 229290 }, { "epoch": 1.9362056954676912, "grad_norm": 0.41988468170166016, "learning_rate": 3.0952882255074755e-08, "loss": 0.0053, "step": 229300 }, { "epoch": 1.936290135314855, "grad_norm": 0.43068453669548035, "learning_rate": 3.087106990520061e-08, "loss": 0.006, "step": 229310 }, { "epoch": 1.936374575162019, "grad_norm": 0.11156950891017914, "learning_rate": 3.078936548378875e-08, "loss": 0.0051, "step": 229320 }, { "epoch": 1.936459015009183, "grad_norm": 0.2400888204574585, "learning_rate": 3.07077689926133e-08, "loss": 0.006, "step": 229330 }, { "epoch": 1.9365434548563467, "grad_norm": 0.2166496068239212, "learning_rate": 3.062628043344729e-08, "loss": 0.0044, "step": 229340 }, { "epoch": 1.9366278947035105, "grad_norm": 0.4379815459251404, "learning_rate": 3.054489980805986e-08, "loss": 0.0053, "step": 229350 }, { "epoch": 1.9367123345506745, "grad_norm": 0.6645283699035645, "learning_rate": 3.046362711821904e-08, "loss": 0.0053, "step": 229360 }, { "epoch": 1.9367967743978385, "grad_norm": 0.18075355887413025, "learning_rate": 3.0382462365689536e-08, "loss": 0.0048, "step": 229370 }, { "epoch": 1.9368812142450023, "grad_norm": 0.2058963179588318, "learning_rate": 3.030140555223493e-08, "loss": 0.0079, "step": 229380 }, { "epoch": 1.936965654092166, "grad_norm": 0.13089901208877563, "learning_rate": 3.022045667961493e-08, "loss": 0.0065, "step": 229390 }, { "epoch": 1.9370500939393298, "grad_norm": 0.25654736161231995, "learning_rate": 3.013961574958868e-08, "loss": 0.005, "step": 229400 }, { "epoch": 1.9371345337864938, "grad_norm": 0.0022821431048214436, "learning_rate": 3.005888276391145e-08, "loss": 0.0038, "step": 229410 }, { "epoch": 1.9372189736336578, "grad_norm": 0.1542757898569107, "learning_rate": 2.997825772433682e-08, "loss": 0.0039, "step": 229420 }, { "epoch": 1.9373034134808216, "grad_norm": 0.5874695181846619, "learning_rate": 2.989774063261563e-08, "loss": 0.01, "step": 229430 }, { "epoch": 1.9373878533279854, "grad_norm": 0.2219228744506836, "learning_rate": 2.9817331490497594e-08, "loss": 0.0082, "step": 229440 }, { "epoch": 1.9374722931751494, "grad_norm": 0.39431244134902954, "learning_rate": 2.9737030299727965e-08, "loss": 0.0059, "step": 229450 }, { "epoch": 1.9375567330223133, "grad_norm": 0.06342224031686783, "learning_rate": 2.9656837062052025e-08, "loss": 0.0111, "step": 229460 }, { "epoch": 1.9376411728694771, "grad_norm": 0.6015236377716064, "learning_rate": 2.9576751779211156e-08, "loss": 0.0059, "step": 229470 }, { "epoch": 1.937725612716641, "grad_norm": 0.23541259765625, "learning_rate": 2.9496774452944523e-08, "loss": 0.0074, "step": 229480 }, { "epoch": 1.937810052563805, "grad_norm": 0.06944650411605835, "learning_rate": 2.9416905084989622e-08, "loss": 0.0014, "step": 229490 }, { "epoch": 1.937894492410969, "grad_norm": 0.19961850345134735, "learning_rate": 2.9337143677080626e-08, "loss": 0.0066, "step": 229500 }, { "epoch": 1.9379789322581327, "grad_norm": 0.38297024369239807, "learning_rate": 2.9257490230950593e-08, "loss": 0.0074, "step": 229510 }, { "epoch": 1.9380633721052964, "grad_norm": 0.25892576575279236, "learning_rate": 2.917794474832869e-08, "loss": 0.0068, "step": 229520 }, { "epoch": 1.9381478119524602, "grad_norm": 0.21247276663780212, "learning_rate": 2.9098507230944095e-08, "loss": 0.004, "step": 229530 }, { "epoch": 1.9382322517996242, "grad_norm": 0.2458389848470688, "learning_rate": 2.9019177680520428e-08, "loss": 0.0052, "step": 229540 }, { "epoch": 1.9383166916467882, "grad_norm": 0.08078460395336151, "learning_rate": 2.8939956098782417e-08, "loss": 0.0046, "step": 229550 }, { "epoch": 1.938401131493952, "grad_norm": 0.33081644773483276, "learning_rate": 2.886084248744925e-08, "loss": 0.0063, "step": 229560 }, { "epoch": 1.9384855713411158, "grad_norm": 0.38954150676727295, "learning_rate": 2.8781836848240098e-08, "loss": 0.0036, "step": 229570 }, { "epoch": 1.9385700111882798, "grad_norm": 0.08962254971265793, "learning_rate": 2.8702939182870816e-08, "loss": 0.0032, "step": 229580 }, { "epoch": 1.9386544510354438, "grad_norm": 0.426786869764328, "learning_rate": 2.862414949305503e-08, "loss": 0.0052, "step": 229590 }, { "epoch": 1.9387388908826075, "grad_norm": 0.061889566481113434, "learning_rate": 2.85454677805036e-08, "loss": 0.007, "step": 229600 }, { "epoch": 1.9388233307297713, "grad_norm": 0.07510608434677124, "learning_rate": 2.8466894046926263e-08, "loss": 0.006, "step": 229610 }, { "epoch": 1.9389077705769353, "grad_norm": 0.25687703490257263, "learning_rate": 2.838842829402888e-08, "loss": 0.0055, "step": 229620 }, { "epoch": 1.938992210424099, "grad_norm": 0.0936051607131958, "learning_rate": 2.8310070523516197e-08, "loss": 0.0028, "step": 229630 }, { "epoch": 1.939076650271263, "grad_norm": 0.3898617625236511, "learning_rate": 2.8231820737090188e-08, "loss": 0.0057, "step": 229640 }, { "epoch": 1.9391610901184269, "grad_norm": 0.48090028762817383, "learning_rate": 2.8153678936450045e-08, "loss": 0.0046, "step": 229650 }, { "epoch": 1.9392455299655906, "grad_norm": 0.34026050567626953, "learning_rate": 2.8075645123293304e-08, "loss": 0.0044, "step": 229660 }, { "epoch": 1.9393299698127546, "grad_norm": 0.428104043006897, "learning_rate": 2.799771929931472e-08, "loss": 0.0083, "step": 229670 }, { "epoch": 1.9394144096599186, "grad_norm": 0.3505789637565613, "learning_rate": 2.791990146620682e-08, "loss": 0.0085, "step": 229680 }, { "epoch": 1.9394988495070824, "grad_norm": 0.06800241768360138, "learning_rate": 2.7842191625659375e-08, "loss": 0.0032, "step": 229690 }, { "epoch": 1.9395832893542462, "grad_norm": 0.09326961636543274, "learning_rate": 2.776458977936103e-08, "loss": 0.0061, "step": 229700 }, { "epoch": 1.9396677292014102, "grad_norm": 0.15749233961105347, "learning_rate": 2.7687095928996543e-08, "loss": 0.0051, "step": 229710 }, { "epoch": 1.9397521690485742, "grad_norm": 0.39292049407958984, "learning_rate": 2.7609710076249576e-08, "loss": 0.0075, "step": 229720 }, { "epoch": 1.939836608895738, "grad_norm": 0.05204445868730545, "learning_rate": 2.7532432222801e-08, "loss": 0.0051, "step": 229730 }, { "epoch": 1.9399210487429017, "grad_norm": 0.5828356742858887, "learning_rate": 2.7455262370328917e-08, "loss": 0.0127, "step": 229740 }, { "epoch": 1.9400054885900655, "grad_norm": 0.11618295311927795, "learning_rate": 2.737820052050977e-08, "loss": 0.004, "step": 229750 }, { "epoch": 1.9400899284372295, "grad_norm": 0.14137080311775208, "learning_rate": 2.7301246675016656e-08, "loss": 0.0039, "step": 229760 }, { "epoch": 1.9401743682843935, "grad_norm": 0.33527666330337524, "learning_rate": 2.7224400835521582e-08, "loss": 0.0061, "step": 229770 }, { "epoch": 1.9402588081315573, "grad_norm": 0.15353156626224518, "learning_rate": 2.714766300369376e-08, "loss": 0.0046, "step": 229780 }, { "epoch": 1.940343247978721, "grad_norm": 0.1571049839258194, "learning_rate": 2.7071033181199636e-08, "loss": 0.0105, "step": 229790 }, { "epoch": 1.940427687825885, "grad_norm": 0.23932045698165894, "learning_rate": 2.6994511369703436e-08, "loss": 0.0054, "step": 229800 }, { "epoch": 1.940512127673049, "grad_norm": 0.2324424833059311, "learning_rate": 2.6918097570867163e-08, "loss": 0.0068, "step": 229810 }, { "epoch": 1.9405965675202128, "grad_norm": 0.5322535037994385, "learning_rate": 2.6841791786351155e-08, "loss": 0.0045, "step": 229820 }, { "epoch": 1.9406810073673766, "grad_norm": 0.05755016207695007, "learning_rate": 2.6765594017812424e-08, "loss": 0.0033, "step": 229830 }, { "epoch": 1.9407654472145406, "grad_norm": 0.22283829748630524, "learning_rate": 2.6689504266905753e-08, "loss": 0.0052, "step": 229840 }, { "epoch": 1.9408498870617046, "grad_norm": 0.247670978307724, "learning_rate": 2.6613522535283155e-08, "loss": 0.0091, "step": 229850 }, { "epoch": 1.9409343269088684, "grad_norm": 0.039418548345565796, "learning_rate": 2.653764882459664e-08, "loss": 0.0038, "step": 229860 }, { "epoch": 1.9410187667560321, "grad_norm": 0.29976266622543335, "learning_rate": 2.646188313649267e-08, "loss": 0.0058, "step": 229870 }, { "epoch": 1.941103206603196, "grad_norm": 0.43434542417526245, "learning_rate": 2.638622547261771e-08, "loss": 0.0053, "step": 229880 }, { "epoch": 1.94118764645036, "grad_norm": 0.024284932762384415, "learning_rate": 2.6310675834614884e-08, "loss": 0.0052, "step": 229890 }, { "epoch": 1.941272086297524, "grad_norm": 0.07289663702249527, "learning_rate": 2.6235234224125105e-08, "loss": 0.0045, "step": 229900 }, { "epoch": 1.9413565261446877, "grad_norm": 0.29055914282798767, "learning_rate": 2.6159900642786508e-08, "loss": 0.0052, "step": 229910 }, { "epoch": 1.9414409659918515, "grad_norm": 0.3394607901573181, "learning_rate": 2.6084675092235556e-08, "loss": 0.0058, "step": 229920 }, { "epoch": 1.9415254058390155, "grad_norm": 0.010504670441150665, "learning_rate": 2.6009557574106504e-08, "loss": 0.0052, "step": 229930 }, { "epoch": 1.9416098456861794, "grad_norm": 0.6331208944320679, "learning_rate": 2.5934548090030264e-08, "loss": 0.005, "step": 229940 }, { "epoch": 1.9416942855333432, "grad_norm": 0.2764308452606201, "learning_rate": 2.585964664163665e-08, "loss": 0.0088, "step": 229950 }, { "epoch": 1.941778725380507, "grad_norm": 0.19937506318092346, "learning_rate": 2.5784853230551577e-08, "loss": 0.0051, "step": 229960 }, { "epoch": 1.9418631652276708, "grad_norm": 0.2127031832933426, "learning_rate": 2.571016785840097e-08, "loss": 0.0041, "step": 229970 }, { "epoch": 1.9419476050748348, "grad_norm": 0.18956223130226135, "learning_rate": 2.5635590526805753e-08, "loss": 0.0095, "step": 229980 }, { "epoch": 1.9420320449219988, "grad_norm": 0.2540558874607086, "learning_rate": 2.55611212373863e-08, "loss": 0.0066, "step": 229990 }, { "epoch": 1.9421164847691625, "grad_norm": 0.3291713297367096, "learning_rate": 2.5486759991759648e-08, "loss": 0.0087, "step": 230000 }, { "epoch": 1.9422009246163263, "grad_norm": 0.16922332346439362, "learning_rate": 2.5412506791541723e-08, "loss": 0.0045, "step": 230010 }, { "epoch": 1.9422853644634903, "grad_norm": 0.001320998533628881, "learning_rate": 2.5338361638344577e-08, "loss": 0.0065, "step": 230020 }, { "epoch": 1.9423698043106543, "grad_norm": 0.25354525446891785, "learning_rate": 2.5264324533778585e-08, "loss": 0.0074, "step": 230030 }, { "epoch": 1.942454244157818, "grad_norm": 0.24475239217281342, "learning_rate": 2.5190395479451902e-08, "loss": 0.0037, "step": 230040 }, { "epoch": 1.9425386840049819, "grad_norm": 0.7114006876945496, "learning_rate": 2.511657447697102e-08, "loss": 0.0076, "step": 230050 }, { "epoch": 1.9426231238521459, "grad_norm": 0.15234190225601196, "learning_rate": 2.5042861527938556e-08, "loss": 0.004, "step": 230060 }, { "epoch": 1.9427075636993099, "grad_norm": 0.11318261921405792, "learning_rate": 2.4969256633955442e-08, "loss": 0.0038, "step": 230070 }, { "epoch": 1.9427920035464736, "grad_norm": 0.142267107963562, "learning_rate": 2.4895759796620957e-08, "loss": 0.012, "step": 230080 }, { "epoch": 1.9428764433936374, "grad_norm": 0.16514474153518677, "learning_rate": 2.482237101753049e-08, "loss": 0.0054, "step": 230090 }, { "epoch": 1.9429608832408012, "grad_norm": 0.1793544888496399, "learning_rate": 2.4749090298278878e-08, "loss": 0.0042, "step": 230100 }, { "epoch": 1.9430453230879652, "grad_norm": 0.22165492177009583, "learning_rate": 2.467591764045707e-08, "loss": 0.0097, "step": 230110 }, { "epoch": 1.9431297629351292, "grad_norm": 0.02508336491882801, "learning_rate": 2.460285304565546e-08, "loss": 0.0053, "step": 230120 }, { "epoch": 1.943214202782293, "grad_norm": 0.06657442450523376, "learning_rate": 2.4529896515460006e-08, "loss": 0.0068, "step": 230130 }, { "epoch": 1.9432986426294567, "grad_norm": 0.4571838676929474, "learning_rate": 2.44570480514561e-08, "loss": 0.0077, "step": 230140 }, { "epoch": 1.9433830824766207, "grad_norm": 0.532918393611908, "learning_rate": 2.4384307655224703e-08, "loss": 0.0084, "step": 230150 }, { "epoch": 1.9434675223237847, "grad_norm": 0.0003589109983295202, "learning_rate": 2.4311675328347328e-08, "loss": 0.002, "step": 230160 }, { "epoch": 1.9435519621709485, "grad_norm": 0.16203679144382477, "learning_rate": 2.423915107240049e-08, "loss": 0.003, "step": 230170 }, { "epoch": 1.9436364020181123, "grad_norm": 0.1832035332918167, "learning_rate": 2.4166734888959042e-08, "loss": 0.0053, "step": 230180 }, { "epoch": 1.9437208418652763, "grad_norm": 0.07017071545124054, "learning_rate": 2.409442677959728e-08, "loss": 0.005, "step": 230190 }, { "epoch": 1.94380528171244, "grad_norm": 0.6261845231056213, "learning_rate": 2.4022226745884502e-08, "loss": 0.017, "step": 230200 }, { "epoch": 1.943889721559604, "grad_norm": 0.6439775824546814, "learning_rate": 2.3950134789389456e-08, "loss": 0.0071, "step": 230210 }, { "epoch": 1.9439741614067678, "grad_norm": 0.47295424342155457, "learning_rate": 2.3878150911677554e-08, "loss": 0.0082, "step": 230220 }, { "epoch": 1.9440586012539316, "grad_norm": 0.30478063225746155, "learning_rate": 2.3806275114313105e-08, "loss": 0.0032, "step": 230230 }, { "epoch": 1.9441430411010956, "grad_norm": 0.06444216519594193, "learning_rate": 2.3734507398855965e-08, "loss": 0.0034, "step": 230240 }, { "epoch": 1.9442274809482596, "grad_norm": 0.6681651473045349, "learning_rate": 2.3662847766866003e-08, "loss": 0.0088, "step": 230250 }, { "epoch": 1.9443119207954234, "grad_norm": 0.10467845946550369, "learning_rate": 2.359129621989864e-08, "loss": 0.0061, "step": 230260 }, { "epoch": 1.9443963606425871, "grad_norm": 0.12608695030212402, "learning_rate": 2.3519852759509298e-08, "loss": 0.0112, "step": 230270 }, { "epoch": 1.9444808004897511, "grad_norm": 0.04486241936683655, "learning_rate": 2.3448517387248404e-08, "loss": 0.0027, "step": 230280 }, { "epoch": 1.9445652403369151, "grad_norm": 0.3559037744998932, "learning_rate": 2.3377290104666384e-08, "loss": 0.0058, "step": 230290 }, { "epoch": 1.944649680184079, "grad_norm": 0.23601901531219482, "learning_rate": 2.3306170913309776e-08, "loss": 0.0071, "step": 230300 }, { "epoch": 1.9447341200312427, "grad_norm": 0.008464951068162918, "learning_rate": 2.323515981472346e-08, "loss": 0.0063, "step": 230310 }, { "epoch": 1.9448185598784065, "grad_norm": 0.12913256883621216, "learning_rate": 2.3164256810448983e-08, "loss": 0.0054, "step": 230320 }, { "epoch": 1.9449029997255705, "grad_norm": 0.0665685161948204, "learning_rate": 2.3093461902027324e-08, "loss": 0.0087, "step": 230330 }, { "epoch": 1.9449874395727345, "grad_norm": 0.44946223497390747, "learning_rate": 2.3022775090996152e-08, "loss": 0.0034, "step": 230340 }, { "epoch": 1.9450718794198982, "grad_norm": 0.09277037531137466, "learning_rate": 2.295219637888979e-08, "loss": 0.006, "step": 230350 }, { "epoch": 1.945156319267062, "grad_norm": 0.085604228079319, "learning_rate": 2.2881725767242015e-08, "loss": 0.0038, "step": 230360 }, { "epoch": 1.945240759114226, "grad_norm": 0.06016174703836441, "learning_rate": 2.281136325758382e-08, "loss": 0.0068, "step": 230370 }, { "epoch": 1.94532519896139, "grad_norm": 0.046687714755535126, "learning_rate": 2.2741108851442316e-08, "loss": 0.0025, "step": 230380 }, { "epoch": 1.9454096388085538, "grad_norm": 0.3709561824798584, "learning_rate": 2.2670962550344066e-08, "loss": 0.0043, "step": 230390 }, { "epoch": 1.9454940786557176, "grad_norm": 0.6089891791343689, "learning_rate": 2.2600924355812846e-08, "loss": 0.0031, "step": 230400 }, { "epoch": 1.9455785185028815, "grad_norm": 0.13084854185581207, "learning_rate": 2.253099426936911e-08, "loss": 0.0101, "step": 230410 }, { "epoch": 1.9456629583500455, "grad_norm": 0.35592636466026306, "learning_rate": 2.246117229253275e-08, "loss": 0.0068, "step": 230420 }, { "epoch": 1.9457473981972093, "grad_norm": 0.9320744276046753, "learning_rate": 2.2391458426819223e-08, "loss": 0.01, "step": 230430 }, { "epoch": 1.945831838044373, "grad_norm": 0.18377582728862762, "learning_rate": 2.232185267374343e-08, "loss": 0.0044, "step": 230440 }, { "epoch": 1.9459162778915369, "grad_norm": 0.26891010999679565, "learning_rate": 2.225235503481693e-08, "loss": 0.0021, "step": 230450 }, { "epoch": 1.9460007177387009, "grad_norm": 0.10367031395435333, "learning_rate": 2.2182965511549636e-08, "loss": 0.0027, "step": 230460 }, { "epoch": 1.9460851575858649, "grad_norm": 0.39564406871795654, "learning_rate": 2.2113684105447562e-08, "loss": 0.0072, "step": 230470 }, { "epoch": 1.9461695974330286, "grad_norm": 0.4278254806995392, "learning_rate": 2.2044510818016728e-08, "loss": 0.0067, "step": 230480 }, { "epoch": 1.9462540372801924, "grad_norm": 0.26151153445243835, "learning_rate": 2.197544565075871e-08, "loss": 0.0037, "step": 230490 }, { "epoch": 1.9463384771273564, "grad_norm": 0.08259815722703934, "learning_rate": 2.190648860517397e-08, "loss": 0.0055, "step": 230500 }, { "epoch": 1.9464229169745204, "grad_norm": 0.41902443766593933, "learning_rate": 2.1837639682760202e-08, "loss": 0.0063, "step": 230510 }, { "epoch": 1.9465073568216842, "grad_norm": 0.257500559091568, "learning_rate": 2.1768898885013433e-08, "loss": 0.0061, "step": 230520 }, { "epoch": 1.946591796668848, "grad_norm": 0.16395358741283417, "learning_rate": 2.170026621342525e-08, "loss": 0.0037, "step": 230530 }, { "epoch": 1.946676236516012, "grad_norm": 0.26513364911079407, "learning_rate": 2.1631741669487226e-08, "loss": 0.0053, "step": 230540 }, { "epoch": 1.9467606763631757, "grad_norm": 0.27160730957984924, "learning_rate": 2.1563325254688183e-08, "loss": 0.0076, "step": 230550 }, { "epoch": 1.9468451162103397, "grad_norm": 0.4048299789428711, "learning_rate": 2.149501697051304e-08, "loss": 0.0091, "step": 230560 }, { "epoch": 1.9469295560575035, "grad_norm": 0.2688472270965576, "learning_rate": 2.1426816818445607e-08, "loss": 0.0044, "step": 230570 }, { "epoch": 1.9470139959046673, "grad_norm": 0.14523828029632568, "learning_rate": 2.1358724799968032e-08, "loss": 0.0071, "step": 230580 }, { "epoch": 1.9470984357518313, "grad_norm": 0.01633073203265667, "learning_rate": 2.1290740916558585e-08, "loss": 0.0058, "step": 230590 }, { "epoch": 1.9471828755989953, "grad_norm": 0.4452528953552246, "learning_rate": 2.122286516969385e-08, "loss": 0.0052, "step": 230600 }, { "epoch": 1.947267315446159, "grad_norm": 0.32250267267227173, "learning_rate": 2.115509756084877e-08, "loss": 0.0044, "step": 230610 }, { "epoch": 1.9473517552933228, "grad_norm": 0.0006194605375640094, "learning_rate": 2.1087438091494384e-08, "loss": 0.0019, "step": 230620 }, { "epoch": 1.9474361951404868, "grad_norm": 0.18199777603149414, "learning_rate": 2.1019886763100627e-08, "loss": 0.006, "step": 230630 }, { "epoch": 1.9475206349876508, "grad_norm": 0.5086373090744019, "learning_rate": 2.0952443577134108e-08, "loss": 0.0048, "step": 230640 }, { "epoch": 1.9476050748348146, "grad_norm": 0.18170589208602905, "learning_rate": 2.0885108535060872e-08, "loss": 0.0044, "step": 230650 }, { "epoch": 1.9476895146819784, "grad_norm": 0.01771179400384426, "learning_rate": 2.0817881638342528e-08, "loss": 0.0058, "step": 230660 }, { "epoch": 1.9477739545291421, "grad_norm": 0.16829252243041992, "learning_rate": 2.075076288843958e-08, "loss": 0.0036, "step": 230670 }, { "epoch": 1.9478583943763061, "grad_norm": 0.3040715754032135, "learning_rate": 2.068375228680919e-08, "loss": 0.0067, "step": 230680 }, { "epoch": 1.9479428342234701, "grad_norm": 0.2103920876979828, "learning_rate": 2.0616849834907972e-08, "loss": 0.0032, "step": 230690 }, { "epoch": 1.948027274070634, "grad_norm": 0.5937595963478088, "learning_rate": 2.055005553418865e-08, "loss": 0.0067, "step": 230700 }, { "epoch": 1.9481117139177977, "grad_norm": 0.234156534075737, "learning_rate": 2.0483369386101183e-08, "loss": 0.0062, "step": 230710 }, { "epoch": 1.9481961537649617, "grad_norm": 0.43852663040161133, "learning_rate": 2.0416791392094403e-08, "loss": 0.0089, "step": 230720 }, { "epoch": 1.9482805936121257, "grad_norm": 0.2197490632534027, "learning_rate": 2.0350321553614937e-08, "loss": 0.0039, "step": 230730 }, { "epoch": 1.9483650334592895, "grad_norm": 0.35404956340789795, "learning_rate": 2.0283959872105518e-08, "loss": 0.0039, "step": 230740 }, { "epoch": 1.9484494733064532, "grad_norm": 0.6102731823921204, "learning_rate": 2.0217706349008327e-08, "loss": 0.0087, "step": 230750 }, { "epoch": 1.9485339131536172, "grad_norm": 0.06284836679697037, "learning_rate": 2.015156098576221e-08, "loss": 0.0037, "step": 230760 }, { "epoch": 1.9486183530007812, "grad_norm": 0.0029758168384432793, "learning_rate": 2.0085523783803796e-08, "loss": 0.0042, "step": 230770 }, { "epoch": 1.948702792847945, "grad_norm": 0.38347357511520386, "learning_rate": 2.0019594744567494e-08, "loss": 0.0048, "step": 230780 }, { "epoch": 1.9487872326951088, "grad_norm": 0.3869137763977051, "learning_rate": 1.9953773869484384e-08, "loss": 0.0038, "step": 230790 }, { "epoch": 1.9488716725422726, "grad_norm": 0.28485018014907837, "learning_rate": 1.988806115998554e-08, "loss": 0.0056, "step": 230800 }, { "epoch": 1.9489561123894366, "grad_norm": 0.3565640449523926, "learning_rate": 1.9822456617497044e-08, "loss": 0.0084, "step": 230810 }, { "epoch": 1.9490405522366006, "grad_norm": 0.20853202044963837, "learning_rate": 1.9756960243444422e-08, "loss": 0.0056, "step": 230820 }, { "epoch": 1.9491249920837643, "grad_norm": 0.0832902044057846, "learning_rate": 1.9691572039249873e-08, "loss": 0.0054, "step": 230830 }, { "epoch": 1.949209431930928, "grad_norm": 0.08282481878995895, "learning_rate": 1.9626292006333924e-08, "loss": 0.0038, "step": 230840 }, { "epoch": 1.949293871778092, "grad_norm": 0.21640393137931824, "learning_rate": 1.9561120146114332e-08, "loss": 0.009, "step": 230850 }, { "epoch": 1.949378311625256, "grad_norm": 0.00575300445780158, "learning_rate": 1.949605646000663e-08, "loss": 0.0095, "step": 230860 }, { "epoch": 1.9494627514724199, "grad_norm": 0.38933488726615906, "learning_rate": 1.9431100949424132e-08, "loss": 0.0066, "step": 230870 }, { "epoch": 1.9495471913195836, "grad_norm": 0.17555907368659973, "learning_rate": 1.9366253615777376e-08, "loss": 0.0064, "step": 230880 }, { "epoch": 1.9496316311667474, "grad_norm": 0.1617339551448822, "learning_rate": 1.9301514460475235e-08, "loss": 0.0064, "step": 230890 }, { "epoch": 1.9497160710139114, "grad_norm": 0.3438304662704468, "learning_rate": 1.9236883484923253e-08, "loss": 0.0064, "step": 230900 }, { "epoch": 1.9498005108610754, "grad_norm": 0.07961217314004898, "learning_rate": 1.9172360690525303e-08, "loss": 0.0049, "step": 230910 }, { "epoch": 1.9498849507082392, "grad_norm": 0.36098387837409973, "learning_rate": 1.9107946078683604e-08, "loss": 0.0099, "step": 230920 }, { "epoch": 1.949969390555403, "grad_norm": 0.30871036648750305, "learning_rate": 1.904363965079592e-08, "loss": 0.0055, "step": 230930 }, { "epoch": 1.950053830402567, "grad_norm": 0.41442131996154785, "learning_rate": 1.8979441408260023e-08, "loss": 0.0098, "step": 230940 }, { "epoch": 1.950138270249731, "grad_norm": 0.5893517732620239, "learning_rate": 1.8915351352470358e-08, "loss": 0.0078, "step": 230950 }, { "epoch": 1.9502227100968947, "grad_norm": 0.5555644035339355, "learning_rate": 1.8851369484818028e-08, "loss": 0.005, "step": 230960 }, { "epoch": 1.9503071499440585, "grad_norm": 0.6254075169563293, "learning_rate": 1.878749580669359e-08, "loss": 0.006, "step": 230970 }, { "epoch": 1.9503915897912225, "grad_norm": 0.31477680802345276, "learning_rate": 1.8723730319483714e-08, "loss": 0.0055, "step": 230980 }, { "epoch": 1.9504760296383865, "grad_norm": 0.03310715779662132, "learning_rate": 1.8660073024573398e-08, "loss": 0.008, "step": 230990 }, { "epoch": 1.9505604694855503, "grad_norm": 0.24319450557231903, "learning_rate": 1.8596523923345988e-08, "loss": 0.0043, "step": 231000 }, { "epoch": 1.950644909332714, "grad_norm": 0.2879771292209625, "learning_rate": 1.853308301718093e-08, "loss": 0.0054, "step": 231010 }, { "epoch": 1.9507293491798778, "grad_norm": 0.16160105168819427, "learning_rate": 1.8469750307457123e-08, "loss": 0.0082, "step": 231020 }, { "epoch": 1.9508137890270418, "grad_norm": 0.7067880034446716, "learning_rate": 1.8406525795549026e-08, "loss": 0.0079, "step": 231030 }, { "epoch": 1.9508982288742058, "grad_norm": 0.48186200857162476, "learning_rate": 1.834340948283053e-08, "loss": 0.0058, "step": 231040 }, { "epoch": 1.9509826687213696, "grad_norm": 0.02952001243829727, "learning_rate": 1.828040137067222e-08, "loss": 0.0057, "step": 231050 }, { "epoch": 1.9510671085685334, "grad_norm": 0.06534858047962189, "learning_rate": 1.821750146044299e-08, "loss": 0.0083, "step": 231060 }, { "epoch": 1.9511515484156974, "grad_norm": 0.024321505799889565, "learning_rate": 1.8154709753508416e-08, "loss": 0.0069, "step": 231070 }, { "epoch": 1.9512359882628614, "grad_norm": 0.019993286579847336, "learning_rate": 1.8092026251232962e-08, "loss": 0.0032, "step": 231080 }, { "epoch": 1.9513204281100252, "grad_norm": 0.28698083758354187, "learning_rate": 1.802945095497721e-08, "loss": 0.0061, "step": 231090 }, { "epoch": 1.951404867957189, "grad_norm": 0.1178571954369545, "learning_rate": 1.796698386610174e-08, "loss": 0.0029, "step": 231100 }, { "epoch": 1.951489307804353, "grad_norm": 0.231037899851799, "learning_rate": 1.7904624985962126e-08, "loss": 0.0039, "step": 231110 }, { "epoch": 1.9515737476515167, "grad_norm": 0.4025717079639435, "learning_rate": 1.7842374315912848e-08, "loss": 0.0058, "step": 231120 }, { "epoch": 1.9516581874986807, "grad_norm": 0.1091422364115715, "learning_rate": 1.7780231857306708e-08, "loss": 0.0055, "step": 231130 }, { "epoch": 1.9517426273458445, "grad_norm": 0.08317146450281143, "learning_rate": 1.7718197611492626e-08, "loss": 0.0086, "step": 231140 }, { "epoch": 1.9518270671930082, "grad_norm": 0.06501618772745132, "learning_rate": 1.7656271579818418e-08, "loss": 0.008, "step": 231150 }, { "epoch": 1.9519115070401722, "grad_norm": 0.12186203896999359, "learning_rate": 1.7594453763629114e-08, "loss": 0.0035, "step": 231160 }, { "epoch": 1.9519959468873362, "grad_norm": 0.40896326303482056, "learning_rate": 1.753274416426698e-08, "loss": 0.0073, "step": 231170 }, { "epoch": 1.9520803867345, "grad_norm": 0.4655453860759735, "learning_rate": 1.7471142783072603e-08, "loss": 0.0032, "step": 231180 }, { "epoch": 1.9521648265816638, "grad_norm": 0.18042705953121185, "learning_rate": 1.7409649621384362e-08, "loss": 0.0061, "step": 231190 }, { "epoch": 1.9522492664288278, "grad_norm": 0.2515716552734375, "learning_rate": 1.7348264680537298e-08, "loss": 0.0142, "step": 231200 }, { "epoch": 1.9523337062759918, "grad_norm": 0.13032852113246918, "learning_rate": 1.7286987961865343e-08, "loss": 0.0069, "step": 231210 }, { "epoch": 1.9524181461231556, "grad_norm": 0.2610034644603729, "learning_rate": 1.7225819466698544e-08, "loss": 0.0044, "step": 231220 }, { "epoch": 1.9525025859703193, "grad_norm": 0.31081080436706543, "learning_rate": 1.7164759196365843e-08, "loss": 0.0066, "step": 231230 }, { "epoch": 1.9525870258174831, "grad_norm": 0.281040221452713, "learning_rate": 1.7103807152193398e-08, "loss": 0.0052, "step": 231240 }, { "epoch": 1.952671465664647, "grad_norm": 0.6567709445953369, "learning_rate": 1.70429633355057e-08, "loss": 0.0048, "step": 231250 }, { "epoch": 1.952755905511811, "grad_norm": 0.389559805393219, "learning_rate": 1.698222774762337e-08, "loss": 0.0059, "step": 231260 }, { "epoch": 1.9528403453589749, "grad_norm": 0.14652037620544434, "learning_rate": 1.69216003898659e-08, "loss": 0.0025, "step": 231270 }, { "epoch": 1.9529247852061387, "grad_norm": 0.004940378945320845, "learning_rate": 1.6861081263550017e-08, "loss": 0.012, "step": 231280 }, { "epoch": 1.9530092250533027, "grad_norm": 0.0017550633056089282, "learning_rate": 1.6800670369990778e-08, "loss": 0.0033, "step": 231290 }, { "epoch": 1.9530936649004667, "grad_norm": 0.08873233199119568, "learning_rate": 1.674036771049936e-08, "loss": 0.0075, "step": 231300 }, { "epoch": 1.9531781047476304, "grad_norm": 0.04606317728757858, "learning_rate": 1.6680173286385827e-08, "loss": 0.005, "step": 231310 }, { "epoch": 1.9532625445947942, "grad_norm": 0.21713930368423462, "learning_rate": 1.6620087098958016e-08, "loss": 0.0033, "step": 231320 }, { "epoch": 1.9533469844419582, "grad_norm": 0.02393668331205845, "learning_rate": 1.6560109149520998e-08, "loss": 0.0034, "step": 231330 }, { "epoch": 1.9534314242891222, "grad_norm": 0.0037549615371972322, "learning_rate": 1.6500239439377065e-08, "loss": 0.0029, "step": 231340 }, { "epoch": 1.953515864136286, "grad_norm": 0.6080699563026428, "learning_rate": 1.644047796982684e-08, "loss": 0.0048, "step": 231350 }, { "epoch": 1.9536003039834497, "grad_norm": 0.20653758943080902, "learning_rate": 1.638082474216762e-08, "loss": 0.0051, "step": 231360 }, { "epoch": 1.9536847438306135, "grad_norm": 0.21592041850090027, "learning_rate": 1.632127975769615e-08, "loss": 0.0053, "step": 231370 }, { "epoch": 1.9537691836777775, "grad_norm": 0.4627831280231476, "learning_rate": 1.626184301770528e-08, "loss": 0.0041, "step": 231380 }, { "epoch": 1.9538536235249415, "grad_norm": 0.3116547465324402, "learning_rate": 1.6202514523485645e-08, "loss": 0.0072, "step": 231390 }, { "epoch": 1.9539380633721053, "grad_norm": 0.7804892659187317, "learning_rate": 1.6143294276326217e-08, "loss": 0.0089, "step": 231400 }, { "epoch": 1.954022503219269, "grad_norm": 0.14799931645393372, "learning_rate": 1.608418227751318e-08, "loss": 0.0056, "step": 231410 }, { "epoch": 1.954106943066433, "grad_norm": 0.4069328010082245, "learning_rate": 1.6025178528329965e-08, "loss": 0.0085, "step": 231420 }, { "epoch": 1.954191382913597, "grad_norm": 0.17879493534564972, "learning_rate": 1.5966283030059426e-08, "loss": 0.0079, "step": 231430 }, { "epoch": 1.9542758227607608, "grad_norm": 0.13378408551216125, "learning_rate": 1.590749578397943e-08, "loss": 0.0092, "step": 231440 }, { "epoch": 1.9543602626079246, "grad_norm": 0.34600576758384705, "learning_rate": 1.5848816791367295e-08, "loss": 0.0046, "step": 231450 }, { "epoch": 1.9544447024550886, "grad_norm": 0.19792073965072632, "learning_rate": 1.5790246053496993e-08, "loss": 0.007, "step": 231460 }, { "epoch": 1.9545291423022524, "grad_norm": 0.050797488540410995, "learning_rate": 1.5731783571641958e-08, "loss": 0.0064, "step": 231470 }, { "epoch": 1.9546135821494164, "grad_norm": 0.04921366274356842, "learning_rate": 1.5673429347071166e-08, "loss": 0.0061, "step": 231480 }, { "epoch": 1.9546980219965802, "grad_norm": 0.24355290830135345, "learning_rate": 1.561518338105139e-08, "loss": 0.0062, "step": 231490 }, { "epoch": 1.954782461843744, "grad_norm": 0.3524496853351593, "learning_rate": 1.5557045674849393e-08, "loss": 0.0061, "step": 231500 }, { "epoch": 1.954866901690908, "grad_norm": 0.18638311326503754, "learning_rate": 1.5499016229726384e-08, "loss": 0.0057, "step": 231510 }, { "epoch": 1.954951341538072, "grad_norm": 0.13545306026935577, "learning_rate": 1.5441095046943578e-08, "loss": 0.0092, "step": 231520 }, { "epoch": 1.9550357813852357, "grad_norm": 0.025423407554626465, "learning_rate": 1.5383282127758304e-08, "loss": 0.0038, "step": 231530 }, { "epoch": 1.9551202212323995, "grad_norm": 0.3085910379886627, "learning_rate": 1.5325577473427333e-08, "loss": 0.0052, "step": 231540 }, { "epoch": 1.9552046610795635, "grad_norm": 0.02373073436319828, "learning_rate": 1.5267981085202444e-08, "loss": 0.0082, "step": 231550 }, { "epoch": 1.9552891009267275, "grad_norm": 0.36593344807624817, "learning_rate": 1.5210492964336522e-08, "loss": 0.0089, "step": 231560 }, { "epoch": 1.9553735407738912, "grad_norm": 0.5810337066650391, "learning_rate": 1.5153113112076345e-08, "loss": 0.0051, "step": 231570 }, { "epoch": 1.955457980621055, "grad_norm": 0.20626726746559143, "learning_rate": 1.509584152966981e-08, "loss": 0.0027, "step": 231580 }, { "epoch": 1.9555424204682188, "grad_norm": 0.24941593408584595, "learning_rate": 1.5038678218359803e-08, "loss": 0.0039, "step": 231590 }, { "epoch": 1.9556268603153828, "grad_norm": 0.2414253056049347, "learning_rate": 1.4981623179388115e-08, "loss": 0.0039, "step": 231600 }, { "epoch": 1.9557113001625468, "grad_norm": 0.3560504615306854, "learning_rate": 1.492467641399431e-08, "loss": 0.0062, "step": 231610 }, { "epoch": 1.9557957400097106, "grad_norm": 0.5119390487670898, "learning_rate": 1.4867837923414619e-08, "loss": 0.0079, "step": 231620 }, { "epoch": 1.9558801798568743, "grad_norm": 0.4182335138320923, "learning_rate": 1.4811107708884165e-08, "loss": 0.011, "step": 231630 }, { "epoch": 1.9559646197040383, "grad_norm": 0.21273578703403473, "learning_rate": 1.4754485771634741e-08, "loss": 0.0043, "step": 231640 }, { "epoch": 1.9560490595512023, "grad_norm": 0.29772502183914185, "learning_rate": 1.4697972112896475e-08, "loss": 0.0154, "step": 231650 }, { "epoch": 1.9561334993983661, "grad_norm": 0.06289274990558624, "learning_rate": 1.4641566733896717e-08, "loss": 0.0034, "step": 231660 }, { "epoch": 1.95621793924553, "grad_norm": 0.26537472009658813, "learning_rate": 1.45852696358606e-08, "loss": 0.0066, "step": 231670 }, { "epoch": 1.9563023790926939, "grad_norm": 0.2272769659757614, "learning_rate": 1.4529080820010477e-08, "loss": 0.0056, "step": 231680 }, { "epoch": 1.9563868189398577, "grad_norm": 0.08928760886192322, "learning_rate": 1.4473000287567596e-08, "loss": 0.0039, "step": 231690 }, { "epoch": 1.9564712587870217, "grad_norm": 0.07432863861322403, "learning_rate": 1.441702803974876e-08, "loss": 0.0055, "step": 231700 }, { "epoch": 1.9565556986341854, "grad_norm": 0.3360593914985657, "learning_rate": 1.4361164077770773e-08, "loss": 0.008, "step": 231710 }, { "epoch": 1.9566401384813492, "grad_norm": 0.26955974102020264, "learning_rate": 1.430540840284711e-08, "loss": 0.009, "step": 231720 }, { "epoch": 1.9567245783285132, "grad_norm": 0.2516166567802429, "learning_rate": 1.4249761016187913e-08, "loss": 0.0053, "step": 231730 }, { "epoch": 1.9568090181756772, "grad_norm": 0.13501983880996704, "learning_rate": 1.4194221919002216e-08, "loss": 0.0046, "step": 231740 }, { "epoch": 1.956893458022841, "grad_norm": 0.1909206658601761, "learning_rate": 1.4138791112496275e-08, "loss": 0.0051, "step": 231750 }, { "epoch": 1.9569778978700048, "grad_norm": 0.07555918395519257, "learning_rate": 1.4083468597874128e-08, "loss": 0.0075, "step": 231760 }, { "epoch": 1.9570623377171688, "grad_norm": 0.04940568655729294, "learning_rate": 1.402825437633759e-08, "loss": 0.0037, "step": 231770 }, { "epoch": 1.9571467775643328, "grad_norm": 0.08609484136104584, "learning_rate": 1.397314844908515e-08, "loss": 0.0032, "step": 231780 }, { "epoch": 1.9572312174114965, "grad_norm": 0.07640165835618973, "learning_rate": 1.3918150817314736e-08, "loss": 0.0147, "step": 231790 }, { "epoch": 1.9573156572586603, "grad_norm": 0.31754785776138306, "learning_rate": 1.3863261482219836e-08, "loss": 0.0032, "step": 231800 }, { "epoch": 1.957400097105824, "grad_norm": 0.26695743203163147, "learning_rate": 1.3808480444993389e-08, "loss": 0.0023, "step": 231810 }, { "epoch": 1.957484536952988, "grad_norm": 0.2301555722951889, "learning_rate": 1.3753807706824995e-08, "loss": 0.0091, "step": 231820 }, { "epoch": 1.957568976800152, "grad_norm": 0.1372896283864975, "learning_rate": 1.3699243268902041e-08, "loss": 0.0066, "step": 231830 }, { "epoch": 1.9576534166473158, "grad_norm": 0.13467954099178314, "learning_rate": 1.3644787132409687e-08, "loss": 0.0022, "step": 231840 }, { "epoch": 1.9577378564944796, "grad_norm": 0.002395255258306861, "learning_rate": 1.3590439298531432e-08, "loss": 0.0038, "step": 231850 }, { "epoch": 1.9578222963416436, "grad_norm": 0.4603766202926636, "learning_rate": 1.3536199768446334e-08, "loss": 0.0053, "step": 231860 }, { "epoch": 1.9579067361888076, "grad_norm": 0.06955188512802124, "learning_rate": 1.348206854333345e-08, "loss": 0.0042, "step": 231870 }, { "epoch": 1.9579911760359714, "grad_norm": 0.18245601654052734, "learning_rate": 1.3428045624367392e-08, "loss": 0.0082, "step": 231880 }, { "epoch": 1.9580756158831352, "grad_norm": 0.15721866488456726, "learning_rate": 1.3374131012723335e-08, "loss": 0.0034, "step": 231890 }, { "epoch": 1.9581600557302992, "grad_norm": 0.13610544800758362, "learning_rate": 1.3320324709570897e-08, "loss": 0.0077, "step": 231900 }, { "epoch": 1.9582444955774632, "grad_norm": 0.00801904033869505, "learning_rate": 1.3266626716079144e-08, "loss": 0.0026, "step": 231910 }, { "epoch": 1.958328935424627, "grad_norm": 0.12721973657608032, "learning_rate": 1.3213037033414367e-08, "loss": 0.01, "step": 231920 }, { "epoch": 1.9584133752717907, "grad_norm": 0.11163715273141861, "learning_rate": 1.3159555662740631e-08, "loss": 0.0047, "step": 231930 }, { "epoch": 1.9584978151189545, "grad_norm": 0.3894957900047302, "learning_rate": 1.3106182605219786e-08, "loss": 0.0071, "step": 231940 }, { "epoch": 1.9585822549661185, "grad_norm": 0.1722065955400467, "learning_rate": 1.3052917862009795e-08, "loss": 0.0105, "step": 231950 }, { "epoch": 1.9586666948132825, "grad_norm": 0.22501511871814728, "learning_rate": 1.2999761434269175e-08, "loss": 0.0073, "step": 231960 }, { "epoch": 1.9587511346604463, "grad_norm": 0.0022288502659648657, "learning_rate": 1.294671332315145e-08, "loss": 0.0086, "step": 231970 }, { "epoch": 1.95883557450761, "grad_norm": 0.11495929211378098, "learning_rate": 1.2893773529809583e-08, "loss": 0.0208, "step": 231980 }, { "epoch": 1.958920014354774, "grad_norm": 0.1035456731915474, "learning_rate": 1.2840942055392658e-08, "loss": 0.004, "step": 231990 }, { "epoch": 1.959004454201938, "grad_norm": 0.17509286105632782, "learning_rate": 1.2788218901049198e-08, "loss": 0.0049, "step": 232000 }, { "epoch": 1.9590888940491018, "grad_norm": 0.2468106895685196, "learning_rate": 1.2735604067922735e-08, "loss": 0.0037, "step": 232010 }, { "epoch": 1.9591733338962656, "grad_norm": 0.2591564357280731, "learning_rate": 1.2683097557157353e-08, "loss": 0.0055, "step": 232020 }, { "epoch": 1.9592577737434296, "grad_norm": 0.2171998769044876, "learning_rate": 1.263069936989325e-08, "loss": 0.0022, "step": 232030 }, { "epoch": 1.9593422135905934, "grad_norm": 0.10765920579433441, "learning_rate": 1.2578409507268408e-08, "loss": 0.0048, "step": 232040 }, { "epoch": 1.9594266534377573, "grad_norm": 0.522939145565033, "learning_rate": 1.2526227970418025e-08, "loss": 0.006, "step": 232050 }, { "epoch": 1.9595110932849211, "grad_norm": 0.08588249981403351, "learning_rate": 1.2474154760476198e-08, "loss": 0.0046, "step": 232060 }, { "epoch": 1.959595533132085, "grad_norm": 0.3564477264881134, "learning_rate": 1.2422189878573687e-08, "loss": 0.0067, "step": 232070 }, { "epoch": 1.959679972979249, "grad_norm": 0.318680077791214, "learning_rate": 1.2370333325839034e-08, "loss": 0.0079, "step": 232080 }, { "epoch": 1.959764412826413, "grad_norm": 0.04693862050771713, "learning_rate": 1.2318585103399116e-08, "loss": 0.0042, "step": 232090 }, { "epoch": 1.9598488526735767, "grad_norm": 0.1556047648191452, "learning_rate": 1.2266945212376924e-08, "loss": 0.0061, "step": 232100 }, { "epoch": 1.9599332925207404, "grad_norm": 0.13590720295906067, "learning_rate": 1.2215413653894892e-08, "loss": 0.0045, "step": 232110 }, { "epoch": 1.9600177323679044, "grad_norm": 0.3750467598438263, "learning_rate": 1.2163990429071571e-08, "loss": 0.0084, "step": 232120 }, { "epoch": 1.9601021722150684, "grad_norm": 0.14492420852184296, "learning_rate": 1.2112675539024953e-08, "loss": 0.0038, "step": 232130 }, { "epoch": 1.9601866120622322, "grad_norm": 0.1982913315296173, "learning_rate": 1.2061468984868595e-08, "loss": 0.0065, "step": 232140 }, { "epoch": 1.960271051909396, "grad_norm": 0.3163519501686096, "learning_rate": 1.2010370767714941e-08, "loss": 0.0068, "step": 232150 }, { "epoch": 1.9603554917565598, "grad_norm": 0.10935384780168533, "learning_rate": 1.1959380888673655e-08, "loss": 0.0046, "step": 232160 }, { "epoch": 1.9604399316037238, "grad_norm": 0.1955292671918869, "learning_rate": 1.1908499348852743e-08, "loss": 0.0066, "step": 232170 }, { "epoch": 1.9605243714508878, "grad_norm": 0.13853473961353302, "learning_rate": 1.1857726149357428e-08, "loss": 0.0062, "step": 232180 }, { "epoch": 1.9606088112980515, "grad_norm": 0.15504521131515503, "learning_rate": 1.1807061291289612e-08, "loss": 0.0064, "step": 232190 }, { "epoch": 1.9606932511452153, "grad_norm": 0.37910738587379456, "learning_rate": 1.1756504775750633e-08, "loss": 0.0067, "step": 232200 }, { "epoch": 1.9607776909923793, "grad_norm": 0.22960975766181946, "learning_rate": 1.170605660383739e-08, "loss": 0.011, "step": 232210 }, { "epoch": 1.9608621308395433, "grad_norm": 0.590610921382904, "learning_rate": 1.165571677664734e-08, "loss": 0.0081, "step": 232220 }, { "epoch": 1.960946570686707, "grad_norm": 0.02749955654144287, "learning_rate": 1.160548529527239e-08, "loss": 0.0057, "step": 232230 }, { "epoch": 1.9610310105338709, "grad_norm": 0.2762506604194641, "learning_rate": 1.155536216080444e-08, "loss": 0.0078, "step": 232240 }, { "epoch": 1.9611154503810349, "grad_norm": 0.4558892548084259, "learning_rate": 1.150534737433151e-08, "loss": 0.0056, "step": 232250 }, { "epoch": 1.9611998902281988, "grad_norm": 0.27965250611305237, "learning_rate": 1.145544093694051e-08, "loss": 0.0038, "step": 232260 }, { "epoch": 1.9612843300753626, "grad_norm": 0.36604413390159607, "learning_rate": 1.140564284971446e-08, "loss": 0.0059, "step": 232270 }, { "epoch": 1.9613687699225264, "grad_norm": 0.2998517155647278, "learning_rate": 1.135595311373583e-08, "loss": 0.0057, "step": 232280 }, { "epoch": 1.9614532097696902, "grad_norm": 0.29304876923561096, "learning_rate": 1.1306371730083755e-08, "loss": 0.004, "step": 232290 }, { "epoch": 1.9615376496168542, "grad_norm": 0.3671673536300659, "learning_rate": 1.125689869983515e-08, "loss": 0.0088, "step": 232300 }, { "epoch": 1.9616220894640182, "grad_norm": 0.012803047895431519, "learning_rate": 1.120753402406416e-08, "loss": 0.0067, "step": 232310 }, { "epoch": 1.961706529311182, "grad_norm": 0.16750293970108032, "learning_rate": 1.1158277703843257e-08, "loss": 0.0065, "step": 232320 }, { "epoch": 1.9617909691583457, "grad_norm": 0.24654287099838257, "learning_rate": 1.1109129740242696e-08, "loss": 0.0042, "step": 232330 }, { "epoch": 1.9618754090055097, "grad_norm": 0.4954546391963959, "learning_rate": 1.1060090134328849e-08, "loss": 0.0068, "step": 232340 }, { "epoch": 1.9619598488526737, "grad_norm": 0.3657144606113434, "learning_rate": 1.1011158887168082e-08, "loss": 0.0061, "step": 232350 }, { "epoch": 1.9620442886998375, "grad_norm": 0.2365642488002777, "learning_rate": 1.0962335999822327e-08, "loss": 0.0042, "step": 232360 }, { "epoch": 1.9621287285470013, "grad_norm": 0.19091174006462097, "learning_rate": 1.0913621473352398e-08, "loss": 0.0037, "step": 232370 }, { "epoch": 1.962213168394165, "grad_norm": 0.08587140589952469, "learning_rate": 1.0865015308815785e-08, "loss": 0.004, "step": 232380 }, { "epoch": 1.962297608241329, "grad_norm": 0.21321210265159607, "learning_rate": 1.0816517507269419e-08, "loss": 0.0059, "step": 232390 }, { "epoch": 1.962382048088493, "grad_norm": 0.228298619389534, "learning_rate": 1.0768128069765239e-08, "loss": 0.0053, "step": 232400 }, { "epoch": 1.9624664879356568, "grad_norm": 0.3815847933292389, "learning_rate": 1.0719846997355176e-08, "loss": 0.0064, "step": 232410 }, { "epoch": 1.9625509277828206, "grad_norm": 0.37493568658828735, "learning_rate": 1.0671674291087842e-08, "loss": 0.0052, "step": 232420 }, { "epoch": 1.9626353676299846, "grad_norm": 0.1018991470336914, "learning_rate": 1.0623609952009062e-08, "loss": 0.0085, "step": 232430 }, { "epoch": 1.9627198074771486, "grad_norm": 0.4612571895122528, "learning_rate": 1.0575653981163558e-08, "loss": 0.0098, "step": 232440 }, { "epoch": 1.9628042473243124, "grad_norm": 0.027836998924613, "learning_rate": 1.052780637959161e-08, "loss": 0.0104, "step": 232450 }, { "epoch": 1.9628886871714761, "grad_norm": 0.08894574642181396, "learning_rate": 1.048006714833405e-08, "loss": 0.0054, "step": 232460 }, { "epoch": 1.9629731270186401, "grad_norm": 0.16049009561538696, "learning_rate": 1.0432436288426162e-08, "loss": 0.0095, "step": 232470 }, { "epoch": 1.9630575668658041, "grad_norm": 0.08843990415334702, "learning_rate": 1.0384913800903785e-08, "loss": 0.0034, "step": 232480 }, { "epoch": 1.963142006712968, "grad_norm": 0.27969375252723694, "learning_rate": 1.0337499686798868e-08, "loss": 0.0034, "step": 232490 }, { "epoch": 1.9632264465601317, "grad_norm": 0.1720387190580368, "learning_rate": 1.0290193947140592e-08, "loss": 0.0036, "step": 232500 }, { "epoch": 1.9633108864072955, "grad_norm": 0.006347323767840862, "learning_rate": 1.024299658295702e-08, "loss": 0.0063, "step": 232510 }, { "epoch": 1.9633953262544594, "grad_norm": 0.17059113085269928, "learning_rate": 1.0195907595272892e-08, "loss": 0.0039, "step": 232520 }, { "epoch": 1.9634797661016234, "grad_norm": 0.7171598076820374, "learning_rate": 1.0148926985111274e-08, "loss": 0.0052, "step": 232530 }, { "epoch": 1.9635642059487872, "grad_norm": 0.07242625951766968, "learning_rate": 1.0102054753492463e-08, "loss": 0.0053, "step": 232540 }, { "epoch": 1.963648645795951, "grad_norm": 0.11293090879917145, "learning_rate": 1.0055290901433979e-08, "loss": 0.0039, "step": 232550 }, { "epoch": 1.963733085643115, "grad_norm": 0.2686532139778137, "learning_rate": 1.0008635429952784e-08, "loss": 0.0037, "step": 232560 }, { "epoch": 1.963817525490279, "grad_norm": 0.23419803380966187, "learning_rate": 9.962088340060849e-09, "loss": 0.0055, "step": 232570 }, { "epoch": 1.9639019653374428, "grad_norm": 0.20916444063186646, "learning_rate": 9.915649632770141e-09, "loss": 0.0101, "step": 232580 }, { "epoch": 1.9639864051846065, "grad_norm": 0.3601648211479187, "learning_rate": 9.869319309089299e-09, "loss": 0.0078, "step": 232590 }, { "epoch": 1.9640708450317705, "grad_norm": 0.03316912427544594, "learning_rate": 9.823097370023626e-09, "loss": 0.0031, "step": 232600 }, { "epoch": 1.9641552848789343, "grad_norm": 0.13560402393341064, "learning_rate": 9.77698381657788e-09, "loss": 0.0044, "step": 232610 }, { "epoch": 1.9642397247260983, "grad_norm": 0.13777786493301392, "learning_rate": 9.730978649753476e-09, "loss": 0.0059, "step": 232620 }, { "epoch": 1.964324164573262, "grad_norm": 0.8727487325668335, "learning_rate": 9.685081870549617e-09, "loss": 0.0085, "step": 232630 }, { "epoch": 1.9644086044204259, "grad_norm": 0.02694978378713131, "learning_rate": 9.639293479963285e-09, "loss": 0.0057, "step": 232640 }, { "epoch": 1.9644930442675899, "grad_norm": 0.2120453417301178, "learning_rate": 9.59361347898924e-09, "loss": 0.0075, "step": 232650 }, { "epoch": 1.9645774841147539, "grad_norm": 0.5447168350219727, "learning_rate": 9.548041868618907e-09, "loss": 0.0066, "step": 232660 }, { "epoch": 1.9646619239619176, "grad_norm": 0.33503711223602295, "learning_rate": 9.502578649842053e-09, "loss": 0.0041, "step": 232670 }, { "epoch": 1.9647463638090814, "grad_norm": 0.17817460000514984, "learning_rate": 9.457223823647332e-09, "loss": 0.0046, "step": 232680 }, { "epoch": 1.9648308036562454, "grad_norm": 0.3694487512111664, "learning_rate": 9.411977391018401e-09, "loss": 0.0056, "step": 232690 }, { "epoch": 1.9649152435034094, "grad_norm": 0.05552356317639351, "learning_rate": 9.366839352938916e-09, "loss": 0.0034, "step": 232700 }, { "epoch": 1.9649996833505732, "grad_norm": 0.3045112192630768, "learning_rate": 9.321809710388652e-09, "loss": 0.0063, "step": 232710 }, { "epoch": 1.965084123197737, "grad_norm": 1.6049665212631226, "learning_rate": 9.276888464346267e-09, "loss": 0.0092, "step": 232720 }, { "epoch": 1.9651685630449007, "grad_norm": 0.18133887648582458, "learning_rate": 9.232075615787095e-09, "loss": 0.0045, "step": 232730 }, { "epoch": 1.9652530028920647, "grad_norm": 0.6839962005615234, "learning_rate": 9.1873711656848e-09, "loss": 0.0072, "step": 232740 }, { "epoch": 1.9653374427392287, "grad_norm": 0.1029665470123291, "learning_rate": 9.142775115010272e-09, "loss": 0.0031, "step": 232750 }, { "epoch": 1.9654218825863925, "grad_norm": 0.20578935742378235, "learning_rate": 9.098287464731625e-09, "loss": 0.0067, "step": 232760 }, { "epoch": 1.9655063224335563, "grad_norm": 0.08183889836072922, "learning_rate": 9.05390821581531e-09, "loss": 0.0045, "step": 232770 }, { "epoch": 1.9655907622807203, "grad_norm": 0.14185191690921783, "learning_rate": 9.009637369225554e-09, "loss": 0.0057, "step": 232780 }, { "epoch": 1.9656752021278843, "grad_norm": 0.17181290686130524, "learning_rate": 8.965474925923257e-09, "loss": 0.0024, "step": 232790 }, { "epoch": 1.965759641975048, "grad_norm": 0.12775982916355133, "learning_rate": 8.921420886868759e-09, "loss": 0.0045, "step": 232800 }, { "epoch": 1.9658440818222118, "grad_norm": 0.48455214500427246, "learning_rate": 8.877475253017964e-09, "loss": 0.0064, "step": 232810 }, { "epoch": 1.9659285216693758, "grad_norm": 0.10555975139141083, "learning_rate": 8.833638025325663e-09, "loss": 0.0059, "step": 232820 }, { "epoch": 1.9660129615165398, "grad_norm": 0.48346248269081116, "learning_rate": 8.789909204743875e-09, "loss": 0.0051, "step": 232830 }, { "epoch": 1.9660974013637036, "grad_norm": 0.2992316782474518, "learning_rate": 8.74628879222239e-09, "loss": 0.0039, "step": 232840 }, { "epoch": 1.9661818412108674, "grad_norm": 0.021495774388313293, "learning_rate": 8.70277678870879e-09, "loss": 0.0066, "step": 232850 }, { "epoch": 1.9662662810580311, "grad_norm": 0.15063580870628357, "learning_rate": 8.65937319514787e-09, "loss": 0.0066, "step": 232860 }, { "epoch": 1.9663507209051951, "grad_norm": 0.29691082239151, "learning_rate": 8.616078012482765e-09, "loss": 0.0055, "step": 232870 }, { "epoch": 1.9664351607523591, "grad_norm": 0.3110635280609131, "learning_rate": 8.572891241653836e-09, "loss": 0.0049, "step": 232880 }, { "epoch": 1.966519600599523, "grad_norm": 0.25242874026298523, "learning_rate": 8.529812883598665e-09, "loss": 0.0062, "step": 232890 }, { "epoch": 1.9666040404466867, "grad_norm": 0.24398280680179596, "learning_rate": 8.486842939252615e-09, "loss": 0.0173, "step": 232900 }, { "epoch": 1.9666884802938507, "grad_norm": 0.304570734500885, "learning_rate": 8.443981409550495e-09, "loss": 0.0053, "step": 232910 }, { "epoch": 1.9667729201410147, "grad_norm": 0.07918345928192139, "learning_rate": 8.401228295421559e-09, "loss": 0.0048, "step": 232920 }, { "epoch": 1.9668573599881785, "grad_norm": 0.17714187502861023, "learning_rate": 8.358583597795068e-09, "loss": 0.0093, "step": 232930 }, { "epoch": 1.9669417998353422, "grad_norm": 0.18007828295230865, "learning_rate": 8.316047317597498e-09, "loss": 0.0057, "step": 232940 }, { "epoch": 1.9670262396825062, "grad_norm": 0.1674443632364273, "learning_rate": 8.273619455752003e-09, "loss": 0.0061, "step": 232950 }, { "epoch": 1.96711067952967, "grad_norm": 0.12431337684392929, "learning_rate": 8.231300013181177e-09, "loss": 0.0073, "step": 232960 }, { "epoch": 1.967195119376834, "grad_norm": 0.2102326601743698, "learning_rate": 8.189088990803173e-09, "loss": 0.0068, "step": 232970 }, { "epoch": 1.9672795592239978, "grad_norm": 0.1398458182811737, "learning_rate": 8.146986389535594e-09, "loss": 0.0103, "step": 232980 }, { "epoch": 1.9673639990711616, "grad_norm": 0.06699545681476593, "learning_rate": 8.104992210292151e-09, "loss": 0.0032, "step": 232990 }, { "epoch": 1.9674484389183255, "grad_norm": 0.14386393129825592, "learning_rate": 8.063106453986002e-09, "loss": 0.0053, "step": 233000 }, { "epoch": 1.9675328787654895, "grad_norm": 0.1761196404695511, "learning_rate": 8.021329121525867e-09, "loss": 0.0064, "step": 233010 }, { "epoch": 1.9676173186126533, "grad_norm": 0.25772446393966675, "learning_rate": 7.979660213819351e-09, "loss": 0.0043, "step": 233020 }, { "epoch": 1.967701758459817, "grad_norm": 0.4267905354499817, "learning_rate": 7.938099731771842e-09, "loss": 0.008, "step": 233030 }, { "epoch": 1.967786198306981, "grad_norm": 0.20565566420555115, "learning_rate": 7.896647676285397e-09, "loss": 0.0071, "step": 233040 }, { "epoch": 1.967870638154145, "grad_norm": 0.4106169044971466, "learning_rate": 7.855304048261515e-09, "loss": 0.007, "step": 233050 }, { "epoch": 1.9679550780013089, "grad_norm": 0.5239765048027039, "learning_rate": 7.8140688485967e-09, "loss": 0.0056, "step": 233060 }, { "epoch": 1.9680395178484726, "grad_norm": 0.00526995537802577, "learning_rate": 7.772942078188017e-09, "loss": 0.0063, "step": 233070 }, { "epoch": 1.9681239576956364, "grad_norm": 0.08994211256504059, "learning_rate": 7.731923737927526e-09, "loss": 0.0042, "step": 233080 }, { "epoch": 1.9682083975428004, "grad_norm": 0.07471098750829697, "learning_rate": 7.69101382870674e-09, "loss": 0.0047, "step": 233090 }, { "epoch": 1.9682928373899644, "grad_norm": 0.049162622541189194, "learning_rate": 7.650212351414388e-09, "loss": 0.0025, "step": 233100 }, { "epoch": 1.9683772772371282, "grad_norm": 0.10604504495859146, "learning_rate": 7.609519306935875e-09, "loss": 0.0065, "step": 233110 }, { "epoch": 1.968461717084292, "grad_norm": 0.08713164925575256, "learning_rate": 7.56893469615605e-09, "loss": 0.0056, "step": 233120 }, { "epoch": 1.968546156931456, "grad_norm": 0.3303489685058594, "learning_rate": 7.528458519955873e-09, "loss": 0.0052, "step": 233130 }, { "epoch": 1.96863059677862, "grad_norm": 0.1750929057598114, "learning_rate": 7.488090779214086e-09, "loss": 0.0055, "step": 233140 }, { "epoch": 1.9687150366257837, "grad_norm": 0.42998963594436646, "learning_rate": 7.44783147480832e-09, "loss": 0.0088, "step": 233150 }, { "epoch": 1.9687994764729475, "grad_norm": 0.1992226243019104, "learning_rate": 7.407680607612322e-09, "loss": 0.0034, "step": 233160 }, { "epoch": 1.9688839163201115, "grad_norm": 0.26361092925071716, "learning_rate": 7.367638178498726e-09, "loss": 0.0039, "step": 233170 }, { "epoch": 1.9689683561672755, "grad_norm": 0.5871599316596985, "learning_rate": 7.327704188336837e-09, "loss": 0.011, "step": 233180 }, { "epoch": 1.9690527960144393, "grad_norm": 0.42587199807167053, "learning_rate": 7.28787863799374e-09, "loss": 0.0084, "step": 233190 }, { "epoch": 1.969137235861603, "grad_norm": 0.4392522871494293, "learning_rate": 7.248161528335407e-09, "loss": 0.004, "step": 233200 }, { "epoch": 1.9692216757087668, "grad_norm": 0.16454967856407166, "learning_rate": 7.2085528602233725e-09, "loss": 0.0016, "step": 233210 }, { "epoch": 1.9693061155559308, "grad_norm": 0.24040596187114716, "learning_rate": 7.169052634519169e-09, "loss": 0.0091, "step": 233220 }, { "epoch": 1.9693905554030948, "grad_norm": 0.11216860264539719, "learning_rate": 7.129660852079335e-09, "loss": 0.0052, "step": 233230 }, { "epoch": 1.9694749952502586, "grad_norm": 0.18171775341033936, "learning_rate": 7.090377513760405e-09, "loss": 0.0068, "step": 233240 }, { "epoch": 1.9695594350974224, "grad_norm": 0.11357022821903229, "learning_rate": 7.0512026204150316e-09, "loss": 0.0129, "step": 233250 }, { "epoch": 1.9696438749445864, "grad_norm": 0.8112859129905701, "learning_rate": 7.012136172894757e-09, "loss": 0.0057, "step": 233260 }, { "epoch": 1.9697283147917504, "grad_norm": 0.267691969871521, "learning_rate": 6.97317817204779e-09, "loss": 0.0104, "step": 233270 }, { "epoch": 1.9698127546389141, "grad_norm": 0.4124923050403595, "learning_rate": 6.934328618720121e-09, "loss": 0.0058, "step": 233280 }, { "epoch": 1.969897194486078, "grad_norm": 0.29696017503738403, "learning_rate": 6.895587513755519e-09, "loss": 0.0079, "step": 233290 }, { "epoch": 1.9699816343332417, "grad_norm": 0.3509746193885803, "learning_rate": 6.856954857995535e-09, "loss": 0.01, "step": 233300 }, { "epoch": 1.9700660741804057, "grad_norm": 0.3080432415008545, "learning_rate": 6.818430652279495e-09, "loss": 0.0072, "step": 233310 }, { "epoch": 1.9701505140275697, "grad_norm": 0.15170927345752716, "learning_rate": 6.780014897443399e-09, "loss": 0.0056, "step": 233320 }, { "epoch": 1.9702349538747335, "grad_norm": 0.0476837158203125, "learning_rate": 6.741707594322688e-09, "loss": 0.004, "step": 233330 }, { "epoch": 1.9703193937218972, "grad_norm": 0.09089431911706924, "learning_rate": 6.7035087437489214e-09, "loss": 0.0064, "step": 233340 }, { "epoch": 1.9704038335690612, "grad_norm": 0.13713540136814117, "learning_rate": 6.665418346551433e-09, "loss": 0.0071, "step": 233350 }, { "epoch": 1.9704882734162252, "grad_norm": 0.22020038962364197, "learning_rate": 6.627436403557896e-09, "loss": 0.0099, "step": 233360 }, { "epoch": 1.970572713263389, "grad_norm": 0.3267670273780823, "learning_rate": 6.589562915593206e-09, "loss": 0.0039, "step": 233370 }, { "epoch": 1.9706571531105528, "grad_norm": 0.5401843190193176, "learning_rate": 6.5517978834805925e-09, "loss": 0.0069, "step": 233380 }, { "epoch": 1.9707415929577168, "grad_norm": 0.40692397952079773, "learning_rate": 6.5141413080388464e-09, "loss": 0.006, "step": 233390 }, { "epoch": 1.9708260328048808, "grad_norm": 0.33622193336486816, "learning_rate": 6.47659319008731e-09, "loss": 0.0039, "step": 233400 }, { "epoch": 1.9709104726520446, "grad_norm": 0.07326450198888779, "learning_rate": 6.439153530440334e-09, "loss": 0.0053, "step": 233410 }, { "epoch": 1.9709949124992083, "grad_norm": 0.2381436675786972, "learning_rate": 6.401822329911711e-09, "loss": 0.0037, "step": 233420 }, { "epoch": 1.971079352346372, "grad_norm": 0.5687748193740845, "learning_rate": 6.364599589312459e-09, "loss": 0.0045, "step": 233430 }, { "epoch": 1.971163792193536, "grad_norm": 0.21758146584033966, "learning_rate": 6.327485309450821e-09, "loss": 0.0056, "step": 233440 }, { "epoch": 1.9712482320407, "grad_norm": 0.26907166838645935, "learning_rate": 6.290479491132817e-09, "loss": 0.0071, "step": 233450 }, { "epoch": 1.9713326718878639, "grad_norm": 0.24384281039237976, "learning_rate": 6.25358213516225e-09, "loss": 0.0081, "step": 233460 }, { "epoch": 1.9714171117350276, "grad_norm": 0.7272801995277405, "learning_rate": 6.216793242340702e-09, "loss": 0.0085, "step": 233470 }, { "epoch": 1.9715015515821916, "grad_norm": 0.1296808272600174, "learning_rate": 6.180112813466976e-09, "loss": 0.0048, "step": 233480 }, { "epoch": 1.9715859914293556, "grad_norm": 0.24239398539066315, "learning_rate": 6.1435408493376594e-09, "loss": 0.0043, "step": 233490 }, { "epoch": 1.9716704312765194, "grad_norm": 0.17515189945697784, "learning_rate": 6.10707735074767e-09, "loss": 0.0091, "step": 233500 }, { "epoch": 1.9717548711236832, "grad_norm": 0.3537817597389221, "learning_rate": 6.070722318488598e-09, "loss": 0.0043, "step": 233510 }, { "epoch": 1.9718393109708472, "grad_norm": 0.0014926354633644223, "learning_rate": 6.034475753349811e-09, "loss": 0.0034, "step": 233520 }, { "epoch": 1.971923750818011, "grad_norm": 0.3249693214893341, "learning_rate": 5.9983376561190134e-09, "loss": 0.0058, "step": 233530 }, { "epoch": 1.972008190665175, "grad_norm": 0.09238501638174057, "learning_rate": 5.962308027580577e-09, "loss": 0.0043, "step": 233540 }, { "epoch": 1.9720926305123387, "grad_norm": 0.13955754041671753, "learning_rate": 5.926386868518319e-09, "loss": 0.0084, "step": 233550 }, { "epoch": 1.9721770703595025, "grad_norm": 0.2383585423231125, "learning_rate": 5.8905741797110615e-09, "loss": 0.0071, "step": 233560 }, { "epoch": 1.9722615102066665, "grad_norm": 0.4086754322052002, "learning_rate": 5.854869961937071e-09, "loss": 0.0059, "step": 233570 }, { "epoch": 1.9723459500538305, "grad_norm": 0.49388790130615234, "learning_rate": 5.819274215971837e-09, "loss": 0.0049, "step": 233580 }, { "epoch": 1.9724303899009943, "grad_norm": 0.1490420550107956, "learning_rate": 5.783786942588632e-09, "loss": 0.0057, "step": 233590 }, { "epoch": 1.972514829748158, "grad_norm": 0.12259575724601746, "learning_rate": 5.7484081425579485e-09, "loss": 0.0039, "step": 233600 }, { "epoch": 1.972599269595322, "grad_norm": 0.12071184813976288, "learning_rate": 5.7131378166486176e-09, "loss": 0.0029, "step": 233610 }, { "epoch": 1.972683709442486, "grad_norm": 0.7976863980293274, "learning_rate": 5.677975965626692e-09, "loss": 0.0052, "step": 233620 }, { "epoch": 1.9727681492896498, "grad_norm": 0.45283371210098267, "learning_rate": 5.642922590255451e-09, "loss": 0.0043, "step": 233630 }, { "epoch": 1.9728525891368136, "grad_norm": 0.09130282700061798, "learning_rate": 5.6079776912970615e-09, "loss": 0.0076, "step": 233640 }, { "epoch": 1.9729370289839774, "grad_norm": 0.5643066763877869, "learning_rate": 5.5731412695092525e-09, "loss": 0.0095, "step": 233650 }, { "epoch": 1.9730214688311414, "grad_norm": 0.21311256289482117, "learning_rate": 5.53841332564975e-09, "loss": 0.0028, "step": 233660 }, { "epoch": 1.9731059086783054, "grad_norm": 0.25186237692832947, "learning_rate": 5.50379386047184e-09, "loss": 0.0053, "step": 233670 }, { "epoch": 1.9731903485254692, "grad_norm": 0.05888235941529274, "learning_rate": 5.469282874728809e-09, "loss": 0.0079, "step": 233680 }, { "epoch": 1.973274788372633, "grad_norm": 0.17950989305973053, "learning_rate": 5.434880369168949e-09, "loss": 0.0072, "step": 233690 }, { "epoch": 1.973359228219797, "grad_norm": 0.6008952260017395, "learning_rate": 5.400586344540548e-09, "loss": 0.0042, "step": 233700 }, { "epoch": 1.973443668066961, "grad_norm": 0.6033058762550354, "learning_rate": 5.366400801587457e-09, "loss": 0.0085, "step": 233710 }, { "epoch": 1.9735281079141247, "grad_norm": 0.0003061946190427989, "learning_rate": 5.33232374105297e-09, "loss": 0.0097, "step": 233720 }, { "epoch": 1.9736125477612885, "grad_norm": 0.4097440242767334, "learning_rate": 5.298355163676494e-09, "loss": 0.0081, "step": 233730 }, { "epoch": 1.9736969876084525, "grad_norm": 0.06999674439430237, "learning_rate": 5.264495070196884e-09, "loss": 0.0044, "step": 233740 }, { "epoch": 1.9737814274556165, "grad_norm": 0.2233424335718155, "learning_rate": 5.230743461347998e-09, "loss": 0.0093, "step": 233750 }, { "epoch": 1.9738658673027802, "grad_norm": 0.009977810084819794, "learning_rate": 5.197100337864802e-09, "loss": 0.0023, "step": 233760 }, { "epoch": 1.973950307149944, "grad_norm": 0.30349284410476685, "learning_rate": 5.163565700476159e-09, "loss": 0.0043, "step": 233770 }, { "epoch": 1.9740347469971078, "grad_norm": 0.5922655463218689, "learning_rate": 5.1301395499120385e-09, "loss": 0.0081, "step": 233780 }, { "epoch": 1.9741191868442718, "grad_norm": 0.35231220722198486, "learning_rate": 5.096821886897418e-09, "loss": 0.0064, "step": 233790 }, { "epoch": 1.9742036266914358, "grad_norm": 0.0017500825924798846, "learning_rate": 5.063612712156163e-09, "loss": 0.0103, "step": 233800 }, { "epoch": 1.9742880665385996, "grad_norm": 0.5708689093589783, "learning_rate": 5.030512026409917e-09, "loss": 0.0071, "step": 233810 }, { "epoch": 1.9743725063857633, "grad_norm": 0.3482787013053894, "learning_rate": 4.997519830377551e-09, "loss": 0.0037, "step": 233820 }, { "epoch": 1.9744569462329273, "grad_norm": 0.10550379753112793, "learning_rate": 4.964636124775157e-09, "loss": 0.0042, "step": 233830 }, { "epoch": 1.9745413860800913, "grad_norm": 0.13109388947486877, "learning_rate": 4.931860910317165e-09, "loss": 0.0043, "step": 233840 }, { "epoch": 1.974625825927255, "grad_norm": 0.22782041132450104, "learning_rate": 4.8991941877157835e-09, "loss": 0.0056, "step": 233850 }, { "epoch": 1.9747102657744189, "grad_norm": 0.2913289964199066, "learning_rate": 4.8666359576804435e-09, "loss": 0.005, "step": 233860 }, { "epoch": 1.9747947056215829, "grad_norm": 0.34887367486953735, "learning_rate": 4.834186220917802e-09, "loss": 0.0059, "step": 233870 }, { "epoch": 1.9748791454687467, "grad_norm": 0.2574564814567566, "learning_rate": 4.801844978133408e-09, "loss": 0.0056, "step": 233880 }, { "epoch": 1.9749635853159107, "grad_norm": 0.13890203833580017, "learning_rate": 4.769612230029475e-09, "loss": 0.0054, "step": 233890 }, { "epoch": 1.9750480251630744, "grad_norm": 0.48575884103775024, "learning_rate": 4.7374879773054435e-09, "loss": 0.0057, "step": 233900 }, { "epoch": 1.9751324650102382, "grad_norm": 0.13362230360507965, "learning_rate": 4.705472220659646e-09, "loss": 0.0057, "step": 233910 }, { "epoch": 1.9752169048574022, "grad_norm": 0.11842028796672821, "learning_rate": 4.673564960787635e-09, "loss": 0.0046, "step": 233920 }, { "epoch": 1.9753013447045662, "grad_norm": 0.3363246023654938, "learning_rate": 4.641766198381636e-09, "loss": 0.0046, "step": 233930 }, { "epoch": 1.97538578455173, "grad_norm": 0.001996001461520791, "learning_rate": 4.610075934133318e-09, "loss": 0.0029, "step": 233940 }, { "epoch": 1.9754702243988937, "grad_norm": 0.2942422330379486, "learning_rate": 4.578494168730463e-09, "loss": 0.0097, "step": 233950 }, { "epoch": 1.9755546642460577, "grad_norm": 0.08550646901130676, "learning_rate": 4.547020902858635e-09, "loss": 0.0049, "step": 233960 }, { "epoch": 1.9756391040932217, "grad_norm": 0.34083646535873413, "learning_rate": 4.515656137202284e-09, "loss": 0.0076, "step": 233970 }, { "epoch": 1.9757235439403855, "grad_norm": 0.2514665424823761, "learning_rate": 4.484399872442535e-09, "loss": 0.0067, "step": 233980 }, { "epoch": 1.9758079837875493, "grad_norm": 0.1358664482831955, "learning_rate": 4.453252109257178e-09, "loss": 0.0055, "step": 233990 }, { "epoch": 1.975892423634713, "grad_norm": 0.07133117318153381, "learning_rate": 4.422212848324004e-09, "loss": 0.0023, "step": 234000 }, { "epoch": 1.975976863481877, "grad_norm": 0.6215308308601379, "learning_rate": 4.391282090316362e-09, "loss": 0.0081, "step": 234010 }, { "epoch": 1.976061303329041, "grad_norm": 0.1696462482213974, "learning_rate": 4.360459835906494e-09, "loss": 0.0052, "step": 234020 }, { "epoch": 1.9761457431762048, "grad_norm": 0.21936364471912384, "learning_rate": 4.329746085763864e-09, "loss": 0.0066, "step": 234030 }, { "epoch": 1.9762301830233686, "grad_norm": 0.4620397686958313, "learning_rate": 4.29914084055516e-09, "loss": 0.0063, "step": 234040 }, { "epoch": 1.9763146228705326, "grad_norm": 0.08283715695142746, "learning_rate": 4.268644100945407e-09, "loss": 0.002, "step": 234050 }, { "epoch": 1.9763990627176966, "grad_norm": 0.34440940618515015, "learning_rate": 4.238255867597407e-09, "loss": 0.0095, "step": 234060 }, { "epoch": 1.9764835025648604, "grad_norm": 0.44667351245880127, "learning_rate": 4.207976141170633e-09, "loss": 0.0098, "step": 234070 }, { "epoch": 1.9765679424120242, "grad_norm": 0.24990570545196533, "learning_rate": 4.1778049223223375e-09, "loss": 0.0056, "step": 234080 }, { "epoch": 1.9766523822591882, "grad_norm": 0.13596788048744202, "learning_rate": 4.14774221170866e-09, "loss": 0.0052, "step": 234090 }, { "epoch": 1.9767368221063522, "grad_norm": 0.31429803371429443, "learning_rate": 4.117788009982415e-09, "loss": 0.0036, "step": 234100 }, { "epoch": 1.976821261953516, "grad_norm": 0.6309512257575989, "learning_rate": 4.087942317794192e-09, "loss": 0.0065, "step": 234110 }, { "epoch": 1.9769057018006797, "grad_norm": 0.8824112415313721, "learning_rate": 4.058205135791804e-09, "loss": 0.007, "step": 234120 }, { "epoch": 1.9769901416478435, "grad_norm": 0.3572956919670105, "learning_rate": 4.028576464621958e-09, "loss": 0.0075, "step": 234130 }, { "epoch": 1.9770745814950075, "grad_norm": 1.01976478099823, "learning_rate": 3.999056304926918e-09, "loss": 0.0054, "step": 234140 }, { "epoch": 1.9771590213421715, "grad_norm": 0.13977813720703125, "learning_rate": 3.969644657348947e-09, "loss": 0.0036, "step": 234150 }, { "epoch": 1.9772434611893352, "grad_norm": 0.23712655901908875, "learning_rate": 3.940341522526425e-09, "loss": 0.0069, "step": 234160 }, { "epoch": 1.977327901036499, "grad_norm": 0.14241373538970947, "learning_rate": 3.911146901096063e-09, "loss": 0.005, "step": 234170 }, { "epoch": 1.977412340883663, "grad_norm": 0.47190552949905396, "learning_rate": 3.8820607936918e-09, "loss": 0.0042, "step": 234180 }, { "epoch": 1.977496780730827, "grad_norm": 0.1512310653924942, "learning_rate": 3.8530832009453515e-09, "loss": 0.0048, "step": 234190 }, { "epoch": 1.9775812205779908, "grad_norm": 0.1335340291261673, "learning_rate": 3.824214123486214e-09, "loss": 0.0056, "step": 234200 }, { "epoch": 1.9776656604251546, "grad_norm": 0.35866305232048035, "learning_rate": 3.795453561941109e-09, "loss": 0.008, "step": 234210 }, { "epoch": 1.9777501002723183, "grad_norm": 0.06599879264831543, "learning_rate": 3.766801516935092e-09, "loss": 0.0071, "step": 234220 }, { "epoch": 1.9778345401194823, "grad_norm": 0.2711978852748871, "learning_rate": 3.7382579890904436e-09, "loss": 0.0094, "step": 234230 }, { "epoch": 1.9779189799666463, "grad_norm": 0.261308878660202, "learning_rate": 3.7098229790266673e-09, "loss": 0.004, "step": 234240 }, { "epoch": 1.9780034198138101, "grad_norm": 0.13795027136802673, "learning_rate": 3.681496487361602e-09, "loss": 0.0048, "step": 234250 }, { "epoch": 1.978087859660974, "grad_norm": 0.017946502193808556, "learning_rate": 3.6532785147108674e-09, "loss": 0.007, "step": 234260 }, { "epoch": 1.9781722995081379, "grad_norm": 0.33686310052871704, "learning_rate": 3.625169061686751e-09, "loss": 0.0051, "step": 234270 }, { "epoch": 1.9782567393553019, "grad_norm": 0.13097718358039856, "learning_rate": 3.59716812890043e-09, "loss": 0.0057, "step": 234280 }, { "epoch": 1.9783411792024657, "grad_norm": 0.35233446955680847, "learning_rate": 3.5692757169597525e-09, "loss": 0.006, "step": 234290 }, { "epoch": 1.9784256190496294, "grad_norm": 0.1789984107017517, "learning_rate": 3.5414918264708997e-09, "loss": 0.0073, "step": 234300 }, { "epoch": 1.9785100588967934, "grad_norm": 0.22670738399028778, "learning_rate": 3.513816458036168e-09, "loss": 0.0085, "step": 234310 }, { "epoch": 1.9785944987439574, "grad_norm": 0.1664072871208191, "learning_rate": 3.4862496122578525e-09, "loss": 0.0047, "step": 234320 }, { "epoch": 1.9786789385911212, "grad_norm": 0.11254987865686417, "learning_rate": 3.4587912897343646e-09, "loss": 0.0088, "step": 234330 }, { "epoch": 1.978763378438285, "grad_norm": 0.125636488199234, "learning_rate": 3.4314414910624483e-09, "loss": 0.008, "step": 234340 }, { "epoch": 1.9788478182854488, "grad_norm": 0.32530373334884644, "learning_rate": 3.404200216834963e-09, "loss": 0.0151, "step": 234350 }, { "epoch": 1.9789322581326128, "grad_norm": 0.1204603910446167, "learning_rate": 3.3770674676447677e-09, "loss": 0.0123, "step": 234360 }, { "epoch": 1.9790166979797768, "grad_norm": 0.09524217247962952, "learning_rate": 3.350043244080836e-09, "loss": 0.0038, "step": 234370 }, { "epoch": 1.9791011378269405, "grad_norm": 0.640161395072937, "learning_rate": 3.3231275467299205e-09, "loss": 0.0062, "step": 234380 }, { "epoch": 1.9791855776741043, "grad_norm": 0.09346526861190796, "learning_rate": 3.2963203761765537e-09, "loss": 0.0097, "step": 234390 }, { "epoch": 1.9792700175212683, "grad_norm": 0.4538516402244568, "learning_rate": 3.2696217330030476e-09, "loss": 0.0072, "step": 234400 }, { "epoch": 1.9793544573684323, "grad_norm": 0.09732501953840256, "learning_rate": 3.2430316177900487e-09, "loss": 0.0151, "step": 234410 }, { "epoch": 1.979438897215596, "grad_norm": 0.1368035525083542, "learning_rate": 3.216550031114318e-09, "loss": 0.0039, "step": 234420 }, { "epoch": 1.9795233370627598, "grad_norm": 0.13645300269126892, "learning_rate": 3.1901769735509513e-09, "loss": 0.0036, "step": 234430 }, { "epoch": 1.9796077769099238, "grad_norm": 0.03049304150044918, "learning_rate": 3.163912445672823e-09, "loss": 0.0046, "step": 234440 }, { "epoch": 1.9796922167570876, "grad_norm": 0.4158048927783966, "learning_rate": 3.1377564480505882e-09, "loss": 0.0051, "step": 234450 }, { "epoch": 1.9797766566042516, "grad_norm": 0.30551812052726746, "learning_rate": 3.1117089812526813e-09, "loss": 0.0089, "step": 234460 }, { "epoch": 1.9798610964514154, "grad_norm": 0.8305858373641968, "learning_rate": 3.0857700458447606e-09, "loss": 0.0078, "step": 234470 }, { "epoch": 1.9799455362985792, "grad_norm": 0.20229671895503998, "learning_rate": 3.0599396423891538e-09, "loss": 0.005, "step": 234480 }, { "epoch": 1.9800299761457432, "grad_norm": 0.4276854395866394, "learning_rate": 3.034217771448189e-09, "loss": 0.0102, "step": 234490 }, { "epoch": 1.9801144159929072, "grad_norm": 0.13159674406051636, "learning_rate": 3.0086044335797538e-09, "loss": 0.0051, "step": 234500 }, { "epoch": 1.980198855840071, "grad_norm": 0.06043471023440361, "learning_rate": 2.9830996293406245e-09, "loss": 0.002, "step": 234510 }, { "epoch": 1.9802832956872347, "grad_norm": 0.41592106223106384, "learning_rate": 2.9577033592848026e-09, "loss": 0.0048, "step": 234520 }, { "epoch": 1.9803677355343987, "grad_norm": 0.7738907933235168, "learning_rate": 2.9324156239635137e-09, "loss": 0.0105, "step": 234530 }, { "epoch": 1.9804521753815627, "grad_norm": 0.39866915345191956, "learning_rate": 2.9072364239263184e-09, "loss": 0.0076, "step": 234540 }, { "epoch": 1.9805366152287265, "grad_norm": 0.36748969554901123, "learning_rate": 2.8821657597194463e-09, "loss": 0.0043, "step": 234550 }, { "epoch": 1.9806210550758903, "grad_norm": 0.279211163520813, "learning_rate": 2.857203631888572e-09, "loss": 0.0033, "step": 234560 }, { "epoch": 1.980705494923054, "grad_norm": 0.19239124655723572, "learning_rate": 2.8323500409743743e-09, "loss": 0.0019, "step": 234570 }, { "epoch": 1.980789934770218, "grad_norm": 0.0685516819357872, "learning_rate": 2.8076049875180866e-09, "loss": 0.0031, "step": 234580 }, { "epoch": 1.980874374617382, "grad_norm": 0.2742098569869995, "learning_rate": 2.782968472056502e-09, "loss": 0.007, "step": 234590 }, { "epoch": 1.9809588144645458, "grad_norm": 0.3453047573566437, "learning_rate": 2.758440495125303e-09, "loss": 0.0033, "step": 234600 }, { "epoch": 1.9810432543117096, "grad_norm": 0.27754509449005127, "learning_rate": 2.7340210572562864e-09, "loss": 0.0041, "step": 234610 }, { "epoch": 1.9811276941588736, "grad_norm": 0.04597030580043793, "learning_rate": 2.709710158980139e-09, "loss": 0.0056, "step": 234620 }, { "epoch": 1.9812121340060376, "grad_norm": 0.14934128522872925, "learning_rate": 2.6855078008253268e-09, "loss": 0.0054, "step": 234630 }, { "epoch": 1.9812965738532013, "grad_norm": 0.2864067852497101, "learning_rate": 2.661413983316985e-09, "loss": 0.0064, "step": 234640 }, { "epoch": 1.9813810137003651, "grad_norm": 0.2688711881637573, "learning_rate": 2.637428706978584e-09, "loss": 0.0065, "step": 234650 }, { "epoch": 1.9814654535475291, "grad_norm": 0.10297157615423203, "learning_rate": 2.613551972331374e-09, "loss": 0.0063, "step": 234660 }, { "epoch": 1.9815498933946931, "grad_norm": 0.1718522608280182, "learning_rate": 2.5897837798938285e-09, "loss": 0.0044, "step": 234670 }, { "epoch": 1.981634333241857, "grad_norm": 0.16192886233329773, "learning_rate": 2.5661241301822016e-09, "loss": 0.0052, "step": 234680 }, { "epoch": 1.9817187730890207, "grad_norm": 0.30395960807800293, "learning_rate": 2.542573023709971e-09, "loss": 0.0032, "step": 234690 }, { "epoch": 1.9818032129361844, "grad_norm": 0.25371360778808594, "learning_rate": 2.519130460989505e-09, "loss": 0.0083, "step": 234700 }, { "epoch": 1.9818876527833484, "grad_norm": 0.4231247305870056, "learning_rate": 2.4957964425287307e-09, "loss": 0.0036, "step": 234710 }, { "epoch": 1.9819720926305124, "grad_norm": 0.1422703117132187, "learning_rate": 2.472570968835575e-09, "loss": 0.0059, "step": 234720 }, { "epoch": 1.9820565324776762, "grad_norm": 0.1379384696483612, "learning_rate": 2.4494540404140786e-09, "loss": 0.0028, "step": 234730 }, { "epoch": 1.98214097232484, "grad_norm": 0.19020213186740875, "learning_rate": 2.4264456577660632e-09, "loss": 0.0053, "step": 234740 }, { "epoch": 1.982225412172004, "grad_norm": 0.08326614648103714, "learning_rate": 2.4035458213922393e-09, "loss": 0.0038, "step": 234750 }, { "epoch": 1.982309852019168, "grad_norm": 0.6915023922920227, "learning_rate": 2.380754531788321e-09, "loss": 0.0032, "step": 234760 }, { "epoch": 1.9823942918663318, "grad_norm": 0.10656752437353134, "learning_rate": 2.358071789451133e-09, "loss": 0.0036, "step": 234770 }, { "epoch": 1.9824787317134955, "grad_norm": 0.21189896762371063, "learning_rate": 2.3354975948719493e-09, "loss": 0.0029, "step": 234780 }, { "epoch": 1.9825631715606593, "grad_norm": 0.22992320358753204, "learning_rate": 2.3130319485420437e-09, "loss": 0.0053, "step": 234790 }, { "epoch": 1.9826476114078233, "grad_norm": 0.1467999517917633, "learning_rate": 2.290674850948804e-09, "loss": 0.0058, "step": 234800 }, { "epoch": 1.9827320512549873, "grad_norm": 0.3363277316093445, "learning_rate": 2.268426302577953e-09, "loss": 0.0042, "step": 234810 }, { "epoch": 1.982816491102151, "grad_norm": 0.3213275969028473, "learning_rate": 2.246286303912992e-09, "loss": 0.0054, "step": 234820 }, { "epoch": 1.9829009309493149, "grad_norm": 0.36603060364723206, "learning_rate": 2.2242548554340935e-09, "loss": 0.0058, "step": 234830 }, { "epoch": 1.9829853707964789, "grad_norm": 0.0933137908577919, "learning_rate": 2.2023319576208733e-09, "loss": 0.0037, "step": 234840 }, { "epoch": 1.9830698106436428, "grad_norm": 0.2426687330007553, "learning_rate": 2.180517610948507e-09, "loss": 0.0057, "step": 234850 }, { "epoch": 1.9831542504908066, "grad_norm": 0.38430070877075195, "learning_rate": 2.15881181589106e-09, "loss": 0.0084, "step": 234860 }, { "epoch": 1.9832386903379704, "grad_norm": 0.12034989148378372, "learning_rate": 2.137214572919821e-09, "loss": 0.0043, "step": 234870 }, { "epoch": 1.9833231301851344, "grad_norm": 0.08536813408136368, "learning_rate": 2.115725882504416e-09, "loss": 0.004, "step": 234880 }, { "epoch": 1.9834075700322984, "grad_norm": 0.07386838644742966, "learning_rate": 2.094345745111692e-09, "loss": 0.0086, "step": 234890 }, { "epoch": 1.9834920098794622, "grad_norm": 0.4040534198284149, "learning_rate": 2.0730741612051687e-09, "loss": 0.0035, "step": 234900 }, { "epoch": 1.983576449726626, "grad_norm": 0.232939213514328, "learning_rate": 2.0519111312472528e-09, "loss": 0.0077, "step": 234910 }, { "epoch": 1.9836608895737897, "grad_norm": 0.5415394306182861, "learning_rate": 2.0308566556975774e-09, "loss": 0.0075, "step": 234920 }, { "epoch": 1.9837453294209537, "grad_norm": 0.61712646484375, "learning_rate": 2.009910735013554e-09, "loss": 0.0086, "step": 234930 }, { "epoch": 1.9838297692681177, "grad_norm": 0.32154837250709534, "learning_rate": 1.989073369650374e-09, "loss": 0.0067, "step": 234940 }, { "epoch": 1.9839142091152815, "grad_norm": 0.15265068411827087, "learning_rate": 1.9683445600604536e-09, "loss": 0.0061, "step": 234950 }, { "epoch": 1.9839986489624453, "grad_norm": 0.35133641958236694, "learning_rate": 1.9477243066934327e-09, "loss": 0.0051, "step": 234960 }, { "epoch": 1.9840830888096093, "grad_norm": 0.31417620182037354, "learning_rate": 1.9272126099978417e-09, "loss": 0.0064, "step": 234970 }, { "epoch": 1.9841675286567733, "grad_norm": 0.2508220076560974, "learning_rate": 1.90680947041888e-09, "loss": 0.0036, "step": 234980 }, { "epoch": 1.984251968503937, "grad_norm": 0.22506707906723022, "learning_rate": 1.8865148884000817e-09, "loss": 0.0121, "step": 234990 }, { "epoch": 1.9843364083511008, "grad_norm": 0.40669938921928406, "learning_rate": 1.86632886438165e-09, "loss": 0.0032, "step": 235000 }, { "epoch": 1.9844208481982648, "grad_norm": 0.09459399431943893, "learning_rate": 1.8462513988026787e-09, "loss": 0.0044, "step": 235010 }, { "epoch": 1.9845052880454286, "grad_norm": 0.38781648874282837, "learning_rate": 1.8262824920983746e-09, "loss": 0.0088, "step": 235020 }, { "epoch": 1.9845897278925926, "grad_norm": 0.07353712618350983, "learning_rate": 1.8064221447033902e-09, "loss": 0.0043, "step": 235030 }, { "epoch": 1.9846741677397564, "grad_norm": 0.5090818405151367, "learning_rate": 1.7866703570484921e-09, "loss": 0.0068, "step": 235040 }, { "epoch": 1.9847586075869201, "grad_norm": 0.12600357830524445, "learning_rate": 1.7670271295633368e-09, "loss": 0.0044, "step": 235050 }, { "epoch": 1.9848430474340841, "grad_norm": 0.23083136975765228, "learning_rate": 1.7474924626736944e-09, "loss": 0.0067, "step": 235060 }, { "epoch": 1.9849274872812481, "grad_norm": 0.020422594621777534, "learning_rate": 1.7280663568042255e-09, "loss": 0.0041, "step": 235070 }, { "epoch": 1.985011927128412, "grad_norm": 0.10130546987056732, "learning_rate": 1.7087488123773698e-09, "loss": 0.005, "step": 235080 }, { "epoch": 1.9850963669755757, "grad_norm": 0.13774777948856354, "learning_rate": 1.6895398298116816e-09, "loss": 0.0023, "step": 235090 }, { "epoch": 1.9851808068227397, "grad_norm": 0.19942915439605713, "learning_rate": 1.6704394095251598e-09, "loss": 0.009, "step": 235100 }, { "epoch": 1.9852652466699037, "grad_norm": 0.3734475374221802, "learning_rate": 1.6514475519324723e-09, "loss": 0.0055, "step": 235110 }, { "epoch": 1.9853496865170674, "grad_norm": 0.44890788197517395, "learning_rate": 1.6325642574460676e-09, "loss": 0.006, "step": 235120 }, { "epoch": 1.9854341263642312, "grad_norm": 0.3997938334941864, "learning_rate": 1.6137895264761728e-09, "loss": 0.0023, "step": 235130 }, { "epoch": 1.985518566211395, "grad_norm": 0.02485020086169243, "learning_rate": 1.5951233594302395e-09, "loss": 0.0056, "step": 235140 }, { "epoch": 1.985603006058559, "grad_norm": 0.6411536931991577, "learning_rate": 1.5765657567146098e-09, "loss": 0.0108, "step": 235150 }, { "epoch": 1.985687445905723, "grad_norm": 0.49865642189979553, "learning_rate": 1.558116718731184e-09, "loss": 0.0079, "step": 235160 }, { "epoch": 1.9857718857528868, "grad_norm": 0.04839629307389259, "learning_rate": 1.5397762458813082e-09, "loss": 0.003, "step": 235170 }, { "epoch": 1.9858563256000505, "grad_norm": 0.04620115086436272, "learning_rate": 1.5215443385629968e-09, "loss": 0.0044, "step": 235180 }, { "epoch": 1.9859407654472145, "grad_norm": 0.12216522544622421, "learning_rate": 1.5034209971726e-09, "loss": 0.0089, "step": 235190 }, { "epoch": 1.9860252052943785, "grad_norm": 0.22726033627986908, "learning_rate": 1.4854062221036914e-09, "loss": 0.0032, "step": 235200 }, { "epoch": 1.9861096451415423, "grad_norm": 0.14926275610923767, "learning_rate": 1.4675000137470696e-09, "loss": 0.0053, "step": 235210 }, { "epoch": 1.986194084988706, "grad_norm": 0.1752198487520218, "learning_rate": 1.449702372492423e-09, "loss": 0.0084, "step": 235220 }, { "epoch": 1.98627852483587, "grad_norm": 0.003544656792655587, "learning_rate": 1.4320132987261093e-09, "loss": 0.0094, "step": 235230 }, { "epoch": 1.986362964683034, "grad_norm": 0.4771324396133423, "learning_rate": 1.4144327928317104e-09, "loss": 0.0092, "step": 235240 }, { "epoch": 1.9864474045301979, "grad_norm": 0.14609356224536896, "learning_rate": 1.396960855191698e-09, "loss": 0.0058, "step": 235250 }, { "epoch": 1.9865318443773616, "grad_norm": 0.23356294631958008, "learning_rate": 1.3795974861857685e-09, "loss": 0.006, "step": 235260 }, { "epoch": 1.9866162842245254, "grad_norm": 0.698250412940979, "learning_rate": 1.3623426861902878e-09, "loss": 0.0057, "step": 235270 }, { "epoch": 1.9867007240716894, "grad_norm": 0.2735303044319153, "learning_rate": 1.345196455580511e-09, "loss": 0.0048, "step": 235280 }, { "epoch": 1.9867851639188534, "grad_norm": 0.3963792324066162, "learning_rate": 1.3281587947283626e-09, "loss": 0.004, "step": 235290 }, { "epoch": 1.9868696037660172, "grad_norm": 0.3193782866001129, "learning_rate": 1.3112297040046574e-09, "loss": 0.0037, "step": 235300 }, { "epoch": 1.986954043613181, "grad_norm": 0.2921280562877655, "learning_rate": 1.2944091837768791e-09, "loss": 0.0067, "step": 235310 }, { "epoch": 1.987038483460345, "grad_norm": 0.21815747022628784, "learning_rate": 1.2776972344102912e-09, "loss": 0.0061, "step": 235320 }, { "epoch": 1.987122923307509, "grad_norm": 0.08460067212581635, "learning_rate": 1.2610938562673813e-09, "loss": 0.0074, "step": 235330 }, { "epoch": 1.9872073631546727, "grad_norm": 0.12876304984092712, "learning_rate": 1.2445990497089721e-09, "loss": 0.0036, "step": 235340 }, { "epoch": 1.9872918030018365, "grad_norm": 1.8708076477050781, "learning_rate": 1.2282128150942207e-09, "loss": 0.0118, "step": 235350 }, { "epoch": 1.9873762428490005, "grad_norm": 0.4970230758190155, "learning_rate": 1.2119351527778433e-09, "loss": 0.0079, "step": 235360 }, { "epoch": 1.9874606826961643, "grad_norm": 0.2656949460506439, "learning_rate": 1.1957660631140012e-09, "loss": 0.0038, "step": 235370 }, { "epoch": 1.9875451225433283, "grad_norm": 0.08342950791120529, "learning_rate": 1.1797055464535246e-09, "loss": 0.0037, "step": 235380 }, { "epoch": 1.987629562390492, "grad_norm": 0.059173863381147385, "learning_rate": 1.1637536031461338e-09, "loss": 0.0049, "step": 235390 }, { "epoch": 1.9877140022376558, "grad_norm": 0.5153182148933411, "learning_rate": 1.1479102335371085e-09, "loss": 0.0068, "step": 235400 }, { "epoch": 1.9877984420848198, "grad_norm": 0.12633003294467926, "learning_rate": 1.1321754379711725e-09, "loss": 0.007, "step": 235410 }, { "epoch": 1.9878828819319838, "grad_norm": 0.357338011264801, "learning_rate": 1.1165492167902748e-09, "loss": 0.0072, "step": 235420 }, { "epoch": 1.9879673217791476, "grad_norm": 0.010353390127420425, "learning_rate": 1.1010315703330333e-09, "loss": 0.0062, "step": 235430 }, { "epoch": 1.9880517616263114, "grad_norm": 0.2892155945301056, "learning_rate": 1.0856224989375109e-09, "loss": 0.0043, "step": 235440 }, { "epoch": 1.9881362014734754, "grad_norm": 0.4230026304721832, "learning_rate": 1.0703220029378847e-09, "loss": 0.0099, "step": 235450 }, { "epoch": 1.9882206413206394, "grad_norm": 0.11125912517309189, "learning_rate": 1.0551300826661115e-09, "loss": 0.0043, "step": 235460 }, { "epoch": 1.9883050811678031, "grad_norm": 0.17151376605033875, "learning_rate": 1.0400467384530377e-09, "loss": 0.0046, "step": 235470 }, { "epoch": 1.988389521014967, "grad_norm": 0.24165388941764832, "learning_rate": 1.025071970625624e-09, "loss": 0.0075, "step": 235480 }, { "epoch": 1.9884739608621307, "grad_norm": 0.05258028954267502, "learning_rate": 1.0102057795086107e-09, "loss": 0.0031, "step": 235490 }, { "epoch": 1.9885584007092947, "grad_norm": 0.053296227008104324, "learning_rate": 9.954481654261828e-10, "loss": 0.0059, "step": 235500 }, { "epoch": 1.9886428405564587, "grad_norm": 0.14367258548736572, "learning_rate": 9.807991286980845e-10, "loss": 0.003, "step": 235510 }, { "epoch": 1.9887272804036225, "grad_norm": 0.14453253149986267, "learning_rate": 9.6625866964295e-10, "loss": 0.0058, "step": 235520 }, { "epoch": 1.9888117202507862, "grad_norm": 0.07827950268983841, "learning_rate": 9.518267885760824e-10, "loss": 0.0064, "step": 235530 }, { "epoch": 1.9888961600979502, "grad_norm": 0.0012691236333921552, "learning_rate": 9.3750348581112e-10, "loss": 0.0061, "step": 235540 }, { "epoch": 1.9889805999451142, "grad_norm": 0.39608636498451233, "learning_rate": 9.232887616589248e-10, "loss": 0.0051, "step": 235550 }, { "epoch": 1.989065039792278, "grad_norm": 0.16510318219661713, "learning_rate": 9.091826164286943e-10, "loss": 0.0049, "step": 235560 }, { "epoch": 1.9891494796394418, "grad_norm": 0.27392175793647766, "learning_rate": 8.951850504262949e-10, "loss": 0.0068, "step": 235570 }, { "epoch": 1.9892339194866058, "grad_norm": 0.2520779073238373, "learning_rate": 8.812960639564827e-10, "loss": 0.005, "step": 235580 }, { "epoch": 1.9893183593337698, "grad_norm": 0.30087098479270935, "learning_rate": 8.675156573201282e-10, "loss": 0.0055, "step": 235590 }, { "epoch": 1.9894027991809335, "grad_norm": 0.3233768045902252, "learning_rate": 8.538438308175468e-10, "loss": 0.0092, "step": 235600 }, { "epoch": 1.9894872390280973, "grad_norm": 0.29561102390289307, "learning_rate": 8.402805847446127e-10, "loss": 0.0067, "step": 235610 }, { "epoch": 1.989571678875261, "grad_norm": 0.4864557385444641, "learning_rate": 8.268259193960904e-10, "loss": 0.007, "step": 235620 }, { "epoch": 1.989656118722425, "grad_norm": 0.45904049277305603, "learning_rate": 8.134798350650786e-10, "loss": 0.0096, "step": 235630 }, { "epoch": 1.989740558569589, "grad_norm": 0.43982994556427, "learning_rate": 8.002423320402353e-10, "loss": 0.0086, "step": 235640 }, { "epoch": 1.9898249984167529, "grad_norm": 0.12433885782957077, "learning_rate": 7.871134106102186e-10, "loss": 0.0038, "step": 235650 }, { "epoch": 1.9899094382639166, "grad_norm": 0.42036473751068115, "learning_rate": 7.740930710592454e-10, "loss": 0.0048, "step": 235660 }, { "epoch": 1.9899938781110806, "grad_norm": 0.136670783162117, "learning_rate": 7.611813136709778e-10, "loss": 0.0058, "step": 235670 }, { "epoch": 1.9900783179582446, "grad_norm": 0.04170548543334007, "learning_rate": 7.483781387251921e-10, "loss": 0.0036, "step": 235680 }, { "epoch": 1.9901627578054084, "grad_norm": 0.19283811748027802, "learning_rate": 7.356835464999989e-10, "loss": 0.0082, "step": 235690 }, { "epoch": 1.9902471976525722, "grad_norm": 0.17111830413341522, "learning_rate": 7.230975372718441e-10, "loss": 0.0103, "step": 235700 }, { "epoch": 1.990331637499736, "grad_norm": 0.3956637680530548, "learning_rate": 7.106201113132871e-10, "loss": 0.0047, "step": 235710 }, { "epoch": 1.9904160773469, "grad_norm": 0.1537417322397232, "learning_rate": 6.982512688952226e-10, "loss": 0.0088, "step": 235720 }, { "epoch": 1.990500517194064, "grad_norm": 0.12282709032297134, "learning_rate": 6.859910102868794e-10, "loss": 0.0065, "step": 235730 }, { "epoch": 1.9905849570412277, "grad_norm": 0.43138203024864197, "learning_rate": 6.738393357547113e-10, "loss": 0.0067, "step": 235740 }, { "epoch": 1.9906693968883915, "grad_norm": 0.23593424260616302, "learning_rate": 6.61796245561841e-10, "loss": 0.0159, "step": 235750 }, { "epoch": 1.9907538367355555, "grad_norm": 0.3871917426586151, "learning_rate": 6.498617399708363e-10, "loss": 0.0091, "step": 235760 }, { "epoch": 1.9908382765827195, "grad_norm": 0.4005966782569885, "learning_rate": 6.380358192398239e-10, "loss": 0.0042, "step": 235770 }, { "epoch": 1.9909227164298833, "grad_norm": 0.07509897649288177, "learning_rate": 6.263184836263758e-10, "loss": 0.0054, "step": 235780 }, { "epoch": 1.991007156277047, "grad_norm": 0.23701204359531403, "learning_rate": 6.14709733385288e-10, "loss": 0.0084, "step": 235790 }, { "epoch": 1.991091596124211, "grad_norm": 0.2421122044324875, "learning_rate": 6.032095687674711e-10, "loss": 0.0101, "step": 235800 }, { "epoch": 1.991176035971375, "grad_norm": 0.0031015989370644093, "learning_rate": 5.918179900238352e-10, "loss": 0.0085, "step": 235810 }, { "epoch": 1.9912604758185388, "grad_norm": 0.1971542090177536, "learning_rate": 5.805349974014052e-10, "loss": 0.0083, "step": 235820 }, { "epoch": 1.9913449156657026, "grad_norm": 0.02870827540755272, "learning_rate": 5.693605911455402e-10, "loss": 0.0044, "step": 235830 }, { "epoch": 1.9914293555128664, "grad_norm": 0.8669508099555969, "learning_rate": 5.582947714982689e-10, "loss": 0.0048, "step": 235840 }, { "epoch": 1.9915137953600304, "grad_norm": 0.2853430509567261, "learning_rate": 5.473375387005098e-10, "loss": 0.0031, "step": 235850 }, { "epoch": 1.9915982352071944, "grad_norm": 0.37568792700767517, "learning_rate": 5.364888929904056e-10, "loss": 0.0053, "step": 235860 }, { "epoch": 1.9916826750543581, "grad_norm": 0.3557461202144623, "learning_rate": 5.257488346027684e-10, "loss": 0.0046, "step": 235870 }, { "epoch": 1.991767114901522, "grad_norm": 0.36117833852767944, "learning_rate": 5.151173637718554e-10, "loss": 0.005, "step": 235880 }, { "epoch": 1.991851554748686, "grad_norm": 0.06485728174448013, "learning_rate": 5.045944807274827e-10, "loss": 0.0035, "step": 235890 }, { "epoch": 1.99193599459585, "grad_norm": 0.10185305774211884, "learning_rate": 4.941801856994666e-10, "loss": 0.0035, "step": 235900 }, { "epoch": 1.9920204344430137, "grad_norm": 0.2130269557237625, "learning_rate": 4.83874478912627e-10, "loss": 0.0045, "step": 235910 }, { "epoch": 1.9921048742901775, "grad_norm": 0.08165845274925232, "learning_rate": 4.736773605923395e-10, "loss": 0.0038, "step": 235920 }, { "epoch": 1.9921893141373415, "grad_norm": 0.3574807643890381, "learning_rate": 4.635888309589831e-10, "loss": 0.0067, "step": 235930 }, { "epoch": 1.9922737539845052, "grad_norm": 0.24420402944087982, "learning_rate": 4.5360889023182695e-10, "loss": 0.0028, "step": 235940 }, { "epoch": 1.9923581938316692, "grad_norm": 0.5072728395462036, "learning_rate": 4.437375386279197e-10, "loss": 0.0062, "step": 235950 }, { "epoch": 1.992442633678833, "grad_norm": 0.19829119741916656, "learning_rate": 4.339747763615343e-10, "loss": 0.0056, "step": 235960 }, { "epoch": 1.9925270735259968, "grad_norm": 0.39060819149017334, "learning_rate": 4.2432060364416825e-10, "loss": 0.0072, "step": 235970 }, { "epoch": 1.9926115133731608, "grad_norm": 0.11956916004419327, "learning_rate": 4.14775020686764e-10, "loss": 0.0071, "step": 235980 }, { "epoch": 1.9926959532203248, "grad_norm": 0.005635562352836132, "learning_rate": 4.05338027695823e-10, "loss": 0.0061, "step": 235990 }, { "epoch": 1.9927803930674886, "grad_norm": 0.09190724045038223, "learning_rate": 3.9600962487618134e-10, "loss": 0.0038, "step": 236000 }, { "epoch": 1.9928648329146523, "grad_norm": 0.46661463379859924, "learning_rate": 3.867898124310099e-10, "loss": 0.0088, "step": 236010 }, { "epoch": 1.9929492727618163, "grad_norm": 0.24129046499729156, "learning_rate": 3.7767859055959364e-10, "loss": 0.005, "step": 236020 }, { "epoch": 1.9930337126089803, "grad_norm": 0.15315717458724976, "learning_rate": 3.686759594612177e-10, "loss": 0.0074, "step": 236030 }, { "epoch": 1.993118152456144, "grad_norm": 0.20242954790592194, "learning_rate": 3.5978191933017105e-10, "loss": 0.0099, "step": 236040 }, { "epoch": 1.9932025923033079, "grad_norm": 0.19342343509197235, "learning_rate": 3.509964703607427e-10, "loss": 0.0057, "step": 236050 }, { "epoch": 1.9932870321504716, "grad_norm": 0.010290603153407574, "learning_rate": 3.4231961274222567e-10, "loss": 0.0105, "step": 236060 }, { "epoch": 1.9933714719976356, "grad_norm": 0.46781307458877563, "learning_rate": 3.337513466650233e-10, "loss": 0.0074, "step": 236070 }, { "epoch": 1.9934559118447996, "grad_norm": 0.2736954689025879, "learning_rate": 3.252916723134325e-10, "loss": 0.0093, "step": 236080 }, { "epoch": 1.9935403516919634, "grad_norm": 0.4955291748046875, "learning_rate": 3.169405898723055e-10, "loss": 0.0082, "step": 236090 }, { "epoch": 1.9936247915391272, "grad_norm": 0.35224077105522156, "learning_rate": 3.086980995226085e-10, "loss": 0.0073, "step": 236100 }, { "epoch": 1.9937092313862912, "grad_norm": 0.20278573036193848, "learning_rate": 3.0056420144364274e-10, "loss": 0.0054, "step": 236110 }, { "epoch": 1.9937936712334552, "grad_norm": 0.4728107452392578, "learning_rate": 2.925388958119335e-10, "loss": 0.0034, "step": 236120 }, { "epoch": 1.993878111080619, "grad_norm": 0.03740473836660385, "learning_rate": 2.846221828012308e-10, "loss": 0.0048, "step": 236130 }, { "epoch": 1.9939625509277827, "grad_norm": 0.09214048832654953, "learning_rate": 2.768140625841742e-10, "loss": 0.0058, "step": 236140 }, { "epoch": 1.9940469907749467, "grad_norm": 0.22083379328250885, "learning_rate": 2.691145353306279e-10, "loss": 0.0041, "step": 236150 }, { "epoch": 1.9941314306221107, "grad_norm": 0.3201301097869873, "learning_rate": 2.615236012071254e-10, "loss": 0.0084, "step": 236160 }, { "epoch": 1.9942158704692745, "grad_norm": 0.47323283553123474, "learning_rate": 2.540412603785347e-10, "loss": 0.0022, "step": 236170 }, { "epoch": 1.9943003103164383, "grad_norm": 0.08931645005941391, "learning_rate": 2.466675130080587e-10, "loss": 0.0054, "step": 236180 }, { "epoch": 1.994384750163602, "grad_norm": 0.12664946913719177, "learning_rate": 2.394023592550143e-10, "loss": 0.0033, "step": 236190 }, { "epoch": 1.994469190010766, "grad_norm": 0.14545446634292603, "learning_rate": 2.3224579927760836e-10, "loss": 0.0065, "step": 236200 }, { "epoch": 1.99455362985793, "grad_norm": 0.171514630317688, "learning_rate": 2.251978332312721e-10, "loss": 0.0044, "step": 236210 }, { "epoch": 1.9946380697050938, "grad_norm": 0.07984656095504761, "learning_rate": 2.1825846126921623e-10, "loss": 0.0128, "step": 236220 }, { "epoch": 1.9947225095522576, "grad_norm": 0.24139486253261566, "learning_rate": 2.1142768354243116e-10, "loss": 0.0081, "step": 236230 }, { "epoch": 1.9948069493994216, "grad_norm": 0.29359757900238037, "learning_rate": 2.047055001985765e-10, "loss": 0.0057, "step": 236240 }, { "epoch": 1.9948913892465856, "grad_norm": 0.4036403000354767, "learning_rate": 1.9809191138364658e-10, "loss": 0.0089, "step": 236250 }, { "epoch": 1.9949758290937494, "grad_norm": 0.7377258539199829, "learning_rate": 1.9158691724197042e-10, "loss": 0.0059, "step": 236260 }, { "epoch": 1.9950602689409132, "grad_norm": 0.14075836539268494, "learning_rate": 1.8519051791454633e-10, "loss": 0.0079, "step": 236270 }, { "epoch": 1.9951447087880771, "grad_norm": 0.4691273272037506, "learning_rate": 1.789027135395971e-10, "loss": 0.003, "step": 236280 }, { "epoch": 1.995229148635241, "grad_norm": 0.31431323289871216, "learning_rate": 1.7272350425479035e-10, "loss": 0.0087, "step": 236290 }, { "epoch": 1.995313588482405, "grad_norm": 0.33355197310447693, "learning_rate": 1.6665289019390795e-10, "loss": 0.0115, "step": 236300 }, { "epoch": 1.9953980283295687, "grad_norm": 0.16460761427879333, "learning_rate": 1.6069087148851137e-10, "loss": 0.0052, "step": 236310 }, { "epoch": 1.9954824681767325, "grad_norm": 0.06548289954662323, "learning_rate": 1.5483744826849668e-10, "loss": 0.0089, "step": 236320 }, { "epoch": 1.9955669080238965, "grad_norm": 0.36108046770095825, "learning_rate": 1.490926206609844e-10, "loss": 0.0097, "step": 236330 }, { "epoch": 1.9956513478710605, "grad_norm": 0.20600545406341553, "learning_rate": 1.4345638879087464e-10, "loss": 0.0049, "step": 236340 }, { "epoch": 1.9957357877182242, "grad_norm": 0.2232927829027176, "learning_rate": 1.3792875277973683e-10, "loss": 0.0085, "step": 236350 }, { "epoch": 1.995820227565388, "grad_norm": 0.2231009602546692, "learning_rate": 1.3250971274858525e-10, "loss": 0.0041, "step": 236360 }, { "epoch": 1.995904667412552, "grad_norm": 0.008728435263037682, "learning_rate": 1.2719926881454848e-10, "loss": 0.0034, "step": 236370 }, { "epoch": 1.995989107259716, "grad_norm": 0.03604479134082794, "learning_rate": 1.2199742109308966e-10, "loss": 0.0077, "step": 236380 }, { "epoch": 1.9960735471068798, "grad_norm": 0.08342063426971436, "learning_rate": 1.1690416969745155e-10, "loss": 0.0048, "step": 236390 }, { "epoch": 1.9961579869540436, "grad_norm": 0.2320503145456314, "learning_rate": 1.1191951473810137e-10, "loss": 0.0082, "step": 236400 }, { "epoch": 1.9962424268012073, "grad_norm": 0.49301043152809143, "learning_rate": 1.0704345632328583e-10, "loss": 0.0055, "step": 236410 }, { "epoch": 1.9963268666483713, "grad_norm": 0.1427074521780014, "learning_rate": 1.0227599455847614e-10, "loss": 0.0075, "step": 236420 }, { "epoch": 1.9964113064955353, "grad_norm": 0.23704443871974945, "learning_rate": 9.761712954803326e-11, "loss": 0.0093, "step": 236430 }, { "epoch": 1.996495746342699, "grad_norm": 0.08593946695327759, "learning_rate": 9.306686139298749e-11, "loss": 0.0056, "step": 236440 }, { "epoch": 1.9965801861898629, "grad_norm": 0.0012261155061423779, "learning_rate": 8.862519019159355e-11, "loss": 0.0033, "step": 236450 }, { "epoch": 1.9966646260370269, "grad_norm": 0.38939857482910156, "learning_rate": 8.429211604099597e-11, "loss": 0.0065, "step": 236460 }, { "epoch": 1.9967490658841909, "grad_norm": 0.33946338295936584, "learning_rate": 8.006763903445347e-11, "loss": 0.0062, "step": 236470 }, { "epoch": 1.9968335057313547, "grad_norm": 0.09644557535648346, "learning_rate": 7.595175926466969e-11, "loss": 0.0055, "step": 236480 }, { "epoch": 1.9969179455785184, "grad_norm": 0.40846773982048035, "learning_rate": 7.194447682046246e-11, "loss": 0.0124, "step": 236490 }, { "epoch": 1.9970023854256824, "grad_norm": 0.7336077094078064, "learning_rate": 6.80457917895394e-11, "loss": 0.0102, "step": 236500 }, { "epoch": 1.9970868252728464, "grad_norm": 0.2976542115211487, "learning_rate": 6.425570425572236e-11, "loss": 0.0051, "step": 236510 }, { "epoch": 1.9971712651200102, "grad_norm": 0.1341790109872818, "learning_rate": 6.057421430172295e-11, "loss": 0.0086, "step": 236520 }, { "epoch": 1.997255704967174, "grad_norm": 0.38734355568885803, "learning_rate": 5.70013220069221e-11, "loss": 0.0071, "step": 236530 }, { "epoch": 1.9973401448143377, "grad_norm": 0.1782805621623993, "learning_rate": 5.353702745014566e-11, "loss": 0.0062, "step": 236540 }, { "epoch": 1.9974245846615017, "grad_norm": 0.2812137007713318, "learning_rate": 5.018133070577858e-11, "loss": 0.008, "step": 236550 }, { "epoch": 1.9975090245086657, "grad_norm": 0.40670207142829895, "learning_rate": 4.693423184709556e-11, "loss": 0.0078, "step": 236560 }, { "epoch": 1.9975934643558295, "grad_norm": 0.08966337144374847, "learning_rate": 4.379573094404066e-11, "loss": 0.0049, "step": 236570 }, { "epoch": 1.9976779042029933, "grad_norm": 0.39062660932540894, "learning_rate": 4.0765828065447713e-11, "loss": 0.0065, "step": 236580 }, { "epoch": 1.9977623440501573, "grad_norm": 0.0973239615559578, "learning_rate": 3.7844523276264756e-11, "loss": 0.0028, "step": 236590 }, { "epoch": 1.9978467838973213, "grad_norm": 0.10427127778530121, "learning_rate": 3.503181664088473e-11, "loss": 0.0069, "step": 236600 }, { "epoch": 1.997931223744485, "grad_norm": 0.2112521231174469, "learning_rate": 3.23277082203699e-11, "loss": 0.0064, "step": 236610 }, { "epoch": 1.9980156635916488, "grad_norm": 0.19656828045845032, "learning_rate": 2.973219807245187e-11, "loss": 0.0061, "step": 236620 }, { "epoch": 1.9981001034388126, "grad_norm": 0.2138976901769638, "learning_rate": 2.7245286254862223e-11, "loss": 0.0071, "step": 236630 }, { "epoch": 1.9981845432859766, "grad_norm": 0.3593355119228363, "learning_rate": 2.4866972820336566e-11, "loss": 0.0061, "step": 236640 }, { "epoch": 1.9982689831331406, "grad_norm": 0.43258029222488403, "learning_rate": 2.2597257821610486e-11, "loss": 0.005, "step": 236650 }, { "epoch": 1.9983534229803044, "grad_norm": 0.03804236277937889, "learning_rate": 2.0436141306978685e-11, "loss": 0.0068, "step": 236660 }, { "epoch": 1.9984378628274682, "grad_norm": 0.29247885942459106, "learning_rate": 1.8383623324180754e-11, "loss": 0.0032, "step": 236670 }, { "epoch": 1.9985223026746322, "grad_norm": 0.0501822754740715, "learning_rate": 1.6439703917625617e-11, "loss": 0.0045, "step": 236680 }, { "epoch": 1.9986067425217962, "grad_norm": 0.30726802349090576, "learning_rate": 1.460438312894663e-11, "loss": 0.0057, "step": 236690 }, { "epoch": 1.99869118236896, "grad_norm": 0.09306943416595459, "learning_rate": 1.287766099866694e-11, "loss": 0.0058, "step": 236700 }, { "epoch": 1.9987756222161237, "grad_norm": 0.010029187425971031, "learning_rate": 1.1259537563979018e-11, "loss": 0.0053, "step": 236710 }, { "epoch": 1.9988600620632877, "grad_norm": 0.14235156774520874, "learning_rate": 9.750012860409997e-12, "loss": 0.0027, "step": 236720 }, { "epoch": 1.9989445019104517, "grad_norm": 0.027954531833529472, "learning_rate": 8.349086920711457e-12, "loss": 0.005, "step": 236730 }, { "epoch": 1.9990289417576155, "grad_norm": 0.3513014018535614, "learning_rate": 7.056759774304312e-12, "loss": 0.0063, "step": 236740 }, { "epoch": 1.9991133816047792, "grad_norm": 0.460551917552948, "learning_rate": 5.8730314506094674e-12, "loss": 0.0073, "step": 236750 }, { "epoch": 1.999197821451943, "grad_norm": 0.4349982440471649, "learning_rate": 4.797901974606944e-12, "loss": 0.0039, "step": 236760 }, { "epoch": 1.999282261299107, "grad_norm": 0.24565884470939636, "learning_rate": 3.831371369611425e-12, "loss": 0.0081, "step": 236770 }, { "epoch": 1.999366701146271, "grad_norm": 0.47670474648475647, "learning_rate": 2.973439657272259e-12, "loss": 0.0038, "step": 236780 }, { "epoch": 1.9994511409934348, "grad_norm": 0.510762631893158, "learning_rate": 2.2241068553530142e-12, "loss": 0.0057, "step": 236790 }, { "epoch": 1.9995355808405986, "grad_norm": 0.3290398418903351, "learning_rate": 1.5833729805070364e-12, "loss": 0.0095, "step": 236800 }, { "epoch": 1.9996200206877626, "grad_norm": 0.2319296896457672, "learning_rate": 1.0512380471672246e-12, "loss": 0.0093, "step": 236810 }, { "epoch": 1.9997044605349266, "grad_norm": 0.5944666862487793, "learning_rate": 6.277020658806976e-13, "loss": 0.0033, "step": 236820 }, { "epoch": 1.9997889003820903, "grad_norm": 0.5525945425033569, "learning_rate": 3.127650466394627e-13, "loss": 0.0087, "step": 236830 }, { "epoch": 1.9998733402292541, "grad_norm": 0.09104105085134506, "learning_rate": 1.0642699554974656e-13, "loss": 0.0053, "step": 236840 }, { "epoch": 1.9999577800764181, "grad_norm": 0.015089857392013073, "learning_rate": 8.687918162664232e-15, "loss": 0.0069, "step": 236850 }, { "epoch": 1.9999915560152837, "step": 236854, "total_flos": 3.430800695853318e+18, "train_loss": 0.01405828405903957, "train_runtime": 96268.9774, "train_samples_per_second": 9.841, "train_steps_per_second": 2.46 } ], "logging_steps": 10, "max_steps": 236854, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.430800695853318e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }