{ "best_metric": 0.9997238005050505, "best_model_checkpoint": "swinv2-ocr-finetuned-panjabi\\checkpoint-29415", "epoch": 20.0, "eval_steps": 10, "global_step": 117660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016998130205677375, "grad_norm": 14.915071487426758, "learning_rate": 2.5497195308516064e-08, "loss": 3.2073, "step": 10 }, { "epoch": 0.003399626041135475, "grad_norm": 11.534662246704102, "learning_rate": 5.099439061703213e-08, "loss": 3.2761, "step": 20 }, { "epoch": 0.005099439061703213, "grad_norm": 17.62863540649414, "learning_rate": 7.649158592554819e-08, "loss": 3.3619, "step": 30 }, { "epoch": 0.00679925208227095, "grad_norm": 6.881149768829346, "learning_rate": 1.0198878123406425e-07, "loss": 3.2398, "step": 40 }, { "epoch": 0.008499065102838687, "grad_norm": 10.151201248168945, "learning_rate": 1.2748597654258032e-07, "loss": 3.3957, "step": 50 }, { "epoch": 0.010198878123406425, "grad_norm": 13.510781288146973, "learning_rate": 1.5298317185109638e-07, "loss": 3.209, "step": 60 }, { "epoch": 0.011898691143974164, "grad_norm": 13.116254806518555, "learning_rate": 1.7848036715961246e-07, "loss": 3.3002, "step": 70 }, { "epoch": 0.0135985041645419, "grad_norm": 10.267038345336914, "learning_rate": 2.039775624681285e-07, "loss": 3.3104, "step": 80 }, { "epoch": 0.015298317185109638, "grad_norm": 26.43967056274414, "learning_rate": 2.294747577766446e-07, "loss": 3.2833, "step": 90 }, { "epoch": 0.016998130205677375, "grad_norm": 7.353091716766357, "learning_rate": 2.5497195308516064e-07, "loss": 3.3962, "step": 100 }, { "epoch": 0.018697943226245115, "grad_norm": 14.186665534973145, "learning_rate": 2.804691483936767e-07, "loss": 3.301, "step": 110 }, { "epoch": 0.02039775624681285, "grad_norm": 11.719480514526367, "learning_rate": 3.0596634370219275e-07, "loss": 3.4909, "step": 120 }, { "epoch": 0.022097569267380587, "grad_norm": 13.704547882080078, "learning_rate": 3.3146353901070883e-07, "loss": 3.2751, "step": 130 }, { "epoch": 0.023797382287948327, "grad_norm": 11.318249702453613, "learning_rate": 3.569607343192249e-07, "loss": 3.3027, "step": 140 }, { "epoch": 0.025497195308516064, "grad_norm": 12.137194633483887, "learning_rate": 3.82457929627741e-07, "loss": 3.2706, "step": 150 }, { "epoch": 0.0271970083290838, "grad_norm": 10.884977340698242, "learning_rate": 4.07955124936257e-07, "loss": 3.4032, "step": 160 }, { "epoch": 0.02889682134965154, "grad_norm": 8.090470314025879, "learning_rate": 4.334523202447731e-07, "loss": 3.3324, "step": 170 }, { "epoch": 0.030596634370219276, "grad_norm": 13.42221736907959, "learning_rate": 4.589495155532892e-07, "loss": 3.1758, "step": 180 }, { "epoch": 0.03229644739078701, "grad_norm": 12.101027488708496, "learning_rate": 4.844467108618052e-07, "loss": 3.0916, "step": 190 }, { "epoch": 0.03399626041135475, "grad_norm": 11.101884841918945, "learning_rate": 5.099439061703213e-07, "loss": 3.2831, "step": 200 }, { "epoch": 0.035696073431922486, "grad_norm": 8.022069931030273, "learning_rate": 5.354411014788373e-07, "loss": 3.2731, "step": 210 }, { "epoch": 0.03739588645249023, "grad_norm": 10.096372604370117, "learning_rate": 5.609382967873534e-07, "loss": 3.4517, "step": 220 }, { "epoch": 0.039095699473057965, "grad_norm": 10.021903038024902, "learning_rate": 5.864354920958695e-07, "loss": 3.3513, "step": 230 }, { "epoch": 0.0407955124936257, "grad_norm": 8.615796089172363, "learning_rate": 6.119326874043855e-07, "loss": 3.4101, "step": 240 }, { "epoch": 0.04249532551419344, "grad_norm": 11.486947059631348, "learning_rate": 6.374298827129016e-07, "loss": 3.3873, "step": 250 }, { "epoch": 0.044195138534761175, "grad_norm": 9.730019569396973, "learning_rate": 6.629270780214177e-07, "loss": 3.1953, "step": 260 }, { "epoch": 0.04589495155532891, "grad_norm": 9.98071575164795, "learning_rate": 6.884242733299336e-07, "loss": 3.2449, "step": 270 }, { "epoch": 0.047594764575896654, "grad_norm": 10.990867614746094, "learning_rate": 7.139214686384498e-07, "loss": 3.1882, "step": 280 }, { "epoch": 0.04929457759646439, "grad_norm": 8.79128360748291, "learning_rate": 7.394186639469659e-07, "loss": 3.1309, "step": 290 }, { "epoch": 0.05099439061703213, "grad_norm": 13.359721183776855, "learning_rate": 7.64915859255482e-07, "loss": 3.1185, "step": 300 }, { "epoch": 0.052694203637599864, "grad_norm": 9.531183242797852, "learning_rate": 7.90413054563998e-07, "loss": 3.3587, "step": 310 }, { "epoch": 0.0543940166581676, "grad_norm": 14.003602981567383, "learning_rate": 8.15910249872514e-07, "loss": 3.2106, "step": 320 }, { "epoch": 0.05609382967873534, "grad_norm": 12.434489250183105, "learning_rate": 8.414074451810301e-07, "loss": 3.198, "step": 330 }, { "epoch": 0.05779364269930308, "grad_norm": 9.772897720336914, "learning_rate": 8.669046404895462e-07, "loss": 3.1344, "step": 340 }, { "epoch": 0.059493455719870816, "grad_norm": 12.630123138427734, "learning_rate": 8.924018357980623e-07, "loss": 3.4188, "step": 350 }, { "epoch": 0.06119326874043855, "grad_norm": 10.130664825439453, "learning_rate": 9.178990311065784e-07, "loss": 3.0111, "step": 360 }, { "epoch": 0.06289308176100629, "grad_norm": 13.064087867736816, "learning_rate": 9.433962264150943e-07, "loss": 3.2658, "step": 370 }, { "epoch": 0.06459289478157403, "grad_norm": 10.627570152282715, "learning_rate": 9.688934217236104e-07, "loss": 3.2649, "step": 380 }, { "epoch": 0.06629270780214176, "grad_norm": 10.745613098144531, "learning_rate": 9.943906170321264e-07, "loss": 3.3159, "step": 390 }, { "epoch": 0.0679925208227095, "grad_norm": 11.623552322387695, "learning_rate": 1.0198878123406426e-06, "loss": 2.9984, "step": 400 }, { "epoch": 0.06969233384327723, "grad_norm": 8.520482063293457, "learning_rate": 1.0453850076491585e-06, "loss": 3.3301, "step": 410 }, { "epoch": 0.07139214686384497, "grad_norm": 9.857006072998047, "learning_rate": 1.0708822029576745e-06, "loss": 3.3559, "step": 420 }, { "epoch": 0.07309195988441271, "grad_norm": 10.37116813659668, "learning_rate": 1.0963793982661907e-06, "loss": 3.5024, "step": 430 }, { "epoch": 0.07479177290498046, "grad_norm": 10.329543113708496, "learning_rate": 1.1218765935747069e-06, "loss": 3.274, "step": 440 }, { "epoch": 0.0764915859255482, "grad_norm": 11.133870124816895, "learning_rate": 1.1473737888832229e-06, "loss": 3.2365, "step": 450 }, { "epoch": 0.07819139894611593, "grad_norm": 11.507918357849121, "learning_rate": 1.172870984191739e-06, "loss": 3.3305, "step": 460 }, { "epoch": 0.07989121196668367, "grad_norm": 8.08620548248291, "learning_rate": 1.198368179500255e-06, "loss": 3.2419, "step": 470 }, { "epoch": 0.0815910249872514, "grad_norm": 14.286142349243164, "learning_rate": 1.223865374808771e-06, "loss": 3.1029, "step": 480 }, { "epoch": 0.08329083800781914, "grad_norm": 12.439252853393555, "learning_rate": 1.2493625701172872e-06, "loss": 3.3563, "step": 490 }, { "epoch": 0.08499065102838688, "grad_norm": 11.488417625427246, "learning_rate": 1.2748597654258032e-06, "loss": 3.1235, "step": 500 }, { "epoch": 0.08669046404895461, "grad_norm": 11.142370223999023, "learning_rate": 1.3003569607343191e-06, "loss": 3.4087, "step": 510 }, { "epoch": 0.08839027706952235, "grad_norm": 16.54132652282715, "learning_rate": 1.3258541560428353e-06, "loss": 3.224, "step": 520 }, { "epoch": 0.09009009009009009, "grad_norm": 13.064096450805664, "learning_rate": 1.3513513513513513e-06, "loss": 3.2434, "step": 530 }, { "epoch": 0.09178990311065782, "grad_norm": 9.510428428649902, "learning_rate": 1.3768485466598673e-06, "loss": 3.3582, "step": 540 }, { "epoch": 0.09348971613122556, "grad_norm": 8.75434398651123, "learning_rate": 1.4023457419683835e-06, "loss": 3.2665, "step": 550 }, { "epoch": 0.09518952915179331, "grad_norm": 11.14885425567627, "learning_rate": 1.4278429372768996e-06, "loss": 3.1277, "step": 560 }, { "epoch": 0.09688934217236105, "grad_norm": 22.08490562438965, "learning_rate": 1.4533401325854156e-06, "loss": 3.1017, "step": 570 }, { "epoch": 0.09858915519292878, "grad_norm": 25.119800567626953, "learning_rate": 1.4788373278939318e-06, "loss": 3.3466, "step": 580 }, { "epoch": 0.10028896821349652, "grad_norm": 13.632193565368652, "learning_rate": 1.5043345232024478e-06, "loss": 3.1143, "step": 590 }, { "epoch": 0.10198878123406425, "grad_norm": 8.770873069763184, "learning_rate": 1.529831718510964e-06, "loss": 3.1223, "step": 600 }, { "epoch": 0.10368859425463199, "grad_norm": 9.25463581085205, "learning_rate": 1.55532891381948e-06, "loss": 3.2065, "step": 610 }, { "epoch": 0.10538840727519973, "grad_norm": 12.359750747680664, "learning_rate": 1.580826109127996e-06, "loss": 3.2826, "step": 620 }, { "epoch": 0.10708822029576746, "grad_norm": 17.98019790649414, "learning_rate": 1.606323304436512e-06, "loss": 3.1529, "step": 630 }, { "epoch": 0.1087880333163352, "grad_norm": 7.104980945587158, "learning_rate": 1.631820499745028e-06, "loss": 3.3423, "step": 640 }, { "epoch": 0.11048784633690294, "grad_norm": 9.299174308776855, "learning_rate": 1.657317695053544e-06, "loss": 3.0865, "step": 650 }, { "epoch": 0.11218765935747067, "grad_norm": 10.180256843566895, "learning_rate": 1.6828148903620602e-06, "loss": 3.2424, "step": 660 }, { "epoch": 0.11388747237803841, "grad_norm": 13.52150821685791, "learning_rate": 1.7083120856705762e-06, "loss": 3.1841, "step": 670 }, { "epoch": 0.11558728539860616, "grad_norm": 8.526803016662598, "learning_rate": 1.7338092809790924e-06, "loss": 3.4302, "step": 680 }, { "epoch": 0.1172870984191739, "grad_norm": 17.6016845703125, "learning_rate": 1.7593064762876086e-06, "loss": 3.1399, "step": 690 }, { "epoch": 0.11898691143974163, "grad_norm": 8.9983549118042, "learning_rate": 1.7848036715961246e-06, "loss": 3.1282, "step": 700 }, { "epoch": 0.12068672446030937, "grad_norm": 14.257224082946777, "learning_rate": 1.8103008669046405e-06, "loss": 3.2749, "step": 710 }, { "epoch": 0.1223865374808771, "grad_norm": 9.958919525146484, "learning_rate": 1.8357980622131567e-06, "loss": 3.2504, "step": 720 }, { "epoch": 0.12408635050144484, "grad_norm": 12.964635848999023, "learning_rate": 1.8612952575216727e-06, "loss": 2.8601, "step": 730 }, { "epoch": 0.12578616352201258, "grad_norm": 11.294242858886719, "learning_rate": 1.8867924528301887e-06, "loss": 3.2207, "step": 740 }, { "epoch": 0.12748597654258031, "grad_norm": 10.708282470703125, "learning_rate": 1.9122896481387046e-06, "loss": 3.2052, "step": 750 }, { "epoch": 0.12918578956314805, "grad_norm": 12.49475383758545, "learning_rate": 1.937786843447221e-06, "loss": 3.4246, "step": 760 }, { "epoch": 0.1308856025837158, "grad_norm": 10.642792701721191, "learning_rate": 1.963284038755737e-06, "loss": 3.2642, "step": 770 }, { "epoch": 0.13258541560428352, "grad_norm": 9.440777778625488, "learning_rate": 1.9887812340642528e-06, "loss": 3.3023, "step": 780 }, { "epoch": 0.13428522862485126, "grad_norm": 21.428163528442383, "learning_rate": 2.014278429372769e-06, "loss": 3.3881, "step": 790 }, { "epoch": 0.135985041645419, "grad_norm": 12.733051300048828, "learning_rate": 2.039775624681285e-06, "loss": 3.2025, "step": 800 }, { "epoch": 0.13768485466598673, "grad_norm": 11.321364402770996, "learning_rate": 2.065272819989801e-06, "loss": 3.4019, "step": 810 }, { "epoch": 0.13938466768655447, "grad_norm": 11.918639183044434, "learning_rate": 2.090770015298317e-06, "loss": 3.421, "step": 820 }, { "epoch": 0.1410844807071222, "grad_norm": 13.5828275680542, "learning_rate": 2.1162672106068333e-06, "loss": 3.2242, "step": 830 }, { "epoch": 0.14278429372768994, "grad_norm": 14.851539611816406, "learning_rate": 2.141764405915349e-06, "loss": 3.1187, "step": 840 }, { "epoch": 0.14448410674825768, "grad_norm": 8.637921333312988, "learning_rate": 2.1672616012238652e-06, "loss": 3.2781, "step": 850 }, { "epoch": 0.14618391976882542, "grad_norm": 25.528392791748047, "learning_rate": 2.1927587965323814e-06, "loss": 3.2304, "step": 860 }, { "epoch": 0.14788373278939318, "grad_norm": 9.753904342651367, "learning_rate": 2.2182559918408976e-06, "loss": 3.2209, "step": 870 }, { "epoch": 0.14958354580996092, "grad_norm": 28.079679489135742, "learning_rate": 2.2437531871494138e-06, "loss": 3.0704, "step": 880 }, { "epoch": 0.15128335883052865, "grad_norm": 12.453920364379883, "learning_rate": 2.26925038245793e-06, "loss": 3.0608, "step": 890 }, { "epoch": 0.1529831718510964, "grad_norm": 11.230239868164062, "learning_rate": 2.2947475777664457e-06, "loss": 3.2757, "step": 900 }, { "epoch": 0.15468298487166413, "grad_norm": 7.69272518157959, "learning_rate": 2.320244773074962e-06, "loss": 3.2047, "step": 910 }, { "epoch": 0.15638279789223186, "grad_norm": 10.109095573425293, "learning_rate": 2.345741968383478e-06, "loss": 3.1277, "step": 920 }, { "epoch": 0.1580826109127996, "grad_norm": 16.64354705810547, "learning_rate": 2.371239163691994e-06, "loss": 3.1653, "step": 930 }, { "epoch": 0.15978242393336733, "grad_norm": 13.184935569763184, "learning_rate": 2.39673635900051e-06, "loss": 3.3547, "step": 940 }, { "epoch": 0.16148223695393507, "grad_norm": 10.887249946594238, "learning_rate": 2.4222335543090262e-06, "loss": 2.9074, "step": 950 }, { "epoch": 0.1631820499745028, "grad_norm": 18.531204223632812, "learning_rate": 2.447730749617542e-06, "loss": 3.2234, "step": 960 }, { "epoch": 0.16488186299507054, "grad_norm": 13.720499038696289, "learning_rate": 2.473227944926058e-06, "loss": 3.2783, "step": 970 }, { "epoch": 0.16658167601563828, "grad_norm": 8.763530731201172, "learning_rate": 2.4987251402345744e-06, "loss": 3.1683, "step": 980 }, { "epoch": 0.16828148903620602, "grad_norm": 12.344958305358887, "learning_rate": 2.52422233554309e-06, "loss": 3.3184, "step": 990 }, { "epoch": 0.16998130205677375, "grad_norm": 16.092954635620117, "learning_rate": 2.5497195308516063e-06, "loss": 3.1023, "step": 1000 }, { "epoch": 0.1716811150773415, "grad_norm": 8.49113941192627, "learning_rate": 2.5752167261601225e-06, "loss": 3.1352, "step": 1010 }, { "epoch": 0.17338092809790923, "grad_norm": 7.488245010375977, "learning_rate": 2.6007139214686383e-06, "loss": 3.0845, "step": 1020 }, { "epoch": 0.17508074111847696, "grad_norm": 12.73012924194336, "learning_rate": 2.6262111167771545e-06, "loss": 3.126, "step": 1030 }, { "epoch": 0.1767805541390447, "grad_norm": 11.719165802001953, "learning_rate": 2.6517083120856706e-06, "loss": 3.0355, "step": 1040 }, { "epoch": 0.17848036715961244, "grad_norm": 9.878467559814453, "learning_rate": 2.6772055073941864e-06, "loss": 3.3438, "step": 1050 }, { "epoch": 0.18018018018018017, "grad_norm": 14.523748397827148, "learning_rate": 2.7027027027027026e-06, "loss": 3.1635, "step": 1060 }, { "epoch": 0.1818799932007479, "grad_norm": 14.986756324768066, "learning_rate": 2.7281998980112188e-06, "loss": 3.2314, "step": 1070 }, { "epoch": 0.18357980622131564, "grad_norm": 13.241119384765625, "learning_rate": 2.7536970933197345e-06, "loss": 3.2113, "step": 1080 }, { "epoch": 0.18527961924188338, "grad_norm": 13.413939476013184, "learning_rate": 2.7791942886282507e-06, "loss": 3.472, "step": 1090 }, { "epoch": 0.18697943226245112, "grad_norm": 9.439146041870117, "learning_rate": 2.804691483936767e-06, "loss": 3.2224, "step": 1100 }, { "epoch": 0.18867924528301888, "grad_norm": 10.511975288391113, "learning_rate": 2.830188679245283e-06, "loss": 3.0341, "step": 1110 }, { "epoch": 0.19037905830358662, "grad_norm": 15.891010284423828, "learning_rate": 2.8556858745537993e-06, "loss": 3.1767, "step": 1120 }, { "epoch": 0.19207887132415435, "grad_norm": 9.685541152954102, "learning_rate": 2.8811830698623155e-06, "loss": 3.0911, "step": 1130 }, { "epoch": 0.1937786843447221, "grad_norm": 7.956643104553223, "learning_rate": 2.9066802651708312e-06, "loss": 3.2289, "step": 1140 }, { "epoch": 0.19547849736528983, "grad_norm": 14.644304275512695, "learning_rate": 2.9321774604793474e-06, "loss": 3.321, "step": 1150 }, { "epoch": 0.19717831038585756, "grad_norm": 12.538710594177246, "learning_rate": 2.9576746557878636e-06, "loss": 3.2752, "step": 1160 }, { "epoch": 0.1988781234064253, "grad_norm": 11.577922821044922, "learning_rate": 2.9831718510963794e-06, "loss": 3.2047, "step": 1170 }, { "epoch": 0.20057793642699304, "grad_norm": 11.967416763305664, "learning_rate": 3.0086690464048956e-06, "loss": 3.1154, "step": 1180 }, { "epoch": 0.20227774944756077, "grad_norm": 8.246138572692871, "learning_rate": 3.0341662417134117e-06, "loss": 3.2342, "step": 1190 }, { "epoch": 0.2039775624681285, "grad_norm": 9.948267936706543, "learning_rate": 3.059663437021928e-06, "loss": 3.4468, "step": 1200 }, { "epoch": 0.20567737548869625, "grad_norm": 8.385937690734863, "learning_rate": 3.0851606323304437e-06, "loss": 3.1835, "step": 1210 }, { "epoch": 0.20737718850926398, "grad_norm": 10.103710174560547, "learning_rate": 3.11065782763896e-06, "loss": 3.2911, "step": 1220 }, { "epoch": 0.20907700152983172, "grad_norm": 7.359684944152832, "learning_rate": 3.136155022947476e-06, "loss": 3.1453, "step": 1230 }, { "epoch": 0.21077681455039946, "grad_norm": 9.228778839111328, "learning_rate": 3.161652218255992e-06, "loss": 3.0301, "step": 1240 }, { "epoch": 0.2124766275709672, "grad_norm": 11.706355094909668, "learning_rate": 3.187149413564508e-06, "loss": 3.1787, "step": 1250 }, { "epoch": 0.21417644059153493, "grad_norm": 11.361674308776855, "learning_rate": 3.212646608873024e-06, "loss": 3.1996, "step": 1260 }, { "epoch": 0.21587625361210266, "grad_norm": 8.49975872039795, "learning_rate": 3.23814380418154e-06, "loss": 3.3104, "step": 1270 }, { "epoch": 0.2175760666326704, "grad_norm": 11.83768367767334, "learning_rate": 3.263640999490056e-06, "loss": 3.1404, "step": 1280 }, { "epoch": 0.21927587965323814, "grad_norm": 12.27997875213623, "learning_rate": 3.2891381947985723e-06, "loss": 2.9499, "step": 1290 }, { "epoch": 0.22097569267380587, "grad_norm": 9.085856437683105, "learning_rate": 3.314635390107088e-06, "loss": 3.1976, "step": 1300 }, { "epoch": 0.2226755056943736, "grad_norm": 10.74309253692627, "learning_rate": 3.3401325854156043e-06, "loss": 3.1824, "step": 1310 }, { "epoch": 0.22437531871494135, "grad_norm": 11.62602424621582, "learning_rate": 3.3656297807241205e-06, "loss": 3.1679, "step": 1320 }, { "epoch": 0.22607513173550908, "grad_norm": 11.831320762634277, "learning_rate": 3.3911269760326362e-06, "loss": 3.2512, "step": 1330 }, { "epoch": 0.22777494475607682, "grad_norm": 7.565231800079346, "learning_rate": 3.4166241713411524e-06, "loss": 3.2692, "step": 1340 }, { "epoch": 0.22947475777664456, "grad_norm": 13.02651309967041, "learning_rate": 3.4421213666496686e-06, "loss": 3.1564, "step": 1350 }, { "epoch": 0.23117457079721232, "grad_norm": 9.061662673950195, "learning_rate": 3.4676185619581848e-06, "loss": 3.1986, "step": 1360 }, { "epoch": 0.23287438381778006, "grad_norm": 8.962031364440918, "learning_rate": 3.493115757266701e-06, "loss": 3.2047, "step": 1370 }, { "epoch": 0.2345741968383478, "grad_norm": 10.72256088256836, "learning_rate": 3.518612952575217e-06, "loss": 3.1681, "step": 1380 }, { "epoch": 0.23627400985891553, "grad_norm": 11.396880149841309, "learning_rate": 3.544110147883733e-06, "loss": 3.0406, "step": 1390 }, { "epoch": 0.23797382287948327, "grad_norm": 10.540839195251465, "learning_rate": 3.569607343192249e-06, "loss": 3.1295, "step": 1400 }, { "epoch": 0.239673635900051, "grad_norm": 9.576120376586914, "learning_rate": 3.5951045385007653e-06, "loss": 3.1131, "step": 1410 }, { "epoch": 0.24137344892061874, "grad_norm": 10.901866912841797, "learning_rate": 3.620601733809281e-06, "loss": 3.0524, "step": 1420 }, { "epoch": 0.24307326194118647, "grad_norm": 12.480045318603516, "learning_rate": 3.6460989291177972e-06, "loss": 3.2472, "step": 1430 }, { "epoch": 0.2447730749617542, "grad_norm": 10.335319519042969, "learning_rate": 3.6715961244263134e-06, "loss": 3.2974, "step": 1440 }, { "epoch": 0.24647288798232195, "grad_norm": 11.153907775878906, "learning_rate": 3.697093319734829e-06, "loss": 3.1716, "step": 1450 }, { "epoch": 0.24817270100288968, "grad_norm": 10.779867172241211, "learning_rate": 3.7225905150433454e-06, "loss": 3.1685, "step": 1460 }, { "epoch": 0.24987251402345742, "grad_norm": 13.553423881530762, "learning_rate": 3.7480877103518616e-06, "loss": 2.9175, "step": 1470 }, { "epoch": 0.25157232704402516, "grad_norm": 9.05892562866211, "learning_rate": 3.7735849056603773e-06, "loss": 3.4155, "step": 1480 }, { "epoch": 0.2532721400645929, "grad_norm": 11.980334281921387, "learning_rate": 3.799082100968894e-06, "loss": 3.1745, "step": 1490 }, { "epoch": 0.25497195308516063, "grad_norm": 15.703144073486328, "learning_rate": 3.824579296277409e-06, "loss": 3.1077, "step": 1500 }, { "epoch": 0.2566717661057284, "grad_norm": 13.702265739440918, "learning_rate": 3.850076491585926e-06, "loss": 3.026, "step": 1510 }, { "epoch": 0.2583715791262961, "grad_norm": 12.1428804397583, "learning_rate": 3.875573686894442e-06, "loss": 3.1852, "step": 1520 }, { "epoch": 0.26007139214686387, "grad_norm": 14.151073455810547, "learning_rate": 3.901070882202958e-06, "loss": 3.2175, "step": 1530 }, { "epoch": 0.2617712051674316, "grad_norm": 9.452439308166504, "learning_rate": 3.926568077511474e-06, "loss": 3.2952, "step": 1540 }, { "epoch": 0.26347101818799934, "grad_norm": 15.585714340209961, "learning_rate": 3.95206527281999e-06, "loss": 3.2123, "step": 1550 }, { "epoch": 0.26517083120856705, "grad_norm": 9.52536392211914, "learning_rate": 3.9775624681285055e-06, "loss": 3.1399, "step": 1560 }, { "epoch": 0.2668706442291348, "grad_norm": 8.8570556640625, "learning_rate": 4.003059663437022e-06, "loss": 3.1407, "step": 1570 }, { "epoch": 0.2685704572497025, "grad_norm": 11.571701049804688, "learning_rate": 4.028556858745538e-06, "loss": 3.0941, "step": 1580 }, { "epoch": 0.2702702702702703, "grad_norm": 13.441617012023926, "learning_rate": 4.0540540540540545e-06, "loss": 3.107, "step": 1590 }, { "epoch": 0.271970083290838, "grad_norm": 9.909313201904297, "learning_rate": 4.07955124936257e-06, "loss": 3.2634, "step": 1600 }, { "epoch": 0.27366989631140576, "grad_norm": 13.226483345031738, "learning_rate": 4.105048444671087e-06, "loss": 3.2523, "step": 1610 }, { "epoch": 0.27536970933197347, "grad_norm": 9.15570068359375, "learning_rate": 4.130545639979602e-06, "loss": 3.3208, "step": 1620 }, { "epoch": 0.27706952235254123, "grad_norm": 11.243284225463867, "learning_rate": 4.156042835288118e-06, "loss": 3.0926, "step": 1630 }, { "epoch": 0.27876933537310894, "grad_norm": 9.681652069091797, "learning_rate": 4.181540030596634e-06, "loss": 3.1163, "step": 1640 }, { "epoch": 0.2804691483936767, "grad_norm": 8.100641250610352, "learning_rate": 4.207037225905151e-06, "loss": 3.1641, "step": 1650 }, { "epoch": 0.2821689614142444, "grad_norm": 9.985051155090332, "learning_rate": 4.2325344212136666e-06, "loss": 3.0764, "step": 1660 }, { "epoch": 0.2838687744348122, "grad_norm": 7.276730060577393, "learning_rate": 4.258031616522183e-06, "loss": 3.3922, "step": 1670 }, { "epoch": 0.2855685874553799, "grad_norm": 13.23525619506836, "learning_rate": 4.283528811830698e-06, "loss": 3.0177, "step": 1680 }, { "epoch": 0.28726840047594765, "grad_norm": 8.984657287597656, "learning_rate": 4.309026007139215e-06, "loss": 3.1293, "step": 1690 }, { "epoch": 0.28896821349651536, "grad_norm": 9.691112518310547, "learning_rate": 4.3345232024477305e-06, "loss": 3.3984, "step": 1700 }, { "epoch": 0.2906680265170831, "grad_norm": 9.698630332946777, "learning_rate": 4.360020397756247e-06, "loss": 3.1557, "step": 1710 }, { "epoch": 0.29236783953765083, "grad_norm": 11.153619766235352, "learning_rate": 4.385517593064763e-06, "loss": 3.1161, "step": 1720 }, { "epoch": 0.2940676525582186, "grad_norm": 7.501280784606934, "learning_rate": 4.4110147883732794e-06, "loss": 3.2235, "step": 1730 }, { "epoch": 0.29576746557878636, "grad_norm": 6.853991985321045, "learning_rate": 4.436511983681795e-06, "loss": 3.119, "step": 1740 }, { "epoch": 0.29746727859935407, "grad_norm": 12.935437202453613, "learning_rate": 4.462009178990311e-06, "loss": 3.2582, "step": 1750 }, { "epoch": 0.29916709161992183, "grad_norm": 10.198761940002441, "learning_rate": 4.4875063742988276e-06, "loss": 2.9456, "step": 1760 }, { "epoch": 0.30086690464048954, "grad_norm": 16.29355812072754, "learning_rate": 4.513003569607343e-06, "loss": 3.0392, "step": 1770 }, { "epoch": 0.3025667176610573, "grad_norm": 13.934629440307617, "learning_rate": 4.53850076491586e-06, "loss": 3.193, "step": 1780 }, { "epoch": 0.304266530681625, "grad_norm": 8.973444938659668, "learning_rate": 4.563997960224376e-06, "loss": 3.0013, "step": 1790 }, { "epoch": 0.3059663437021928, "grad_norm": 15.074542999267578, "learning_rate": 4.5894951555328915e-06, "loss": 3.1104, "step": 1800 }, { "epoch": 0.3076661567227605, "grad_norm": 13.276994705200195, "learning_rate": 4.614992350841407e-06, "loss": 3.1439, "step": 1810 }, { "epoch": 0.30936596974332825, "grad_norm": 10.17678165435791, "learning_rate": 4.640489546149924e-06, "loss": 3.088, "step": 1820 }, { "epoch": 0.31106578276389596, "grad_norm": 8.808093070983887, "learning_rate": 4.66598674145844e-06, "loss": 3.3149, "step": 1830 }, { "epoch": 0.3127655957844637, "grad_norm": 9.850824356079102, "learning_rate": 4.691483936766956e-06, "loss": 3.1583, "step": 1840 }, { "epoch": 0.31446540880503143, "grad_norm": 23.916513442993164, "learning_rate": 4.716981132075472e-06, "loss": 3.0697, "step": 1850 }, { "epoch": 0.3161652218255992, "grad_norm": 11.234800338745117, "learning_rate": 4.742478327383988e-06, "loss": 3.1875, "step": 1860 }, { "epoch": 0.3178650348461669, "grad_norm": 11.487380981445312, "learning_rate": 4.7679755226925035e-06, "loss": 3.1782, "step": 1870 }, { "epoch": 0.31956484786673467, "grad_norm": 23.833322525024414, "learning_rate": 4.79347271800102e-06, "loss": 3.2475, "step": 1880 }, { "epoch": 0.3212646608873024, "grad_norm": 9.148923873901367, "learning_rate": 4.818969913309536e-06, "loss": 3.2547, "step": 1890 }, { "epoch": 0.32296447390787014, "grad_norm": 17.639650344848633, "learning_rate": 4.8444671086180525e-06, "loss": 3.2521, "step": 1900 }, { "epoch": 0.32466428692843785, "grad_norm": 10.998615264892578, "learning_rate": 4.869964303926568e-06, "loss": 2.9841, "step": 1910 }, { "epoch": 0.3263640999490056, "grad_norm": 11.560646057128906, "learning_rate": 4.895461499235084e-06, "loss": 3.0048, "step": 1920 }, { "epoch": 0.3280639129695733, "grad_norm": 9.833661079406738, "learning_rate": 4.9209586945436e-06, "loss": 2.7689, "step": 1930 }, { "epoch": 0.3297637259901411, "grad_norm": 17.815710067749023, "learning_rate": 4.946455889852116e-06, "loss": 3.1412, "step": 1940 }, { "epoch": 0.3314635390107088, "grad_norm": 9.539080619812012, "learning_rate": 4.971953085160632e-06, "loss": 3.1318, "step": 1950 }, { "epoch": 0.33316335203127656, "grad_norm": 13.04981803894043, "learning_rate": 4.997450280469149e-06, "loss": 3.2737, "step": 1960 }, { "epoch": 0.3348631650518443, "grad_norm": 16.304323196411133, "learning_rate": 5.022947475777665e-06, "loss": 2.9316, "step": 1970 }, { "epoch": 0.33656297807241203, "grad_norm": 19.650745391845703, "learning_rate": 5.04844467108618e-06, "loss": 3.1547, "step": 1980 }, { "epoch": 0.3382627910929798, "grad_norm": 13.360518455505371, "learning_rate": 5.073941866394697e-06, "loss": 3.2172, "step": 1990 }, { "epoch": 0.3399626041135475, "grad_norm": 10.050530433654785, "learning_rate": 5.099439061703213e-06, "loss": 3.1043, "step": 2000 }, { "epoch": 0.34166241713411527, "grad_norm": 7.381603240966797, "learning_rate": 5.124936257011729e-06, "loss": 3.3219, "step": 2010 }, { "epoch": 0.343362230154683, "grad_norm": 10.292802810668945, "learning_rate": 5.150433452320245e-06, "loss": 3.2046, "step": 2020 }, { "epoch": 0.34506204317525074, "grad_norm": 18.185476303100586, "learning_rate": 5.175930647628762e-06, "loss": 3.2285, "step": 2030 }, { "epoch": 0.34676185619581845, "grad_norm": 15.757147789001465, "learning_rate": 5.2014278429372765e-06, "loss": 3.0057, "step": 2040 }, { "epoch": 0.3484616692163862, "grad_norm": 6.3245015144348145, "learning_rate": 5.226925038245793e-06, "loss": 3.205, "step": 2050 }, { "epoch": 0.3501614822369539, "grad_norm": 14.33997631072998, "learning_rate": 5.252422233554309e-06, "loss": 3.1699, "step": 2060 }, { "epoch": 0.3518612952575217, "grad_norm": 8.820186614990234, "learning_rate": 5.2779194288628255e-06, "loss": 3.3086, "step": 2070 }, { "epoch": 0.3535611082780894, "grad_norm": 24.069997787475586, "learning_rate": 5.303416624171341e-06, "loss": 3.2027, "step": 2080 }, { "epoch": 0.35526092129865716, "grad_norm": 12.365748405456543, "learning_rate": 5.328913819479858e-06, "loss": 3.1522, "step": 2090 }, { "epoch": 0.35696073431922487, "grad_norm": 10.033935546875, "learning_rate": 5.354411014788373e-06, "loss": 3.1822, "step": 2100 }, { "epoch": 0.35866054733979263, "grad_norm": 7.267968654632568, "learning_rate": 5.379908210096889e-06, "loss": 2.8413, "step": 2110 }, { "epoch": 0.36036036036036034, "grad_norm": 14.972100257873535, "learning_rate": 5.405405405405405e-06, "loss": 3.0308, "step": 2120 }, { "epoch": 0.3620601733809281, "grad_norm": 10.989387512207031, "learning_rate": 5.430902600713922e-06, "loss": 2.9712, "step": 2130 }, { "epoch": 0.3637599864014958, "grad_norm": 11.23474407196045, "learning_rate": 5.4563997960224376e-06, "loss": 3.1032, "step": 2140 }, { "epoch": 0.3654597994220636, "grad_norm": 16.640745162963867, "learning_rate": 5.481896991330954e-06, "loss": 3.1478, "step": 2150 }, { "epoch": 0.3671596124426313, "grad_norm": 9.49834156036377, "learning_rate": 5.507394186639469e-06, "loss": 3.1672, "step": 2160 }, { "epoch": 0.36885942546319905, "grad_norm": 12.081939697265625, "learning_rate": 5.532891381947986e-06, "loss": 3.0514, "step": 2170 }, { "epoch": 0.37055923848376676, "grad_norm": 9.975428581237793, "learning_rate": 5.5583885772565015e-06, "loss": 3.1269, "step": 2180 }, { "epoch": 0.3722590515043345, "grad_norm": 26.993165969848633, "learning_rate": 5.583885772565018e-06, "loss": 3.2937, "step": 2190 }, { "epoch": 0.37395886452490223, "grad_norm": 8.222159385681152, "learning_rate": 5.609382967873534e-06, "loss": 3.1882, "step": 2200 }, { "epoch": 0.37565867754547, "grad_norm": 9.108057975769043, "learning_rate": 5.6348801631820504e-06, "loss": 3.0375, "step": 2210 }, { "epoch": 0.37735849056603776, "grad_norm": 12.72333812713623, "learning_rate": 5.660377358490566e-06, "loss": 3.0959, "step": 2220 }, { "epoch": 0.37905830358660547, "grad_norm": 8.580050468444824, "learning_rate": 5.685874553799082e-06, "loss": 3.1478, "step": 2230 }, { "epoch": 0.38075811660717324, "grad_norm": 13.88869857788086, "learning_rate": 5.7113717491075986e-06, "loss": 3.0149, "step": 2240 }, { "epoch": 0.38245792962774094, "grad_norm": 9.968484878540039, "learning_rate": 5.736868944416114e-06, "loss": 3.2894, "step": 2250 }, { "epoch": 0.3841577426483087, "grad_norm": 7.811567783355713, "learning_rate": 5.762366139724631e-06, "loss": 3.1621, "step": 2260 }, { "epoch": 0.3858575556688764, "grad_norm": 7.990389823913574, "learning_rate": 5.787863335033147e-06, "loss": 2.9408, "step": 2270 }, { "epoch": 0.3875573686894442, "grad_norm": 18.173372268676758, "learning_rate": 5.8133605303416625e-06, "loss": 3.2226, "step": 2280 }, { "epoch": 0.3892571817100119, "grad_norm": 12.913167953491211, "learning_rate": 5.838857725650178e-06, "loss": 3.2275, "step": 2290 }, { "epoch": 0.39095699473057965, "grad_norm": 14.679533958435059, "learning_rate": 5.864354920958695e-06, "loss": 3.1013, "step": 2300 }, { "epoch": 0.39265680775114736, "grad_norm": 9.06629467010498, "learning_rate": 5.889852116267211e-06, "loss": 3.2526, "step": 2310 }, { "epoch": 0.3943566207717151, "grad_norm": 7.43846321105957, "learning_rate": 5.915349311575727e-06, "loss": 3.171, "step": 2320 }, { "epoch": 0.39605643379228284, "grad_norm": 23.38277244567871, "learning_rate": 5.940846506884243e-06, "loss": 2.8292, "step": 2330 }, { "epoch": 0.3977562468128506, "grad_norm": 9.565221786499023, "learning_rate": 5.966343702192759e-06, "loss": 3.0887, "step": 2340 }, { "epoch": 0.3994560598334183, "grad_norm": 10.80700969696045, "learning_rate": 5.9918408975012745e-06, "loss": 3.1331, "step": 2350 }, { "epoch": 0.4011558728539861, "grad_norm": 7.946408271789551, "learning_rate": 6.017338092809791e-06, "loss": 3.2474, "step": 2360 }, { "epoch": 0.4028556858745538, "grad_norm": 12.367879867553711, "learning_rate": 6.042835288118307e-06, "loss": 3.1924, "step": 2370 }, { "epoch": 0.40455549889512155, "grad_norm": 13.659883499145508, "learning_rate": 6.0683324834268235e-06, "loss": 3.253, "step": 2380 }, { "epoch": 0.40625531191568925, "grad_norm": 10.498866081237793, "learning_rate": 6.093829678735339e-06, "loss": 3.3014, "step": 2390 }, { "epoch": 0.407955124936257, "grad_norm": 10.060430526733398, "learning_rate": 6.119326874043856e-06, "loss": 2.9927, "step": 2400 }, { "epoch": 0.4096549379568247, "grad_norm": 13.857873916625977, "learning_rate": 6.144824069352371e-06, "loss": 2.9372, "step": 2410 }, { "epoch": 0.4113547509773925, "grad_norm": 9.68122673034668, "learning_rate": 6.170321264660887e-06, "loss": 3.2248, "step": 2420 }, { "epoch": 0.4130545639979602, "grad_norm": 10.427902221679688, "learning_rate": 6.195818459969403e-06, "loss": 3.2398, "step": 2430 }, { "epoch": 0.41475437701852796, "grad_norm": 14.366276741027832, "learning_rate": 6.22131565527792e-06, "loss": 3.1118, "step": 2440 }, { "epoch": 0.4164541900390957, "grad_norm": 14.666399002075195, "learning_rate": 6.2468128505864355e-06, "loss": 3.1993, "step": 2450 }, { "epoch": 0.41815400305966344, "grad_norm": 8.881954193115234, "learning_rate": 6.272310045894952e-06, "loss": 3.087, "step": 2460 }, { "epoch": 0.4198538160802312, "grad_norm": 12.173590660095215, "learning_rate": 6.297807241203468e-06, "loss": 3.2354, "step": 2470 }, { "epoch": 0.4215536291007989, "grad_norm": 16.154399871826172, "learning_rate": 6.323304436511984e-06, "loss": 3.2513, "step": 2480 }, { "epoch": 0.4232534421213667, "grad_norm": 14.78820514678955, "learning_rate": 6.3488016318205e-06, "loss": 3.0539, "step": 2490 }, { "epoch": 0.4249532551419344, "grad_norm": 14.075251579284668, "learning_rate": 6.374298827129016e-06, "loss": 3.2605, "step": 2500 }, { "epoch": 0.42665306816250215, "grad_norm": 8.461102485656738, "learning_rate": 6.399796022437533e-06, "loss": 3.1171, "step": 2510 }, { "epoch": 0.42835288118306986, "grad_norm": 10.986845970153809, "learning_rate": 6.425293217746048e-06, "loss": 3.0178, "step": 2520 }, { "epoch": 0.4300526942036376, "grad_norm": 16.862895965576172, "learning_rate": 6.450790413054564e-06, "loss": 3.1324, "step": 2530 }, { "epoch": 0.43175250722420533, "grad_norm": 6.757488250732422, "learning_rate": 6.47628760836308e-06, "loss": 3.0215, "step": 2540 }, { "epoch": 0.4334523202447731, "grad_norm": 11.354599952697754, "learning_rate": 6.5017848036715965e-06, "loss": 3.026, "step": 2550 }, { "epoch": 0.4351521332653408, "grad_norm": 15.148270606994629, "learning_rate": 6.527281998980112e-06, "loss": 3.1777, "step": 2560 }, { "epoch": 0.43685194628590857, "grad_norm": 10.034414291381836, "learning_rate": 6.552779194288629e-06, "loss": 3.2794, "step": 2570 }, { "epoch": 0.4385517593064763, "grad_norm": 16.344343185424805, "learning_rate": 6.578276389597145e-06, "loss": 3.1148, "step": 2580 }, { "epoch": 0.44025157232704404, "grad_norm": 10.70421314239502, "learning_rate": 6.60377358490566e-06, "loss": 3.2385, "step": 2590 }, { "epoch": 0.44195138534761175, "grad_norm": 10.967391967773438, "learning_rate": 6.629270780214176e-06, "loss": 3.1535, "step": 2600 }, { "epoch": 0.4436511983681795, "grad_norm": 10.155302047729492, "learning_rate": 6.654767975522693e-06, "loss": 3.0975, "step": 2610 }, { "epoch": 0.4453510113887472, "grad_norm": 13.833210945129395, "learning_rate": 6.6802651708312086e-06, "loss": 3.1209, "step": 2620 }, { "epoch": 0.447050824409315, "grad_norm": 9.533905029296875, "learning_rate": 6.705762366139725e-06, "loss": 3.2363, "step": 2630 }, { "epoch": 0.4487506374298827, "grad_norm": 9.623392105102539, "learning_rate": 6.731259561448241e-06, "loss": 3.2439, "step": 2640 }, { "epoch": 0.45045045045045046, "grad_norm": 21.0267391204834, "learning_rate": 6.756756756756757e-06, "loss": 3.1291, "step": 2650 }, { "epoch": 0.45215026347101817, "grad_norm": 9.454344749450684, "learning_rate": 6.7822539520652725e-06, "loss": 3.2105, "step": 2660 }, { "epoch": 0.45385007649158593, "grad_norm": 16.543880462646484, "learning_rate": 6.807751147373789e-06, "loss": 3.0838, "step": 2670 }, { "epoch": 0.45554988951215364, "grad_norm": 8.742140769958496, "learning_rate": 6.833248342682305e-06, "loss": 3.2767, "step": 2680 }, { "epoch": 0.4572497025327214, "grad_norm": 11.755403518676758, "learning_rate": 6.8587455379908214e-06, "loss": 3.1479, "step": 2690 }, { "epoch": 0.4589495155532891, "grad_norm": 9.975177764892578, "learning_rate": 6.884242733299337e-06, "loss": 3.2109, "step": 2700 }, { "epoch": 0.4606493285738569, "grad_norm": 13.988856315612793, "learning_rate": 6.909739928607853e-06, "loss": 3.0092, "step": 2710 }, { "epoch": 0.46234914159442464, "grad_norm": 11.22400951385498, "learning_rate": 6.9352371239163696e-06, "loss": 3.2899, "step": 2720 }, { "epoch": 0.46404895461499235, "grad_norm": 14.208046913146973, "learning_rate": 6.960734319224885e-06, "loss": 3.1635, "step": 2730 }, { "epoch": 0.4657487676355601, "grad_norm": 12.377664566040039, "learning_rate": 6.986231514533402e-06, "loss": 2.9894, "step": 2740 }, { "epoch": 0.4674485806561278, "grad_norm": 10.713295936584473, "learning_rate": 7.011728709841918e-06, "loss": 3.0931, "step": 2750 }, { "epoch": 0.4691483936766956, "grad_norm": 9.088719367980957, "learning_rate": 7.037225905150434e-06, "loss": 3.1208, "step": 2760 }, { "epoch": 0.4708482066972633, "grad_norm": 11.67667007446289, "learning_rate": 7.062723100458949e-06, "loss": 3.1575, "step": 2770 }, { "epoch": 0.47254801971783106, "grad_norm": 11.577333450317383, "learning_rate": 7.088220295767466e-06, "loss": 3.2597, "step": 2780 }, { "epoch": 0.47424783273839877, "grad_norm": 7.053229331970215, "learning_rate": 7.113717491075982e-06, "loss": 3.4562, "step": 2790 }, { "epoch": 0.47594764575896653, "grad_norm": 12.834189414978027, "learning_rate": 7.139214686384498e-06, "loss": 3.0428, "step": 2800 }, { "epoch": 0.47764745877953424, "grad_norm": 12.743949890136719, "learning_rate": 7.164711881693014e-06, "loss": 3.12, "step": 2810 }, { "epoch": 0.479347271800102, "grad_norm": 10.846124649047852, "learning_rate": 7.190209077001531e-06, "loss": 3.2458, "step": 2820 }, { "epoch": 0.4810470848206697, "grad_norm": 14.795022010803223, "learning_rate": 7.2157062723100455e-06, "loss": 3.1479, "step": 2830 }, { "epoch": 0.4827468978412375, "grad_norm": 10.183623313903809, "learning_rate": 7.241203467618562e-06, "loss": 3.1254, "step": 2840 }, { "epoch": 0.4844467108618052, "grad_norm": 14.705172538757324, "learning_rate": 7.266700662927078e-06, "loss": 3.0823, "step": 2850 }, { "epoch": 0.48614652388237295, "grad_norm": 9.310958862304688, "learning_rate": 7.2921978582355945e-06, "loss": 3.3862, "step": 2860 }, { "epoch": 0.48784633690294066, "grad_norm": 17.989683151245117, "learning_rate": 7.31769505354411e-06, "loss": 3.1846, "step": 2870 }, { "epoch": 0.4895461499235084, "grad_norm": 13.063238143920898, "learning_rate": 7.343192248852627e-06, "loss": 3.116, "step": 2880 }, { "epoch": 0.49124596294407613, "grad_norm": 7.9271955490112305, "learning_rate": 7.368689444161142e-06, "loss": 3.193, "step": 2890 }, { "epoch": 0.4929457759646439, "grad_norm": 12.773377418518066, "learning_rate": 7.394186639469658e-06, "loss": 3.1658, "step": 2900 }, { "epoch": 0.4946455889852116, "grad_norm": 8.996011734008789, "learning_rate": 7.419683834778174e-06, "loss": 3.3046, "step": 2910 }, { "epoch": 0.49634540200577937, "grad_norm": 8.390958786010742, "learning_rate": 7.445181030086691e-06, "loss": 3.0519, "step": 2920 }, { "epoch": 0.4980452150263471, "grad_norm": 13.802458763122559, "learning_rate": 7.4706782253952065e-06, "loss": 2.9085, "step": 2930 }, { "epoch": 0.49974502804691484, "grad_norm": 15.90228271484375, "learning_rate": 7.496175420703723e-06, "loss": 3.1797, "step": 2940 }, { "epoch": 0.5014448410674825, "grad_norm": 9.317934036254883, "learning_rate": 7.521672616012238e-06, "loss": 3.2726, "step": 2950 }, { "epoch": 0.5031446540880503, "grad_norm": 12.627945899963379, "learning_rate": 7.547169811320755e-06, "loss": 3.1845, "step": 2960 }, { "epoch": 0.5048444671086181, "grad_norm": 13.665678024291992, "learning_rate": 7.572667006629271e-06, "loss": 3.1512, "step": 2970 }, { "epoch": 0.5065442801291858, "grad_norm": 14.959603309631348, "learning_rate": 7.598164201937788e-06, "loss": 3.1418, "step": 2980 }, { "epoch": 0.5082440931497535, "grad_norm": 22.252614974975586, "learning_rate": 7.623661397246303e-06, "loss": 2.9086, "step": 2990 }, { "epoch": 0.5099439061703213, "grad_norm": 16.481027603149414, "learning_rate": 7.649158592554819e-06, "loss": 3.206, "step": 3000 }, { "epoch": 0.511643719190889, "grad_norm": 10.652236938476562, "learning_rate": 7.674655787863335e-06, "loss": 3.2759, "step": 3010 }, { "epoch": 0.5133435322114568, "grad_norm": 14.651650428771973, "learning_rate": 7.700152983171852e-06, "loss": 3.0001, "step": 3020 }, { "epoch": 0.5150433452320244, "grad_norm": 8.116512298583984, "learning_rate": 7.725650178480367e-06, "loss": 3.3032, "step": 3030 }, { "epoch": 0.5167431582525922, "grad_norm": 9.108267784118652, "learning_rate": 7.751147373788883e-06, "loss": 3.1492, "step": 3040 }, { "epoch": 0.51844297127316, "grad_norm": 17.928550720214844, "learning_rate": 7.7766445690974e-06, "loss": 2.934, "step": 3050 }, { "epoch": 0.5201427842937277, "grad_norm": 10.164219856262207, "learning_rate": 7.802141764405917e-06, "loss": 3.1297, "step": 3060 }, { "epoch": 0.5218425973142954, "grad_norm": 17.294029235839844, "learning_rate": 7.827638959714431e-06, "loss": 3.1137, "step": 3070 }, { "epoch": 0.5235424103348632, "grad_norm": 13.40277099609375, "learning_rate": 7.853136155022948e-06, "loss": 3.0432, "step": 3080 }, { "epoch": 0.5252422233554309, "grad_norm": 10.869766235351562, "learning_rate": 7.878633350331465e-06, "loss": 3.0801, "step": 3090 }, { "epoch": 0.5269420363759987, "grad_norm": 13.572504043579102, "learning_rate": 7.90413054563998e-06, "loss": 2.9288, "step": 3100 }, { "epoch": 0.5286418493965663, "grad_norm": 7.9016032218933105, "learning_rate": 7.929627740948494e-06, "loss": 3.1138, "step": 3110 }, { "epoch": 0.5303416624171341, "grad_norm": 12.291131973266602, "learning_rate": 7.955124936257011e-06, "loss": 3.2221, "step": 3120 }, { "epoch": 0.5320414754377019, "grad_norm": 17.350740432739258, "learning_rate": 7.980622131565528e-06, "loss": 3.051, "step": 3130 }, { "epoch": 0.5337412884582696, "grad_norm": 16.523826599121094, "learning_rate": 8.006119326874044e-06, "loss": 3.2642, "step": 3140 }, { "epoch": 0.5354411014788373, "grad_norm": 8.08110237121582, "learning_rate": 8.03161652218256e-06, "loss": 3.2307, "step": 3150 }, { "epoch": 0.537140914499405, "grad_norm": 13.91906452178955, "learning_rate": 8.057113717491076e-06, "loss": 3.4329, "step": 3160 }, { "epoch": 0.5388407275199728, "grad_norm": 12.821698188781738, "learning_rate": 8.082610912799592e-06, "loss": 3.1852, "step": 3170 }, { "epoch": 0.5405405405405406, "grad_norm": 10.448390007019043, "learning_rate": 8.108108108108109e-06, "loss": 3.2685, "step": 3180 }, { "epoch": 0.5422403535611082, "grad_norm": 9.630447387695312, "learning_rate": 8.133605303416624e-06, "loss": 3.3215, "step": 3190 }, { "epoch": 0.543940166581676, "grad_norm": 11.665550231933594, "learning_rate": 8.15910249872514e-06, "loss": 3.3018, "step": 3200 }, { "epoch": 0.5456399796022438, "grad_norm": 9.476261138916016, "learning_rate": 8.184599694033657e-06, "loss": 3.1869, "step": 3210 }, { "epoch": 0.5473397926228115, "grad_norm": 8.080037117004395, "learning_rate": 8.210096889342174e-06, "loss": 3.1781, "step": 3220 }, { "epoch": 0.5490396056433793, "grad_norm": 18.861425399780273, "learning_rate": 8.235594084650689e-06, "loss": 2.9596, "step": 3230 }, { "epoch": 0.5507394186639469, "grad_norm": 10.488423347473145, "learning_rate": 8.261091279959204e-06, "loss": 3.1511, "step": 3240 }, { "epoch": 0.5524392316845147, "grad_norm": 10.037883758544922, "learning_rate": 8.28658847526772e-06, "loss": 3.2439, "step": 3250 }, { "epoch": 0.5541390447050825, "grad_norm": 11.517376899719238, "learning_rate": 8.312085670576237e-06, "loss": 3.2126, "step": 3260 }, { "epoch": 0.5558388577256502, "grad_norm": 9.818196296691895, "learning_rate": 8.337582865884753e-06, "loss": 3.2523, "step": 3270 }, { "epoch": 0.5575386707462179, "grad_norm": 11.898098945617676, "learning_rate": 8.363080061193268e-06, "loss": 3.0723, "step": 3280 }, { "epoch": 0.5592384837667856, "grad_norm": 10.411894798278809, "learning_rate": 8.388577256501785e-06, "loss": 3.1254, "step": 3290 }, { "epoch": 0.5609382967873534, "grad_norm": 8.960809707641602, "learning_rate": 8.414074451810302e-06, "loss": 3.1539, "step": 3300 }, { "epoch": 0.5626381098079212, "grad_norm": 11.39979076385498, "learning_rate": 8.439571647118818e-06, "loss": 3.2514, "step": 3310 }, { "epoch": 0.5643379228284888, "grad_norm": 12.31738567352295, "learning_rate": 8.465068842427333e-06, "loss": 3.3971, "step": 3320 }, { "epoch": 0.5660377358490566, "grad_norm": 10.953630447387695, "learning_rate": 8.49056603773585e-06, "loss": 3.16, "step": 3330 }, { "epoch": 0.5677375488696244, "grad_norm": 8.98260498046875, "learning_rate": 8.516063233044366e-06, "loss": 3.2735, "step": 3340 }, { "epoch": 0.5694373618901921, "grad_norm": 10.458647727966309, "learning_rate": 8.541560428352881e-06, "loss": 3.1326, "step": 3350 }, { "epoch": 0.5711371749107598, "grad_norm": 14.563250541687012, "learning_rate": 8.567057623661396e-06, "loss": 3.1414, "step": 3360 }, { "epoch": 0.5728369879313275, "grad_norm": 11.054962158203125, "learning_rate": 8.592554818969913e-06, "loss": 3.203, "step": 3370 }, { "epoch": 0.5745368009518953, "grad_norm": 10.51877498626709, "learning_rate": 8.61805201427843e-06, "loss": 3.1292, "step": 3380 }, { "epoch": 0.5762366139724631, "grad_norm": 9.742039680480957, "learning_rate": 8.643549209586946e-06, "loss": 3.0991, "step": 3390 }, { "epoch": 0.5779364269930307, "grad_norm": 11.332913398742676, "learning_rate": 8.669046404895461e-06, "loss": 3.0266, "step": 3400 }, { "epoch": 0.5796362400135985, "grad_norm": 15.534616470336914, "learning_rate": 8.694543600203978e-06, "loss": 3.1418, "step": 3410 }, { "epoch": 0.5813360530341662, "grad_norm": 11.862672805786133, "learning_rate": 8.720040795512494e-06, "loss": 3.1524, "step": 3420 }, { "epoch": 0.583035866054734, "grad_norm": 11.60019588470459, "learning_rate": 8.74553799082101e-06, "loss": 2.8435, "step": 3430 }, { "epoch": 0.5847356790753017, "grad_norm": 15.77658462524414, "learning_rate": 8.771035186129526e-06, "loss": 3.1721, "step": 3440 }, { "epoch": 0.5864354920958694, "grad_norm": 21.336660385131836, "learning_rate": 8.796532381438042e-06, "loss": 3.2867, "step": 3450 }, { "epoch": 0.5881353051164372, "grad_norm": 9.424360275268555, "learning_rate": 8.822029576746559e-06, "loss": 3.2387, "step": 3460 }, { "epoch": 0.589835118137005, "grad_norm": 20.009159088134766, "learning_rate": 8.847526772055074e-06, "loss": 3.2532, "step": 3470 }, { "epoch": 0.5915349311575727, "grad_norm": 11.375692367553711, "learning_rate": 8.87302396736359e-06, "loss": 3.0955, "step": 3480 }, { "epoch": 0.5932347441781404, "grad_norm": 11.379082679748535, "learning_rate": 8.898521162672105e-06, "loss": 3.275, "step": 3490 }, { "epoch": 0.5949345571987081, "grad_norm": 13.359050750732422, "learning_rate": 8.924018357980622e-06, "loss": 3.3023, "step": 3500 }, { "epoch": 0.5966343702192759, "grad_norm": 13.287803649902344, "learning_rate": 8.949515553289139e-06, "loss": 3.4607, "step": 3510 }, { "epoch": 0.5983341832398437, "grad_norm": 18.69312286376953, "learning_rate": 8.975012748597655e-06, "loss": 3.2626, "step": 3520 }, { "epoch": 0.6000339962604113, "grad_norm": 10.662874221801758, "learning_rate": 9.00050994390617e-06, "loss": 3.186, "step": 3530 }, { "epoch": 0.6017338092809791, "grad_norm": 10.468372344970703, "learning_rate": 9.026007139214687e-06, "loss": 3.3084, "step": 3540 }, { "epoch": 0.6034336223015468, "grad_norm": 8.171687126159668, "learning_rate": 9.051504334523203e-06, "loss": 3.2376, "step": 3550 }, { "epoch": 0.6051334353221146, "grad_norm": 9.923846244812012, "learning_rate": 9.07700152983172e-06, "loss": 3.3045, "step": 3560 }, { "epoch": 0.6068332483426823, "grad_norm": 10.945162773132324, "learning_rate": 9.102498725140235e-06, "loss": 3.1312, "step": 3570 }, { "epoch": 0.60853306136325, "grad_norm": 10.397847175598145, "learning_rate": 9.127995920448751e-06, "loss": 3.1429, "step": 3580 }, { "epoch": 0.6102328743838178, "grad_norm": 9.66450309753418, "learning_rate": 9.153493115757266e-06, "loss": 3.2174, "step": 3590 }, { "epoch": 0.6119326874043856, "grad_norm": 8.293842315673828, "learning_rate": 9.178990311065783e-06, "loss": 3.114, "step": 3600 }, { "epoch": 0.6136325004249532, "grad_norm": 17.379987716674805, "learning_rate": 9.204487506374298e-06, "loss": 3.133, "step": 3610 }, { "epoch": 0.615332313445521, "grad_norm": 9.490466117858887, "learning_rate": 9.229984701682814e-06, "loss": 3.1576, "step": 3620 }, { "epoch": 0.6170321264660887, "grad_norm": 9.129146575927734, "learning_rate": 9.255481896991331e-06, "loss": 3.1804, "step": 3630 }, { "epoch": 0.6187319394866565, "grad_norm": 9.310760498046875, "learning_rate": 9.280979092299848e-06, "loss": 3.1048, "step": 3640 }, { "epoch": 0.6204317525072242, "grad_norm": 32.262428283691406, "learning_rate": 9.306476287608363e-06, "loss": 3.05, "step": 3650 }, { "epoch": 0.6221315655277919, "grad_norm": 18.674816131591797, "learning_rate": 9.33197348291688e-06, "loss": 3.3563, "step": 3660 }, { "epoch": 0.6238313785483597, "grad_norm": 12.0899076461792, "learning_rate": 9.357470678225396e-06, "loss": 3.0351, "step": 3670 }, { "epoch": 0.6255311915689274, "grad_norm": 23.684768676757812, "learning_rate": 9.382967873533912e-06, "loss": 3.1843, "step": 3680 }, { "epoch": 0.6272310045894951, "grad_norm": 12.83434009552002, "learning_rate": 9.408465068842427e-06, "loss": 3.0727, "step": 3690 }, { "epoch": 0.6289308176100629, "grad_norm": 10.67946720123291, "learning_rate": 9.433962264150944e-06, "loss": 3.2342, "step": 3700 }, { "epoch": 0.6306306306306306, "grad_norm": 16.078346252441406, "learning_rate": 9.459459459459459e-06, "loss": 3.1644, "step": 3710 }, { "epoch": 0.6323304436511984, "grad_norm": 8.740599632263184, "learning_rate": 9.484956654767975e-06, "loss": 3.3242, "step": 3720 }, { "epoch": 0.6340302566717662, "grad_norm": 12.612327575683594, "learning_rate": 9.510453850076492e-06, "loss": 3.2345, "step": 3730 }, { "epoch": 0.6357300696923338, "grad_norm": 12.91860580444336, "learning_rate": 9.535951045385007e-06, "loss": 3.2458, "step": 3740 }, { "epoch": 0.6374298827129016, "grad_norm": 11.627067565917969, "learning_rate": 9.561448240693524e-06, "loss": 3.2164, "step": 3750 }, { "epoch": 0.6391296957334693, "grad_norm": 15.081028938293457, "learning_rate": 9.58694543600204e-06, "loss": 3.061, "step": 3760 }, { "epoch": 0.6408295087540371, "grad_norm": 22.168771743774414, "learning_rate": 9.612442631310557e-06, "loss": 3.205, "step": 3770 }, { "epoch": 0.6425293217746048, "grad_norm": 7.235875129699707, "learning_rate": 9.637939826619072e-06, "loss": 3.1719, "step": 3780 }, { "epoch": 0.6442291347951725, "grad_norm": 15.274739265441895, "learning_rate": 9.663437021927588e-06, "loss": 3.1196, "step": 3790 }, { "epoch": 0.6459289478157403, "grad_norm": 11.244776725769043, "learning_rate": 9.688934217236105e-06, "loss": 2.868, "step": 3800 }, { "epoch": 0.647628760836308, "grad_norm": 22.259662628173828, "learning_rate": 9.714431412544622e-06, "loss": 3.3902, "step": 3810 }, { "epoch": 0.6493285738568757, "grad_norm": 11.932364463806152, "learning_rate": 9.739928607853136e-06, "loss": 3.3245, "step": 3820 }, { "epoch": 0.6510283868774435, "grad_norm": 19.369796752929688, "learning_rate": 9.765425803161651e-06, "loss": 3.2004, "step": 3830 }, { "epoch": 0.6527281998980112, "grad_norm": 22.055461883544922, "learning_rate": 9.790922998470168e-06, "loss": 3.2325, "step": 3840 }, { "epoch": 0.654428012918579, "grad_norm": 18.751625061035156, "learning_rate": 9.816420193778685e-06, "loss": 3.0153, "step": 3850 }, { "epoch": 0.6561278259391466, "grad_norm": 13.298213958740234, "learning_rate": 9.8419173890872e-06, "loss": 3.2881, "step": 3860 }, { "epoch": 0.6578276389597144, "grad_norm": 10.503677368164062, "learning_rate": 9.867414584395716e-06, "loss": 3.0178, "step": 3870 }, { "epoch": 0.6595274519802822, "grad_norm": 11.025795936584473, "learning_rate": 9.892911779704233e-06, "loss": 2.9868, "step": 3880 }, { "epoch": 0.6612272650008499, "grad_norm": 12.027215957641602, "learning_rate": 9.91840897501275e-06, "loss": 3.0332, "step": 3890 }, { "epoch": 0.6629270780214176, "grad_norm": 11.699767112731934, "learning_rate": 9.943906170321264e-06, "loss": 3.1722, "step": 3900 }, { "epoch": 0.6646268910419854, "grad_norm": 13.553813934326172, "learning_rate": 9.969403365629781e-06, "loss": 3.0914, "step": 3910 }, { "epoch": 0.6663267040625531, "grad_norm": 8.780049324035645, "learning_rate": 9.994900560938297e-06, "loss": 3.1152, "step": 3920 }, { "epoch": 0.6680265170831209, "grad_norm": 12.206955909729004, "learning_rate": 1.0020397756246814e-05, "loss": 2.9839, "step": 3930 }, { "epoch": 0.6697263301036886, "grad_norm": 14.725057601928711, "learning_rate": 1.004589495155533e-05, "loss": 3.3288, "step": 3940 }, { "epoch": 0.6714261431242563, "grad_norm": 10.696678161621094, "learning_rate": 1.0071392146863846e-05, "loss": 3.1446, "step": 3950 }, { "epoch": 0.6731259561448241, "grad_norm": 9.036437034606934, "learning_rate": 1.009688934217236e-05, "loss": 3.2454, "step": 3960 }, { "epoch": 0.6748257691653918, "grad_norm": 8.94646167755127, "learning_rate": 1.0122386537480877e-05, "loss": 3.0637, "step": 3970 }, { "epoch": 0.6765255821859596, "grad_norm": 14.608528137207031, "learning_rate": 1.0147883732789394e-05, "loss": 3.0838, "step": 3980 }, { "epoch": 0.6782253952065272, "grad_norm": 21.47159194946289, "learning_rate": 1.0173380928097909e-05, "loss": 3.2818, "step": 3990 }, { "epoch": 0.679925208227095, "grad_norm": 11.459216117858887, "learning_rate": 1.0198878123406425e-05, "loss": 3.2097, "step": 4000 }, { "epoch": 0.6816250212476628, "grad_norm": 8.457747459411621, "learning_rate": 1.0224375318714942e-05, "loss": 3.1464, "step": 4010 }, { "epoch": 0.6833248342682305, "grad_norm": 12.061871528625488, "learning_rate": 1.0249872514023459e-05, "loss": 3.1237, "step": 4020 }, { "epoch": 0.6850246472887982, "grad_norm": 8.287370681762695, "learning_rate": 1.0275369709331973e-05, "loss": 3.2925, "step": 4030 }, { "epoch": 0.686724460309366, "grad_norm": 14.222463607788086, "learning_rate": 1.030086690464049e-05, "loss": 3.1536, "step": 4040 }, { "epoch": 0.6884242733299337, "grad_norm": 13.310591697692871, "learning_rate": 1.0326364099949007e-05, "loss": 3.0382, "step": 4050 }, { "epoch": 0.6901240863505015, "grad_norm": 15.666090965270996, "learning_rate": 1.0351861295257523e-05, "loss": 3.0042, "step": 4060 }, { "epoch": 0.6918238993710691, "grad_norm": 8.641575813293457, "learning_rate": 1.0377358490566038e-05, "loss": 3.2421, "step": 4070 }, { "epoch": 0.6935237123916369, "grad_norm": 14.367666244506836, "learning_rate": 1.0402855685874553e-05, "loss": 3.2314, "step": 4080 }, { "epoch": 0.6952235254122047, "grad_norm": 8.593974113464355, "learning_rate": 1.042835288118307e-05, "loss": 3.062, "step": 4090 }, { "epoch": 0.6969233384327724, "grad_norm": 11.21434497833252, "learning_rate": 1.0453850076491586e-05, "loss": 3.193, "step": 4100 }, { "epoch": 0.6986231514533401, "grad_norm": 9.636263847351074, "learning_rate": 1.0479347271800101e-05, "loss": 2.9731, "step": 4110 }, { "epoch": 0.7003229644739078, "grad_norm": 11.461636543273926, "learning_rate": 1.0504844467108618e-05, "loss": 2.8977, "step": 4120 }, { "epoch": 0.7020227774944756, "grad_norm": 13.582660675048828, "learning_rate": 1.0530341662417134e-05, "loss": 3.201, "step": 4130 }, { "epoch": 0.7037225905150434, "grad_norm": 20.189695358276367, "learning_rate": 1.0555838857725651e-05, "loss": 3.1502, "step": 4140 }, { "epoch": 0.705422403535611, "grad_norm": 11.565384864807129, "learning_rate": 1.0581336053034166e-05, "loss": 3.1605, "step": 4150 }, { "epoch": 0.7071222165561788, "grad_norm": 12.695282936096191, "learning_rate": 1.0606833248342683e-05, "loss": 3.0693, "step": 4160 }, { "epoch": 0.7088220295767466, "grad_norm": 11.89785385131836, "learning_rate": 1.06323304436512e-05, "loss": 3.1398, "step": 4170 }, { "epoch": 0.7105218425973143, "grad_norm": 24.03660011291504, "learning_rate": 1.0657827638959716e-05, "loss": 3.071, "step": 4180 }, { "epoch": 0.7122216556178821, "grad_norm": 12.20805835723877, "learning_rate": 1.0683324834268232e-05, "loss": 3.1557, "step": 4190 }, { "epoch": 0.7139214686384497, "grad_norm": 12.51597785949707, "learning_rate": 1.0708822029576746e-05, "loss": 3.1118, "step": 4200 }, { "epoch": 0.7156212816590175, "grad_norm": 13.55483627319336, "learning_rate": 1.0734319224885262e-05, "loss": 3.264, "step": 4210 }, { "epoch": 0.7173210946795853, "grad_norm": 18.49689483642578, "learning_rate": 1.0759816420193779e-05, "loss": 3.1127, "step": 4220 }, { "epoch": 0.719020907700153, "grad_norm": 9.318002700805664, "learning_rate": 1.0785313615502295e-05, "loss": 3.2669, "step": 4230 }, { "epoch": 0.7207207207207207, "grad_norm": 13.836160659790039, "learning_rate": 1.081081081081081e-05, "loss": 3.1319, "step": 4240 }, { "epoch": 0.7224205337412885, "grad_norm": 16.53606605529785, "learning_rate": 1.0836308006119327e-05, "loss": 3.0206, "step": 4250 }, { "epoch": 0.7241203467618562, "grad_norm": 14.355804443359375, "learning_rate": 1.0861805201427844e-05, "loss": 3.0796, "step": 4260 }, { "epoch": 0.725820159782424, "grad_norm": 19.92349624633789, "learning_rate": 1.088730239673636e-05, "loss": 2.8805, "step": 4270 }, { "epoch": 0.7275199728029916, "grad_norm": 15.806783676147461, "learning_rate": 1.0912799592044875e-05, "loss": 2.9902, "step": 4280 }, { "epoch": 0.7292197858235594, "grad_norm": 14.752193450927734, "learning_rate": 1.0938296787353392e-05, "loss": 3.1028, "step": 4290 }, { "epoch": 0.7309195988441272, "grad_norm": 13.902088165283203, "learning_rate": 1.0963793982661908e-05, "loss": 2.9751, "step": 4300 }, { "epoch": 0.7326194118646949, "grad_norm": 12.324780464172363, "learning_rate": 1.0989291177970425e-05, "loss": 3.2824, "step": 4310 }, { "epoch": 0.7343192248852626, "grad_norm": 12.409863471984863, "learning_rate": 1.1014788373278938e-05, "loss": 3.122, "step": 4320 }, { "epoch": 0.7360190379058303, "grad_norm": 9.861491203308105, "learning_rate": 1.1040285568587455e-05, "loss": 3.2375, "step": 4330 }, { "epoch": 0.7377188509263981, "grad_norm": 9.019848823547363, "learning_rate": 1.1065782763895971e-05, "loss": 3.0919, "step": 4340 }, { "epoch": 0.7394186639469659, "grad_norm": 14.767576217651367, "learning_rate": 1.1091279959204488e-05, "loss": 3.0239, "step": 4350 }, { "epoch": 0.7411184769675335, "grad_norm": 9.925335884094238, "learning_rate": 1.1116777154513003e-05, "loss": 3.1198, "step": 4360 }, { "epoch": 0.7428182899881013, "grad_norm": 8.941740036010742, "learning_rate": 1.114227434982152e-05, "loss": 2.9879, "step": 4370 }, { "epoch": 0.744518103008669, "grad_norm": 18.021644592285156, "learning_rate": 1.1167771545130036e-05, "loss": 3.0699, "step": 4380 }, { "epoch": 0.7462179160292368, "grad_norm": 12.144454956054688, "learning_rate": 1.1193268740438553e-05, "loss": 3.1572, "step": 4390 }, { "epoch": 0.7479177290498045, "grad_norm": 11.383416175842285, "learning_rate": 1.1218765935747068e-05, "loss": 3.2732, "step": 4400 }, { "epoch": 0.7496175420703722, "grad_norm": 17.053791046142578, "learning_rate": 1.1244263131055584e-05, "loss": 3.2239, "step": 4410 }, { "epoch": 0.75131735509094, "grad_norm": 16.178321838378906, "learning_rate": 1.1269760326364101e-05, "loss": 3.2065, "step": 4420 }, { "epoch": 0.7530171681115078, "grad_norm": 10.801485061645508, "learning_rate": 1.1295257521672617e-05, "loss": 3.2594, "step": 4430 }, { "epoch": 0.7547169811320755, "grad_norm": 12.620925903320312, "learning_rate": 1.1320754716981132e-05, "loss": 3.1531, "step": 4440 }, { "epoch": 0.7564167941526432, "grad_norm": 8.814167022705078, "learning_rate": 1.1346251912289647e-05, "loss": 3.0673, "step": 4450 }, { "epoch": 0.7581166071732109, "grad_norm": 13.035064697265625, "learning_rate": 1.1371749107598164e-05, "loss": 2.879, "step": 4460 }, { "epoch": 0.7598164201937787, "grad_norm": 15.914697647094727, "learning_rate": 1.139724630290668e-05, "loss": 2.9907, "step": 4470 }, { "epoch": 0.7615162332143465, "grad_norm": 19.77353286743164, "learning_rate": 1.1422743498215197e-05, "loss": 3.3016, "step": 4480 }, { "epoch": 0.7632160462349141, "grad_norm": 17.499847412109375, "learning_rate": 1.1448240693523712e-05, "loss": 3.1001, "step": 4490 }, { "epoch": 0.7649158592554819, "grad_norm": 18.461782455444336, "learning_rate": 1.1473737888832229e-05, "loss": 3.0467, "step": 4500 }, { "epoch": 0.7666156722760497, "grad_norm": 10.483536720275879, "learning_rate": 1.1499235084140745e-05, "loss": 3.172, "step": 4510 }, { "epoch": 0.7683154852966174, "grad_norm": 11.847857475280762, "learning_rate": 1.1524732279449262e-05, "loss": 3.1173, "step": 4520 }, { "epoch": 0.7700152983171851, "grad_norm": 7.209905624389648, "learning_rate": 1.1550229474757777e-05, "loss": 3.1651, "step": 4530 }, { "epoch": 0.7717151113377528, "grad_norm": 12.45370101928711, "learning_rate": 1.1575726670066293e-05, "loss": 3.2762, "step": 4540 }, { "epoch": 0.7734149243583206, "grad_norm": 14.217766761779785, "learning_rate": 1.160122386537481e-05, "loss": 3.2151, "step": 4550 }, { "epoch": 0.7751147373788884, "grad_norm": 11.740962982177734, "learning_rate": 1.1626721060683325e-05, "loss": 3.2085, "step": 4560 }, { "epoch": 0.776814550399456, "grad_norm": 12.5169095993042, "learning_rate": 1.165221825599184e-05, "loss": 3.0908, "step": 4570 }, { "epoch": 0.7785143634200238, "grad_norm": 17.221237182617188, "learning_rate": 1.1677715451300356e-05, "loss": 3.269, "step": 4580 }, { "epoch": 0.7802141764405915, "grad_norm": 8.08166790008545, "learning_rate": 1.1703212646608873e-05, "loss": 3.1564, "step": 4590 }, { "epoch": 0.7819139894611593, "grad_norm": 10.850512504577637, "learning_rate": 1.172870984191739e-05, "loss": 2.9558, "step": 4600 }, { "epoch": 0.783613802481727, "grad_norm": 17.077058792114258, "learning_rate": 1.1754207037225905e-05, "loss": 2.8889, "step": 4610 }, { "epoch": 0.7853136155022947, "grad_norm": 13.369579315185547, "learning_rate": 1.1779704232534421e-05, "loss": 3.0464, "step": 4620 }, { "epoch": 0.7870134285228625, "grad_norm": 10.453845024108887, "learning_rate": 1.1805201427842938e-05, "loss": 3.2285, "step": 4630 }, { "epoch": 0.7887132415434303, "grad_norm": 10.738533973693848, "learning_rate": 1.1830698623151454e-05, "loss": 3.3995, "step": 4640 }, { "epoch": 0.7904130545639979, "grad_norm": 19.719512939453125, "learning_rate": 1.185619581845997e-05, "loss": 3.1451, "step": 4650 }, { "epoch": 0.7921128675845657, "grad_norm": 11.245988845825195, "learning_rate": 1.1881693013768486e-05, "loss": 3.2594, "step": 4660 }, { "epoch": 0.7938126806051334, "grad_norm": 13.23965835571289, "learning_rate": 1.1907190209077003e-05, "loss": 3.2602, "step": 4670 }, { "epoch": 0.7955124936257012, "grad_norm": 19.181047439575195, "learning_rate": 1.1932687404385517e-05, "loss": 3.0092, "step": 4680 }, { "epoch": 0.797212306646269, "grad_norm": 15.932703971862793, "learning_rate": 1.1958184599694034e-05, "loss": 3.2126, "step": 4690 }, { "epoch": 0.7989121196668366, "grad_norm": 12.468653678894043, "learning_rate": 1.1983681795002549e-05, "loss": 3.0921, "step": 4700 }, { "epoch": 0.8006119326874044, "grad_norm": 13.808542251586914, "learning_rate": 1.2009178990311066e-05, "loss": 2.9738, "step": 4710 }, { "epoch": 0.8023117457079721, "grad_norm": 13.965943336486816, "learning_rate": 1.2034676185619582e-05, "loss": 3.1756, "step": 4720 }, { "epoch": 0.8040115587285399, "grad_norm": 11.100417137145996, "learning_rate": 1.2060173380928099e-05, "loss": 3.1025, "step": 4730 }, { "epoch": 0.8057113717491076, "grad_norm": 8.195331573486328, "learning_rate": 1.2085670576236614e-05, "loss": 3.2392, "step": 4740 }, { "epoch": 0.8074111847696753, "grad_norm": 13.367034912109375, "learning_rate": 1.211116777154513e-05, "loss": 3.1623, "step": 4750 }, { "epoch": 0.8091109977902431, "grad_norm": 13.571057319641113, "learning_rate": 1.2136664966853647e-05, "loss": 3.2048, "step": 4760 }, { "epoch": 0.8108108108108109, "grad_norm": 9.458085060119629, "learning_rate": 1.2162162162162164e-05, "loss": 3.2483, "step": 4770 }, { "epoch": 0.8125106238313785, "grad_norm": 11.54200267791748, "learning_rate": 1.2187659357470678e-05, "loss": 3.3721, "step": 4780 }, { "epoch": 0.8142104368519463, "grad_norm": 15.455368041992188, "learning_rate": 1.2213156552779195e-05, "loss": 3.0817, "step": 4790 }, { "epoch": 0.815910249872514, "grad_norm": 12.639066696166992, "learning_rate": 1.2238653748087712e-05, "loss": 3.1073, "step": 4800 }, { "epoch": 0.8176100628930818, "grad_norm": 11.447978973388672, "learning_rate": 1.2264150943396227e-05, "loss": 2.9345, "step": 4810 }, { "epoch": 0.8193098759136495, "grad_norm": 11.833393096923828, "learning_rate": 1.2289648138704742e-05, "loss": 3.1226, "step": 4820 }, { "epoch": 0.8210096889342172, "grad_norm": 11.435378074645996, "learning_rate": 1.2315145334013258e-05, "loss": 3.0095, "step": 4830 }, { "epoch": 0.822709501954785, "grad_norm": 11.14553165435791, "learning_rate": 1.2340642529321775e-05, "loss": 3.1344, "step": 4840 }, { "epoch": 0.8244093149753527, "grad_norm": 11.785877227783203, "learning_rate": 1.2366139724630291e-05, "loss": 2.95, "step": 4850 }, { "epoch": 0.8261091279959204, "grad_norm": 11.952654838562012, "learning_rate": 1.2391636919938806e-05, "loss": 3.1479, "step": 4860 }, { "epoch": 0.8278089410164882, "grad_norm": 7.311314105987549, "learning_rate": 1.2417134115247323e-05, "loss": 3.0715, "step": 4870 }, { "epoch": 0.8295087540370559, "grad_norm": 12.729374885559082, "learning_rate": 1.244263131055584e-05, "loss": 3.1882, "step": 4880 }, { "epoch": 0.8312085670576237, "grad_norm": 16.40912437438965, "learning_rate": 1.2468128505864356e-05, "loss": 3.2723, "step": 4890 }, { "epoch": 0.8329083800781913, "grad_norm": 15.855485916137695, "learning_rate": 1.2493625701172871e-05, "loss": 2.8996, "step": 4900 }, { "epoch": 0.8346081930987591, "grad_norm": 11.758940696716309, "learning_rate": 1.2519122896481388e-05, "loss": 3.0292, "step": 4910 }, { "epoch": 0.8363080061193269, "grad_norm": 9.617775917053223, "learning_rate": 1.2544620091789904e-05, "loss": 3.2076, "step": 4920 }, { "epoch": 0.8380078191398946, "grad_norm": 14.5275239944458, "learning_rate": 1.257011728709842e-05, "loss": 2.9941, "step": 4930 }, { "epoch": 0.8397076321604624, "grad_norm": 18.696931838989258, "learning_rate": 1.2595614482406936e-05, "loss": 3.0199, "step": 4940 }, { "epoch": 0.8414074451810301, "grad_norm": 13.407628059387207, "learning_rate": 1.262111167771545e-05, "loss": 3.2312, "step": 4950 }, { "epoch": 0.8431072582015978, "grad_norm": 9.699679374694824, "learning_rate": 1.2646608873023967e-05, "loss": 3.2488, "step": 4960 }, { "epoch": 0.8448070712221656, "grad_norm": 10.92834186553955, "learning_rate": 1.2672106068332484e-05, "loss": 3.1854, "step": 4970 }, { "epoch": 0.8465068842427333, "grad_norm": 16.945674896240234, "learning_rate": 1.2697603263641e-05, "loss": 2.9592, "step": 4980 }, { "epoch": 0.848206697263301, "grad_norm": 13.415432929992676, "learning_rate": 1.2723100458949515e-05, "loss": 3.0664, "step": 4990 }, { "epoch": 0.8499065102838688, "grad_norm": 7.1043195724487305, "learning_rate": 1.2748597654258032e-05, "loss": 3.1285, "step": 5000 }, { "epoch": 0.8516063233044365, "grad_norm": 12.606735229492188, "learning_rate": 1.2774094849566549e-05, "loss": 3.083, "step": 5010 }, { "epoch": 0.8533061363250043, "grad_norm": 13.419086456298828, "learning_rate": 1.2799592044875065e-05, "loss": 3.3599, "step": 5020 }, { "epoch": 0.855005949345572, "grad_norm": 10.532175064086914, "learning_rate": 1.282508924018358e-05, "loss": 2.9347, "step": 5030 }, { "epoch": 0.8567057623661397, "grad_norm": 19.814544677734375, "learning_rate": 1.2850586435492097e-05, "loss": 3.4067, "step": 5040 }, { "epoch": 0.8584055753867075, "grad_norm": 11.688233375549316, "learning_rate": 1.2876083630800612e-05, "loss": 3.105, "step": 5050 }, { "epoch": 0.8601053884072752, "grad_norm": 15.496794700622559, "learning_rate": 1.2901580826109128e-05, "loss": 3.0153, "step": 5060 }, { "epoch": 0.8618052014278429, "grad_norm": 12.994009971618652, "learning_rate": 1.2927078021417643e-05, "loss": 3.1678, "step": 5070 }, { "epoch": 0.8635050144484107, "grad_norm": 10.579789161682129, "learning_rate": 1.295257521672616e-05, "loss": 3.0643, "step": 5080 }, { "epoch": 0.8652048274689784, "grad_norm": 16.27181053161621, "learning_rate": 1.2978072412034676e-05, "loss": 3.2009, "step": 5090 }, { "epoch": 0.8669046404895462, "grad_norm": 9.2947998046875, "learning_rate": 1.3003569607343193e-05, "loss": 3.15, "step": 5100 }, { "epoch": 0.8686044535101138, "grad_norm": 18.95619773864746, "learning_rate": 1.3029066802651708e-05, "loss": 3.3199, "step": 5110 }, { "epoch": 0.8703042665306816, "grad_norm": 10.991951942443848, "learning_rate": 1.3054563997960225e-05, "loss": 2.9242, "step": 5120 }, { "epoch": 0.8720040795512494, "grad_norm": 8.475244522094727, "learning_rate": 1.3080061193268741e-05, "loss": 3.1723, "step": 5130 }, { "epoch": 0.8737038925718171, "grad_norm": 15.63136100769043, "learning_rate": 1.3105558388577258e-05, "loss": 3.3188, "step": 5140 }, { "epoch": 0.8754037055923848, "grad_norm": 10.039711952209473, "learning_rate": 1.3131055583885773e-05, "loss": 3.2678, "step": 5150 }, { "epoch": 0.8771035186129525, "grad_norm": 20.525917053222656, "learning_rate": 1.315655277919429e-05, "loss": 3.2325, "step": 5160 }, { "epoch": 0.8788033316335203, "grad_norm": 18.65244483947754, "learning_rate": 1.3182049974502804e-05, "loss": 3.0805, "step": 5170 }, { "epoch": 0.8805031446540881, "grad_norm": 8.498313903808594, "learning_rate": 1.320754716981132e-05, "loss": 3.2013, "step": 5180 }, { "epoch": 0.8822029576746558, "grad_norm": 12.777634620666504, "learning_rate": 1.3233044365119837e-05, "loss": 3.1706, "step": 5190 }, { "epoch": 0.8839027706952235, "grad_norm": 9.833357810974121, "learning_rate": 1.3258541560428352e-05, "loss": 3.2333, "step": 5200 }, { "epoch": 0.8856025837157913, "grad_norm": 16.285722732543945, "learning_rate": 1.3284038755736869e-05, "loss": 3.0083, "step": 5210 }, { "epoch": 0.887302396736359, "grad_norm": 10.922857284545898, "learning_rate": 1.3309535951045386e-05, "loss": 3.2081, "step": 5220 }, { "epoch": 0.8890022097569268, "grad_norm": 15.91651439666748, "learning_rate": 1.3335033146353902e-05, "loss": 3.1968, "step": 5230 }, { "epoch": 0.8907020227774944, "grad_norm": 12.250123023986816, "learning_rate": 1.3360530341662417e-05, "loss": 3.0789, "step": 5240 }, { "epoch": 0.8924018357980622, "grad_norm": 14.351374626159668, "learning_rate": 1.3386027536970934e-05, "loss": 3.053, "step": 5250 }, { "epoch": 0.89410164881863, "grad_norm": 18.923492431640625, "learning_rate": 1.341152473227945e-05, "loss": 3.0389, "step": 5260 }, { "epoch": 0.8958014618391977, "grad_norm": 10.724119186401367, "learning_rate": 1.3437021927587967e-05, "loss": 3.3154, "step": 5270 }, { "epoch": 0.8975012748597654, "grad_norm": 14.690865516662598, "learning_rate": 1.3462519122896482e-05, "loss": 3.1679, "step": 5280 }, { "epoch": 0.8992010878803331, "grad_norm": 15.007009506225586, "learning_rate": 1.3488016318204997e-05, "loss": 3.4481, "step": 5290 }, { "epoch": 0.9009009009009009, "grad_norm": 10.77540397644043, "learning_rate": 1.3513513513513513e-05, "loss": 3.3465, "step": 5300 }, { "epoch": 0.9026007139214687, "grad_norm": 14.751619338989258, "learning_rate": 1.353901070882203e-05, "loss": 3.0768, "step": 5310 }, { "epoch": 0.9043005269420363, "grad_norm": 13.693533897399902, "learning_rate": 1.3564507904130545e-05, "loss": 3.2568, "step": 5320 }, { "epoch": 0.9060003399626041, "grad_norm": 17.435483932495117, "learning_rate": 1.3590005099439062e-05, "loss": 3.0134, "step": 5330 }, { "epoch": 0.9077001529831719, "grad_norm": 10.970024108886719, "learning_rate": 1.3615502294747578e-05, "loss": 3.1865, "step": 5340 }, { "epoch": 0.9093999660037396, "grad_norm": 10.266316413879395, "learning_rate": 1.3640999490056095e-05, "loss": 3.064, "step": 5350 }, { "epoch": 0.9110997790243073, "grad_norm": 12.489026069641113, "learning_rate": 1.366649668536461e-05, "loss": 3.0657, "step": 5360 }, { "epoch": 0.912799592044875, "grad_norm": 14.964912414550781, "learning_rate": 1.3691993880673126e-05, "loss": 2.9105, "step": 5370 }, { "epoch": 0.9144994050654428, "grad_norm": 21.21876335144043, "learning_rate": 1.3717491075981643e-05, "loss": 3.2266, "step": 5380 }, { "epoch": 0.9161992180860106, "grad_norm": 15.731476783752441, "learning_rate": 1.374298827129016e-05, "loss": 3.1894, "step": 5390 }, { "epoch": 0.9178990311065782, "grad_norm": 18.860591888427734, "learning_rate": 1.3768485466598674e-05, "loss": 3.1735, "step": 5400 }, { "epoch": 0.919598844127146, "grad_norm": 14.236080169677734, "learning_rate": 1.379398266190719e-05, "loss": 3.3689, "step": 5410 }, { "epoch": 0.9212986571477138, "grad_norm": 10.072312355041504, "learning_rate": 1.3819479857215706e-05, "loss": 3.2977, "step": 5420 }, { "epoch": 0.9229984701682815, "grad_norm": 12.595905303955078, "learning_rate": 1.3844977052524223e-05, "loss": 3.1361, "step": 5430 }, { "epoch": 0.9246982831888493, "grad_norm": 11.909892082214355, "learning_rate": 1.3870474247832739e-05, "loss": 3.2818, "step": 5440 }, { "epoch": 0.9263980962094169, "grad_norm": 13.80375862121582, "learning_rate": 1.3895971443141254e-05, "loss": 3.2163, "step": 5450 }, { "epoch": 0.9280979092299847, "grad_norm": 6.5016188621521, "learning_rate": 1.392146863844977e-05, "loss": 3.1588, "step": 5460 }, { "epoch": 0.9297977222505525, "grad_norm": 14.719461441040039, "learning_rate": 1.3946965833758287e-05, "loss": 3.1408, "step": 5470 }, { "epoch": 0.9314975352711202, "grad_norm": 182.97003173828125, "learning_rate": 1.3972463029066804e-05, "loss": 3.0876, "step": 5480 }, { "epoch": 0.9331973482916879, "grad_norm": 11.720510482788086, "learning_rate": 1.3997960224375319e-05, "loss": 3.1176, "step": 5490 }, { "epoch": 0.9348971613122556, "grad_norm": 12.402026176452637, "learning_rate": 1.4023457419683835e-05, "loss": 3.2145, "step": 5500 }, { "epoch": 0.9365969743328234, "grad_norm": 13.029556274414062, "learning_rate": 1.4048954614992352e-05, "loss": 3.2752, "step": 5510 }, { "epoch": 0.9382967873533912, "grad_norm": 7.776852130889893, "learning_rate": 1.4074451810300869e-05, "loss": 3.2212, "step": 5520 }, { "epoch": 0.9399966003739588, "grad_norm": 12.444241523742676, "learning_rate": 1.4099949005609384e-05, "loss": 3.0361, "step": 5530 }, { "epoch": 0.9416964133945266, "grad_norm": 12.063976287841797, "learning_rate": 1.4125446200917898e-05, "loss": 2.8759, "step": 5540 }, { "epoch": 0.9433962264150944, "grad_norm": 11.813680648803711, "learning_rate": 1.4150943396226415e-05, "loss": 3.0657, "step": 5550 }, { "epoch": 0.9450960394356621, "grad_norm": 11.741292953491211, "learning_rate": 1.4176440591534932e-05, "loss": 3.1425, "step": 5560 }, { "epoch": 0.9467958524562298, "grad_norm": 25.957914352416992, "learning_rate": 1.4201937786843447e-05, "loss": 3.2839, "step": 5570 }, { "epoch": 0.9484956654767975, "grad_norm": 8.54137134552002, "learning_rate": 1.4227434982151963e-05, "loss": 3.0309, "step": 5580 }, { "epoch": 0.9501954784973653, "grad_norm": 9.651363372802734, "learning_rate": 1.425293217746048e-05, "loss": 3.1894, "step": 5590 }, { "epoch": 0.9518952915179331, "grad_norm": 10.516382217407227, "learning_rate": 1.4278429372768996e-05, "loss": 3.1435, "step": 5600 }, { "epoch": 0.9535951045385007, "grad_norm": 11.838349342346191, "learning_rate": 1.4303926568077511e-05, "loss": 3.1986, "step": 5610 }, { "epoch": 0.9552949175590685, "grad_norm": 17.808353424072266, "learning_rate": 1.4329423763386028e-05, "loss": 3.1049, "step": 5620 }, { "epoch": 0.9569947305796362, "grad_norm": 11.117119789123535, "learning_rate": 1.4354920958694545e-05, "loss": 3.1514, "step": 5630 }, { "epoch": 0.958694543600204, "grad_norm": 17.050132751464844, "learning_rate": 1.4380418154003061e-05, "loss": 3.024, "step": 5640 }, { "epoch": 0.9603943566207717, "grad_norm": 28.01728630065918, "learning_rate": 1.4405915349311576e-05, "loss": 2.9755, "step": 5650 }, { "epoch": 0.9620941696413394, "grad_norm": 11.773494720458984, "learning_rate": 1.4431412544620091e-05, "loss": 3.0657, "step": 5660 }, { "epoch": 0.9637939826619072, "grad_norm": 12.502875328063965, "learning_rate": 1.4456909739928608e-05, "loss": 3.1165, "step": 5670 }, { "epoch": 0.965493795682475, "grad_norm": 14.018399238586426, "learning_rate": 1.4482406935237124e-05, "loss": 3.1737, "step": 5680 }, { "epoch": 0.9671936087030427, "grad_norm": 17.347026824951172, "learning_rate": 1.450790413054564e-05, "loss": 2.9692, "step": 5690 }, { "epoch": 0.9688934217236104, "grad_norm": 13.59555721282959, "learning_rate": 1.4533401325854156e-05, "loss": 2.9575, "step": 5700 }, { "epoch": 0.9705932347441781, "grad_norm": 24.551652908325195, "learning_rate": 1.4558898521162672e-05, "loss": 3.0491, "step": 5710 }, { "epoch": 0.9722930477647459, "grad_norm": 11.310931205749512, "learning_rate": 1.4584395716471189e-05, "loss": 3.3173, "step": 5720 }, { "epoch": 0.9739928607853137, "grad_norm": 22.989946365356445, "learning_rate": 1.4609892911779706e-05, "loss": 3.2307, "step": 5730 }, { "epoch": 0.9756926738058813, "grad_norm": 8.932762145996094, "learning_rate": 1.463539010708822e-05, "loss": 2.9856, "step": 5740 }, { "epoch": 0.9773924868264491, "grad_norm": 9.657200813293457, "learning_rate": 1.4660887302396737e-05, "loss": 2.9917, "step": 5750 }, { "epoch": 0.9790922998470168, "grad_norm": 13.11313247680664, "learning_rate": 1.4686384497705254e-05, "loss": 3.1001, "step": 5760 }, { "epoch": 0.9807921128675846, "grad_norm": 10.844243049621582, "learning_rate": 1.471188169301377e-05, "loss": 3.2436, "step": 5770 }, { "epoch": 0.9824919258881523, "grad_norm": 14.14329719543457, "learning_rate": 1.4737378888322284e-05, "loss": 3.1018, "step": 5780 }, { "epoch": 0.98419173890872, "grad_norm": 17.60891342163086, "learning_rate": 1.47628760836308e-05, "loss": 3.4034, "step": 5790 }, { "epoch": 0.9858915519292878, "grad_norm": 10.301651954650879, "learning_rate": 1.4788373278939317e-05, "loss": 3.2105, "step": 5800 }, { "epoch": 0.9875913649498556, "grad_norm": 8.932085037231445, "learning_rate": 1.4813870474247833e-05, "loss": 3.1481, "step": 5810 }, { "epoch": 0.9892911779704232, "grad_norm": 10.628128051757812, "learning_rate": 1.4839367669556348e-05, "loss": 3.0442, "step": 5820 }, { "epoch": 0.990990990990991, "grad_norm": 6.971434116363525, "learning_rate": 1.4864864864864865e-05, "loss": 3.1957, "step": 5830 }, { "epoch": 0.9926908040115587, "grad_norm": 16.54310417175293, "learning_rate": 1.4890362060173381e-05, "loss": 2.9899, "step": 5840 }, { "epoch": 0.9943906170321265, "grad_norm": 13.878704071044922, "learning_rate": 1.4915859255481898e-05, "loss": 3.039, "step": 5850 }, { "epoch": 0.9960904300526942, "grad_norm": 13.312946319580078, "learning_rate": 1.4941356450790413e-05, "loss": 3.1054, "step": 5860 }, { "epoch": 0.9977902430732619, "grad_norm": 14.06927490234375, "learning_rate": 1.496685364609893e-05, "loss": 3.1348, "step": 5870 }, { "epoch": 0.9994900560938297, "grad_norm": 15.188454627990723, "learning_rate": 1.4992350841407446e-05, "loss": 3.1527, "step": 5880 }, { "epoch": 1.0, "eval_cer": 1.0, "eval_loss": 3.4367897510528564, "eval_runtime": 2071.1773, "eval_samples_per_second": 0.227, "eval_steps_per_second": 0.227, "step": 5883 }, { "epoch": 1.0011898691143974, "grad_norm": 9.099020004272461, "learning_rate": 1.5017848036715963e-05, "loss": 2.8946, "step": 5890 }, { "epoch": 1.002889682134965, "grad_norm": 16.559484481811523, "learning_rate": 1.5043345232024476e-05, "loss": 2.9737, "step": 5900 }, { "epoch": 1.004589495155533, "grad_norm": 13.34508991241455, "learning_rate": 1.5068842427332994e-05, "loss": 3.1793, "step": 5910 }, { "epoch": 1.0062893081761006, "grad_norm": 9.121037483215332, "learning_rate": 1.509433962264151e-05, "loss": 3.0705, "step": 5920 }, { "epoch": 1.0079891211966683, "grad_norm": 19.170673370361328, "learning_rate": 1.5119836817950024e-05, "loss": 2.8761, "step": 5930 }, { "epoch": 1.0096889342172362, "grad_norm": 24.546308517456055, "learning_rate": 1.5145334013258543e-05, "loss": 2.7371, "step": 5940 }, { "epoch": 1.0113887472378038, "grad_norm": 11.402448654174805, "learning_rate": 1.5170831208567057e-05, "loss": 2.9814, "step": 5950 }, { "epoch": 1.0130885602583717, "grad_norm": 19.692134857177734, "learning_rate": 1.5196328403875576e-05, "loss": 2.9445, "step": 5960 }, { "epoch": 1.0147883732789393, "grad_norm": 25.491727828979492, "learning_rate": 1.522182559918409e-05, "loss": 2.9768, "step": 5970 }, { "epoch": 1.016488186299507, "grad_norm": 11.527777671813965, "learning_rate": 1.5247322794492606e-05, "loss": 3.2455, "step": 5980 }, { "epoch": 1.0181879993200749, "grad_norm": 14.239469528198242, "learning_rate": 1.5272819989801122e-05, "loss": 2.9832, "step": 5990 }, { "epoch": 1.0198878123406425, "grad_norm": 10.149969100952148, "learning_rate": 1.5298317185109637e-05, "loss": 3.0282, "step": 6000 }, { "epoch": 1.0215876253612102, "grad_norm": 10.683549880981445, "learning_rate": 1.5323814380418152e-05, "loss": 3.02, "step": 6010 }, { "epoch": 1.023287438381778, "grad_norm": 9.013233184814453, "learning_rate": 1.534931157572667e-05, "loss": 3.0264, "step": 6020 }, { "epoch": 1.0249872514023457, "grad_norm": 9.101768493652344, "learning_rate": 1.5374808771035185e-05, "loss": 3.0682, "step": 6030 }, { "epoch": 1.0266870644229136, "grad_norm": 21.46320152282715, "learning_rate": 1.5400305966343704e-05, "loss": 3.063, "step": 6040 }, { "epoch": 1.0283868774434812, "grad_norm": 15.119242668151855, "learning_rate": 1.542580316165222e-05, "loss": 2.9123, "step": 6050 }, { "epoch": 1.0300866904640489, "grad_norm": 12.21047592163086, "learning_rate": 1.5451300356960733e-05, "loss": 3.1802, "step": 6060 }, { "epoch": 1.0317865034846168, "grad_norm": 13.663722038269043, "learning_rate": 1.547679755226925e-05, "loss": 2.9912, "step": 6070 }, { "epoch": 1.0334863165051844, "grad_norm": 10.665254592895508, "learning_rate": 1.5502294747577767e-05, "loss": 2.881, "step": 6080 }, { "epoch": 1.035186129525752, "grad_norm": 13.434707641601562, "learning_rate": 1.552779194288628e-05, "loss": 3.1915, "step": 6090 }, { "epoch": 1.03688594254632, "grad_norm": 9.631866455078125, "learning_rate": 1.55532891381948e-05, "loss": 3.1161, "step": 6100 }, { "epoch": 1.0385857555668876, "grad_norm": 16.676883697509766, "learning_rate": 1.5578786333503315e-05, "loss": 2.8594, "step": 6110 }, { "epoch": 1.0402855685874555, "grad_norm": 11.73638916015625, "learning_rate": 1.5604283528811833e-05, "loss": 2.9496, "step": 6120 }, { "epoch": 1.0419853816080231, "grad_norm": 14.010331153869629, "learning_rate": 1.5629780724120348e-05, "loss": 3.0531, "step": 6130 }, { "epoch": 1.0436851946285908, "grad_norm": 16.196941375732422, "learning_rate": 1.5655277919428863e-05, "loss": 3.1241, "step": 6140 }, { "epoch": 1.0453850076491586, "grad_norm": 18.473928451538086, "learning_rate": 1.568077511473738e-05, "loss": 3.0146, "step": 6150 }, { "epoch": 1.0470848206697263, "grad_norm": 15.512066841125488, "learning_rate": 1.5706272310045896e-05, "loss": 3.0989, "step": 6160 }, { "epoch": 1.048784633690294, "grad_norm": 18.589357376098633, "learning_rate": 1.5731769505354414e-05, "loss": 3.0182, "step": 6170 }, { "epoch": 1.0504844467108618, "grad_norm": 14.080510139465332, "learning_rate": 1.575726670066293e-05, "loss": 2.9406, "step": 6180 }, { "epoch": 1.0521842597314295, "grad_norm": 12.041447639465332, "learning_rate": 1.5782763895971444e-05, "loss": 3.1703, "step": 6190 }, { "epoch": 1.0538840727519974, "grad_norm": 9.438977241516113, "learning_rate": 1.580826109127996e-05, "loss": 2.857, "step": 6200 }, { "epoch": 1.055583885772565, "grad_norm": 13.741844177246094, "learning_rate": 1.5833758286588474e-05, "loss": 2.9302, "step": 6210 }, { "epoch": 1.0572836987931327, "grad_norm": 14.813919067382812, "learning_rate": 1.585925548189699e-05, "loss": 3.2088, "step": 6220 }, { "epoch": 1.0589835118137005, "grad_norm": 11.130279541015625, "learning_rate": 1.5884752677205507e-05, "loss": 3.0398, "step": 6230 }, { "epoch": 1.0606833248342682, "grad_norm": 9.82925796508789, "learning_rate": 1.5910249872514022e-05, "loss": 3.1771, "step": 6240 }, { "epoch": 1.062383137854836, "grad_norm": 12.267308235168457, "learning_rate": 1.593574706782254e-05, "loss": 3.1362, "step": 6250 }, { "epoch": 1.0640829508754037, "grad_norm": 10.790688514709473, "learning_rate": 1.5961244263131055e-05, "loss": 3.0045, "step": 6260 }, { "epoch": 1.0657827638959714, "grad_norm": 9.349706649780273, "learning_rate": 1.598674145843957e-05, "loss": 2.9947, "step": 6270 }, { "epoch": 1.0674825769165392, "grad_norm": 14.729166984558105, "learning_rate": 1.601223865374809e-05, "loss": 2.9557, "step": 6280 }, { "epoch": 1.069182389937107, "grad_norm": 14.068483352661133, "learning_rate": 1.6037735849056604e-05, "loss": 3.0189, "step": 6290 }, { "epoch": 1.0708822029576746, "grad_norm": 16.30710220336914, "learning_rate": 1.606323304436512e-05, "loss": 3.0357, "step": 6300 }, { "epoch": 1.0725820159782424, "grad_norm": 15.822524070739746, "learning_rate": 1.6088730239673637e-05, "loss": 3.016, "step": 6310 }, { "epoch": 1.07428182899881, "grad_norm": 7.990253448486328, "learning_rate": 1.611422743498215e-05, "loss": 3.0887, "step": 6320 }, { "epoch": 1.075981642019378, "grad_norm": 18.19045639038086, "learning_rate": 1.613972463029067e-05, "loss": 2.9577, "step": 6330 }, { "epoch": 1.0776814550399456, "grad_norm": 17.04360580444336, "learning_rate": 1.6165221825599185e-05, "loss": 2.8723, "step": 6340 }, { "epoch": 1.0793812680605133, "grad_norm": 13.9346284866333, "learning_rate": 1.61907190209077e-05, "loss": 2.925, "step": 6350 }, { "epoch": 1.0810810810810811, "grad_norm": 10.968546867370605, "learning_rate": 1.6216216216216218e-05, "loss": 3.0226, "step": 6360 }, { "epoch": 1.0827808941016488, "grad_norm": 17.10808753967285, "learning_rate": 1.6241713411524733e-05, "loss": 3.0176, "step": 6370 }, { "epoch": 1.0844807071222164, "grad_norm": 11.325939178466797, "learning_rate": 1.6267210606833248e-05, "loss": 3.1996, "step": 6380 }, { "epoch": 1.0861805201427843, "grad_norm": 12.284127235412598, "learning_rate": 1.6292707802141766e-05, "loss": 3.1934, "step": 6390 }, { "epoch": 1.087880333163352, "grad_norm": 12.81438159942627, "learning_rate": 1.631820499745028e-05, "loss": 2.9695, "step": 6400 }, { "epoch": 1.0895801461839199, "grad_norm": 12.256128311157227, "learning_rate": 1.63437021927588e-05, "loss": 2.8376, "step": 6410 }, { "epoch": 1.0912799592044875, "grad_norm": 11.762933731079102, "learning_rate": 1.6369199388067314e-05, "loss": 3.172, "step": 6420 }, { "epoch": 1.0929797722250552, "grad_norm": 23.581899642944336, "learning_rate": 1.639469658337583e-05, "loss": 3.1097, "step": 6430 }, { "epoch": 1.094679585245623, "grad_norm": 14.523329734802246, "learning_rate": 1.6420193778684348e-05, "loss": 2.9236, "step": 6440 }, { "epoch": 1.0963793982661907, "grad_norm": 12.841472625732422, "learning_rate": 1.644569097399286e-05, "loss": 2.7919, "step": 6450 }, { "epoch": 1.0980792112867586, "grad_norm": 13.873420715332031, "learning_rate": 1.6471188169301377e-05, "loss": 3.1072, "step": 6460 }, { "epoch": 1.0997790243073262, "grad_norm": 14.535904884338379, "learning_rate": 1.6496685364609892e-05, "loss": 3.0173, "step": 6470 }, { "epoch": 1.1014788373278939, "grad_norm": 12.890002250671387, "learning_rate": 1.6522182559918407e-05, "loss": 3.0749, "step": 6480 }, { "epoch": 1.1031786503484617, "grad_norm": 17.308191299438477, "learning_rate": 1.6547679755226926e-05, "loss": 3.2062, "step": 6490 }, { "epoch": 1.1048784633690294, "grad_norm": 12.21264362335205, "learning_rate": 1.657317695053544e-05, "loss": 2.9969, "step": 6500 }, { "epoch": 1.106578276389597, "grad_norm": 15.282565116882324, "learning_rate": 1.6598674145843955e-05, "loss": 3.1357, "step": 6510 }, { "epoch": 1.108278089410165, "grad_norm": 9.713172912597656, "learning_rate": 1.6624171341152474e-05, "loss": 3.006, "step": 6520 }, { "epoch": 1.1099779024307326, "grad_norm": 8.177983283996582, "learning_rate": 1.664966853646099e-05, "loss": 2.9272, "step": 6530 }, { "epoch": 1.1116777154513005, "grad_norm": 13.164339065551758, "learning_rate": 1.6675165731769507e-05, "loss": 3.2712, "step": 6540 }, { "epoch": 1.113377528471868, "grad_norm": 14.968598365783691, "learning_rate": 1.6700662927078022e-05, "loss": 3.0847, "step": 6550 }, { "epoch": 1.1150773414924358, "grad_norm": 14.435781478881836, "learning_rate": 1.6726160122386537e-05, "loss": 2.9161, "step": 6560 }, { "epoch": 1.1167771545130036, "grad_norm": 11.859112739562988, "learning_rate": 1.6751657317695055e-05, "loss": 2.8785, "step": 6570 }, { "epoch": 1.1184769675335713, "grad_norm": 10.635931968688965, "learning_rate": 1.677715451300357e-05, "loss": 3.1069, "step": 6580 }, { "epoch": 1.120176780554139, "grad_norm": 7.384354114532471, "learning_rate": 1.6802651708312085e-05, "loss": 3.0149, "step": 6590 }, { "epoch": 1.1218765935747068, "grad_norm": 11.08924674987793, "learning_rate": 1.6828148903620603e-05, "loss": 3.0963, "step": 6600 }, { "epoch": 1.1235764065952745, "grad_norm": 14.599223136901855, "learning_rate": 1.6853646098929118e-05, "loss": 3.0298, "step": 6610 }, { "epoch": 1.1252762196158423, "grad_norm": 12.829401016235352, "learning_rate": 1.6879143294237636e-05, "loss": 3.1239, "step": 6620 }, { "epoch": 1.12697603263641, "grad_norm": 9.800748825073242, "learning_rate": 1.690464048954615e-05, "loss": 3.0247, "step": 6630 }, { "epoch": 1.1286758456569776, "grad_norm": 14.746355056762695, "learning_rate": 1.6930137684854666e-05, "loss": 3.013, "step": 6640 }, { "epoch": 1.1303756586775455, "grad_norm": 12.41847038269043, "learning_rate": 1.6955634880163185e-05, "loss": 2.911, "step": 6650 }, { "epoch": 1.1320754716981132, "grad_norm": 14.528356552124023, "learning_rate": 1.69811320754717e-05, "loss": 3.2038, "step": 6660 }, { "epoch": 1.133775284718681, "grad_norm": 12.187211990356445, "learning_rate": 1.7006629270780218e-05, "loss": 3.1185, "step": 6670 }, { "epoch": 1.1354750977392487, "grad_norm": 9.254138946533203, "learning_rate": 1.7032126466088733e-05, "loss": 3.0945, "step": 6680 }, { "epoch": 1.1371749107598164, "grad_norm": 13.008185386657715, "learning_rate": 1.7057623661397244e-05, "loss": 2.9821, "step": 6690 }, { "epoch": 1.1388747237803842, "grad_norm": 9.45468807220459, "learning_rate": 1.7083120856705762e-05, "loss": 3.165, "step": 6700 }, { "epoch": 1.1405745368009519, "grad_norm": 26.485496520996094, "learning_rate": 1.7108618052014277e-05, "loss": 3.0611, "step": 6710 }, { "epoch": 1.1422743498215195, "grad_norm": 13.66334056854248, "learning_rate": 1.7134115247322792e-05, "loss": 3.0125, "step": 6720 }, { "epoch": 1.1439741628420874, "grad_norm": 9.747392654418945, "learning_rate": 1.715961244263131e-05, "loss": 2.947, "step": 6730 }, { "epoch": 1.145673975862655, "grad_norm": 12.67062759399414, "learning_rate": 1.7185109637939826e-05, "loss": 2.9515, "step": 6740 }, { "epoch": 1.1473737888832227, "grad_norm": 17.260221481323242, "learning_rate": 1.7210606833248344e-05, "loss": 3.0861, "step": 6750 }, { "epoch": 1.1490736019037906, "grad_norm": 14.28956127166748, "learning_rate": 1.723610402855686e-05, "loss": 3.2736, "step": 6760 }, { "epoch": 1.1507734149243583, "grad_norm": 18.848146438598633, "learning_rate": 1.7261601223865374e-05, "loss": 2.8968, "step": 6770 }, { "epoch": 1.1524732279449261, "grad_norm": 17.548362731933594, "learning_rate": 1.7287098419173892e-05, "loss": 2.8636, "step": 6780 }, { "epoch": 1.1541730409654938, "grad_norm": 12.525976181030273, "learning_rate": 1.7312595614482407e-05, "loss": 2.8148, "step": 6790 }, { "epoch": 1.1558728539860614, "grad_norm": 12.815458297729492, "learning_rate": 1.7338092809790922e-05, "loss": 3.0839, "step": 6800 }, { "epoch": 1.1575726670066293, "grad_norm": 26.64970588684082, "learning_rate": 1.736359000509944e-05, "loss": 2.9094, "step": 6810 }, { "epoch": 1.159272480027197, "grad_norm": 13.567337036132812, "learning_rate": 1.7389087200407955e-05, "loss": 2.9992, "step": 6820 }, { "epoch": 1.1609722930477648, "grad_norm": 9.671055793762207, "learning_rate": 1.7414584395716473e-05, "loss": 3.0388, "step": 6830 }, { "epoch": 1.1626721060683325, "grad_norm": 11.772592544555664, "learning_rate": 1.7440081591024988e-05, "loss": 2.819, "step": 6840 }, { "epoch": 1.1643719190889001, "grad_norm": 13.36852741241455, "learning_rate": 1.7465578786333503e-05, "loss": 3.2133, "step": 6850 }, { "epoch": 1.166071732109468, "grad_norm": 27.12263298034668, "learning_rate": 1.749107598164202e-05, "loss": 2.8378, "step": 6860 }, { "epoch": 1.1677715451300357, "grad_norm": 21.8685302734375, "learning_rate": 1.7516573176950536e-05, "loss": 3.101, "step": 6870 }, { "epoch": 1.1694713581506035, "grad_norm": 23.275184631347656, "learning_rate": 1.754207037225905e-05, "loss": 2.8587, "step": 6880 }, { "epoch": 1.1711711711711712, "grad_norm": 21.47266387939453, "learning_rate": 1.756756756756757e-05, "loss": 3.0971, "step": 6890 }, { "epoch": 1.1728709841917389, "grad_norm": 17.569372177124023, "learning_rate": 1.7593064762876085e-05, "loss": 3.0578, "step": 6900 }, { "epoch": 1.1745707972123067, "grad_norm": 10.636435508728027, "learning_rate": 1.7618561958184603e-05, "loss": 3.078, "step": 6910 }, { "epoch": 1.1762706102328744, "grad_norm": 12.987581253051758, "learning_rate": 1.7644059153493118e-05, "loss": 3.3262, "step": 6920 }, { "epoch": 1.177970423253442, "grad_norm": 18.341596603393555, "learning_rate": 1.766955634880163e-05, "loss": 3.2076, "step": 6930 }, { "epoch": 1.17967023627401, "grad_norm": 11.43285083770752, "learning_rate": 1.7695053544110148e-05, "loss": 3.1162, "step": 6940 }, { "epoch": 1.1813700492945776, "grad_norm": 13.126506805419922, "learning_rate": 1.7720550739418662e-05, "loss": 2.9847, "step": 6950 }, { "epoch": 1.1830698623151452, "grad_norm": 11.771278381347656, "learning_rate": 1.774604793472718e-05, "loss": 3.0281, "step": 6960 }, { "epoch": 1.184769675335713, "grad_norm": 11.74142074584961, "learning_rate": 1.7771545130035696e-05, "loss": 3.1432, "step": 6970 }, { "epoch": 1.1864694883562807, "grad_norm": 21.51688575744629, "learning_rate": 1.779704232534421e-05, "loss": 3.1124, "step": 6980 }, { "epoch": 1.1881693013768486, "grad_norm": 14.624879837036133, "learning_rate": 1.782253952065273e-05, "loss": 2.7382, "step": 6990 }, { "epoch": 1.1898691143974163, "grad_norm": 17.312400817871094, "learning_rate": 1.7848036715961244e-05, "loss": 2.9122, "step": 7000 }, { "epoch": 1.191568927417984, "grad_norm": 10.444293975830078, "learning_rate": 1.787353391126976e-05, "loss": 2.7194, "step": 7010 }, { "epoch": 1.1932687404385518, "grad_norm": 9.001030921936035, "learning_rate": 1.7899031106578277e-05, "loss": 3.1923, "step": 7020 }, { "epoch": 1.1949685534591195, "grad_norm": 12.978578567504883, "learning_rate": 1.7924528301886792e-05, "loss": 3.0652, "step": 7030 }, { "epoch": 1.1966683664796873, "grad_norm": 15.727500915527344, "learning_rate": 1.795002549719531e-05, "loss": 3.1765, "step": 7040 }, { "epoch": 1.198368179500255, "grad_norm": 13.436139106750488, "learning_rate": 1.7975522692503825e-05, "loss": 3.1298, "step": 7050 }, { "epoch": 1.2000679925208226, "grad_norm": 13.43437671661377, "learning_rate": 1.800101988781234e-05, "loss": 3.0795, "step": 7060 }, { "epoch": 1.2017678055413905, "grad_norm": 9.875482559204102, "learning_rate": 1.802651708312086e-05, "loss": 3.1162, "step": 7070 }, { "epoch": 1.2034676185619582, "grad_norm": 11.638504028320312, "learning_rate": 1.8052014278429373e-05, "loss": 3.1171, "step": 7080 }, { "epoch": 1.205167431582526, "grad_norm": 10.11965274810791, "learning_rate": 1.8077511473737888e-05, "loss": 2.9787, "step": 7090 }, { "epoch": 1.2068672446030937, "grad_norm": 14.256173133850098, "learning_rate": 1.8103008669046407e-05, "loss": 3.1437, "step": 7100 }, { "epoch": 1.2085670576236613, "grad_norm": 12.809945106506348, "learning_rate": 1.812850586435492e-05, "loss": 3.0786, "step": 7110 }, { "epoch": 1.2102668706442292, "grad_norm": 10.670147895812988, "learning_rate": 1.815400305966344e-05, "loss": 2.9494, "step": 7120 }, { "epoch": 1.2119666836647969, "grad_norm": 11.446723937988281, "learning_rate": 1.8179500254971955e-05, "loss": 3.0632, "step": 7130 }, { "epoch": 1.2136664966853645, "grad_norm": 16.948373794555664, "learning_rate": 1.820499745028047e-05, "loss": 2.9932, "step": 7140 }, { "epoch": 1.2153663097059324, "grad_norm": 13.426396369934082, "learning_rate": 1.8230494645588988e-05, "loss": 2.9176, "step": 7150 }, { "epoch": 1.2170661227265, "grad_norm": 12.522746086120605, "learning_rate": 1.8255991840897503e-05, "loss": 3.0878, "step": 7160 }, { "epoch": 1.2187659357470677, "grad_norm": 9.861989974975586, "learning_rate": 1.8281489036206018e-05, "loss": 3.0832, "step": 7170 }, { "epoch": 1.2204657487676356, "grad_norm": 10.964983940124512, "learning_rate": 1.8306986231514533e-05, "loss": 3.2162, "step": 7180 }, { "epoch": 1.2221655617882032, "grad_norm": 12.789056777954102, "learning_rate": 1.8332483426823048e-05, "loss": 3.0841, "step": 7190 }, { "epoch": 1.2238653748087711, "grad_norm": 10.298905372619629, "learning_rate": 1.8357980622131566e-05, "loss": 2.9117, "step": 7200 }, { "epoch": 1.2255651878293388, "grad_norm": 18.561683654785156, "learning_rate": 1.838347781744008e-05, "loss": 3.0043, "step": 7210 }, { "epoch": 1.2272650008499064, "grad_norm": 14.204672813415527, "learning_rate": 1.8408975012748596e-05, "loss": 3.0402, "step": 7220 }, { "epoch": 1.2289648138704743, "grad_norm": 9.594409942626953, "learning_rate": 1.8434472208057114e-05, "loss": 2.9908, "step": 7230 }, { "epoch": 1.230664626891042, "grad_norm": 14.57271957397461, "learning_rate": 1.845996940336563e-05, "loss": 3.1288, "step": 7240 }, { "epoch": 1.2323644399116098, "grad_norm": 13.430442810058594, "learning_rate": 1.8485466598674147e-05, "loss": 3.212, "step": 7250 }, { "epoch": 1.2340642529321775, "grad_norm": 13.934501647949219, "learning_rate": 1.8510963793982662e-05, "loss": 3.1047, "step": 7260 }, { "epoch": 1.2357640659527451, "grad_norm": 17.572248458862305, "learning_rate": 1.8536460989291177e-05, "loss": 3.1705, "step": 7270 }, { "epoch": 1.237463878973313, "grad_norm": 12.18200969696045, "learning_rate": 1.8561958184599695e-05, "loss": 3.0037, "step": 7280 }, { "epoch": 1.2391636919938807, "grad_norm": 14.953913688659668, "learning_rate": 1.858745537990821e-05, "loss": 3.1243, "step": 7290 }, { "epoch": 1.2408635050144485, "grad_norm": 9.771578788757324, "learning_rate": 1.8612952575216725e-05, "loss": 3.229, "step": 7300 }, { "epoch": 1.2425633180350162, "grad_norm": 243.3424835205078, "learning_rate": 1.8638449770525243e-05, "loss": 2.7579, "step": 7310 }, { "epoch": 1.2442631310555838, "grad_norm": 43.55302429199219, "learning_rate": 1.866394696583376e-05, "loss": 3.0827, "step": 7320 }, { "epoch": 1.2459629440761517, "grad_norm": 12.697163581848145, "learning_rate": 1.8689444161142277e-05, "loss": 3.1059, "step": 7330 }, { "epoch": 1.2476627570967194, "grad_norm": 14.187751770019531, "learning_rate": 1.871494135645079e-05, "loss": 2.8352, "step": 7340 }, { "epoch": 1.249362570117287, "grad_norm": 26.43241310119629, "learning_rate": 1.8740438551759307e-05, "loss": 3.2234, "step": 7350 }, { "epoch": 1.251062383137855, "grad_norm": 14.16451358795166, "learning_rate": 1.8765935747067825e-05, "loss": 3.1412, "step": 7360 }, { "epoch": 1.2527621961584225, "grad_norm": 10.739542007446289, "learning_rate": 1.879143294237634e-05, "loss": 3.0718, "step": 7370 }, { "epoch": 1.2544620091789902, "grad_norm": 10.526477813720703, "learning_rate": 1.8816930137684855e-05, "loss": 3.072, "step": 7380 }, { "epoch": 1.256161822199558, "grad_norm": 14.572227478027344, "learning_rate": 1.8842427332993373e-05, "loss": 2.9805, "step": 7390 }, { "epoch": 1.2578616352201257, "grad_norm": 12.407571792602539, "learning_rate": 1.8867924528301888e-05, "loss": 3.043, "step": 7400 }, { "epoch": 1.2595614482406936, "grad_norm": 9.804975509643555, "learning_rate": 1.8893421723610406e-05, "loss": 2.7077, "step": 7410 }, { "epoch": 1.2612612612612613, "grad_norm": 9.84636402130127, "learning_rate": 1.8918918918918918e-05, "loss": 3.1496, "step": 7420 }, { "epoch": 1.262961074281829, "grad_norm": 11.418081283569336, "learning_rate": 1.8944416114227433e-05, "loss": 3.0033, "step": 7430 }, { "epoch": 1.2646608873023968, "grad_norm": 20.194503784179688, "learning_rate": 1.896991330953595e-05, "loss": 2.9609, "step": 7440 }, { "epoch": 1.2663607003229644, "grad_norm": 10.2821626663208, "learning_rate": 1.8995410504844466e-05, "loss": 3.1288, "step": 7450 }, { "epoch": 1.2680605133435323, "grad_norm": 16.4918155670166, "learning_rate": 1.9020907700152984e-05, "loss": 3.2673, "step": 7460 }, { "epoch": 1.2697603263641, "grad_norm": 11.809102058410645, "learning_rate": 1.90464048954615e-05, "loss": 2.9157, "step": 7470 }, { "epoch": 1.2714601393846676, "grad_norm": 9.084770202636719, "learning_rate": 1.9071902090770014e-05, "loss": 3.0724, "step": 7480 }, { "epoch": 1.2731599524052355, "grad_norm": 12.093840599060059, "learning_rate": 1.9097399286078532e-05, "loss": 3.0655, "step": 7490 }, { "epoch": 1.2748597654258031, "grad_norm": 11.359443664550781, "learning_rate": 1.9122896481387047e-05, "loss": 2.9765, "step": 7500 }, { "epoch": 1.276559578446371, "grad_norm": 17.8306884765625, "learning_rate": 1.9148393676695562e-05, "loss": 3.0798, "step": 7510 }, { "epoch": 1.2782593914669387, "grad_norm": 15.105916976928711, "learning_rate": 1.917389087200408e-05, "loss": 2.9924, "step": 7520 }, { "epoch": 1.2799592044875063, "grad_norm": 17.16417121887207, "learning_rate": 1.9199388067312595e-05, "loss": 2.9137, "step": 7530 }, { "epoch": 1.281659017508074, "grad_norm": 16.7780704498291, "learning_rate": 1.9224885262621114e-05, "loss": 3.0256, "step": 7540 }, { "epoch": 1.2833588305286419, "grad_norm": 9.9179048538208, "learning_rate": 1.925038245792963e-05, "loss": 3.0809, "step": 7550 }, { "epoch": 1.2850586435492095, "grad_norm": 19.033756256103516, "learning_rate": 1.9275879653238143e-05, "loss": 3.0842, "step": 7560 }, { "epoch": 1.2867584565697774, "grad_norm": 9.363567352294922, "learning_rate": 1.9301376848546662e-05, "loss": 2.9887, "step": 7570 }, { "epoch": 1.288458269590345, "grad_norm": 14.625814437866211, "learning_rate": 1.9326874043855177e-05, "loss": 3.0456, "step": 7580 }, { "epoch": 1.2901580826109127, "grad_norm": 13.298257827758789, "learning_rate": 1.935237123916369e-05, "loss": 3.0165, "step": 7590 }, { "epoch": 1.2918578956314806, "grad_norm": 9.660453796386719, "learning_rate": 1.937786843447221e-05, "loss": 3.2365, "step": 7600 }, { "epoch": 1.2935577086520482, "grad_norm": 21.580326080322266, "learning_rate": 1.9403365629780725e-05, "loss": 3.0523, "step": 7610 }, { "epoch": 1.295257521672616, "grad_norm": 15.572149276733398, "learning_rate": 1.9428862825089243e-05, "loss": 2.8809, "step": 7620 }, { "epoch": 1.2969573346931837, "grad_norm": 12.981857299804688, "learning_rate": 1.9454360020397758e-05, "loss": 2.8692, "step": 7630 }, { "epoch": 1.2986571477137514, "grad_norm": 13.053767204284668, "learning_rate": 1.9479857215706273e-05, "loss": 2.9619, "step": 7640 }, { "epoch": 1.3003569607343193, "grad_norm": 9.44816780090332, "learning_rate": 1.950535441101479e-05, "loss": 2.9896, "step": 7650 }, { "epoch": 1.302056773754887, "grad_norm": 31.114248275756836, "learning_rate": 1.9530851606323303e-05, "loss": 3.1041, "step": 7660 }, { "epoch": 1.3037565867754548, "grad_norm": 11.939348220825195, "learning_rate": 1.955634880163182e-05, "loss": 2.842, "step": 7670 }, { "epoch": 1.3054563997960225, "grad_norm": 12.616188049316406, "learning_rate": 1.9581845996940336e-05, "loss": 3.246, "step": 7680 }, { "epoch": 1.3071562128165901, "grad_norm": 15.646702766418457, "learning_rate": 1.960734319224885e-05, "loss": 2.9215, "step": 7690 }, { "epoch": 1.308856025837158, "grad_norm": 19.00981330871582, "learning_rate": 1.963284038755737e-05, "loss": 2.9218, "step": 7700 }, { "epoch": 1.3105558388577256, "grad_norm": 24.865867614746094, "learning_rate": 1.9658337582865884e-05, "loss": 3.1307, "step": 7710 }, { "epoch": 1.3122556518782935, "grad_norm": 11.946696281433105, "learning_rate": 1.96838347781744e-05, "loss": 3.2213, "step": 7720 }, { "epoch": 1.3139554648988612, "grad_norm": 9.410223960876465, "learning_rate": 1.9709331973482917e-05, "loss": 2.9962, "step": 7730 }, { "epoch": 1.3156552779194288, "grad_norm": 15.763139724731445, "learning_rate": 1.9734829168791432e-05, "loss": 3.0125, "step": 7740 }, { "epoch": 1.3173550909399965, "grad_norm": 12.755207061767578, "learning_rate": 1.976032636409995e-05, "loss": 3.1159, "step": 7750 }, { "epoch": 1.3190549039605644, "grad_norm": 9.701370239257812, "learning_rate": 1.9785823559408466e-05, "loss": 2.9169, "step": 7760 }, { "epoch": 1.320754716981132, "grad_norm": 16.62944221496582, "learning_rate": 1.981132075471698e-05, "loss": 3.0466, "step": 7770 }, { "epoch": 1.3224545300016999, "grad_norm": 10.310956954956055, "learning_rate": 1.98368179500255e-05, "loss": 2.8878, "step": 7780 }, { "epoch": 1.3241543430222675, "grad_norm": 12.797650337219238, "learning_rate": 1.9862315145334014e-05, "loss": 3.1119, "step": 7790 }, { "epoch": 1.3258541560428352, "grad_norm": 14.332683563232422, "learning_rate": 1.988781234064253e-05, "loss": 2.9204, "step": 7800 }, { "epoch": 1.327553969063403, "grad_norm": 22.58984375, "learning_rate": 1.9913309535951047e-05, "loss": 3.1383, "step": 7810 }, { "epoch": 1.3292537820839707, "grad_norm": 14.02595329284668, "learning_rate": 1.9938806731259562e-05, "loss": 3.2798, "step": 7820 }, { "epoch": 1.3309535951045386, "grad_norm": 17.99853515625, "learning_rate": 1.996430392656808e-05, "loss": 3.2347, "step": 7830 }, { "epoch": 1.3326534081251062, "grad_norm": 15.793841361999512, "learning_rate": 1.9989801121876595e-05, "loss": 3.1547, "step": 7840 }, { "epoch": 1.334353221145674, "grad_norm": 10.859179496765137, "learning_rate": 2.001529831718511e-05, "loss": 3.2228, "step": 7850 }, { "epoch": 1.3360530341662418, "grad_norm": 20.197622299194336, "learning_rate": 2.0040795512493628e-05, "loss": 3.1188, "step": 7860 }, { "epoch": 1.3377528471868094, "grad_norm": 10.827670097351074, "learning_rate": 2.0066292707802143e-05, "loss": 3.1229, "step": 7870 }, { "epoch": 1.3394526602073773, "grad_norm": 11.646002769470215, "learning_rate": 2.009178990311066e-05, "loss": 3.183, "step": 7880 }, { "epoch": 1.341152473227945, "grad_norm": 12.546407699584961, "learning_rate": 2.0117287098419176e-05, "loss": 2.8805, "step": 7890 }, { "epoch": 1.3428522862485126, "grad_norm": 13.032623291015625, "learning_rate": 2.014278429372769e-05, "loss": 2.9907, "step": 7900 }, { "epoch": 1.3445520992690805, "grad_norm": 11.428818702697754, "learning_rate": 2.0168281489036206e-05, "loss": 3.1729, "step": 7910 }, { "epoch": 1.3462519122896481, "grad_norm": 21.600074768066406, "learning_rate": 2.019377868434472e-05, "loss": 3.1369, "step": 7920 }, { "epoch": 1.347951725310216, "grad_norm": 18.934057235717773, "learning_rate": 2.0219275879653236e-05, "loss": 3.1962, "step": 7930 }, { "epoch": 1.3496515383307837, "grad_norm": 12.927472114562988, "learning_rate": 2.0244773074961754e-05, "loss": 2.9427, "step": 7940 }, { "epoch": 1.3513513513513513, "grad_norm": 11.822232246398926, "learning_rate": 2.027027027027027e-05, "loss": 3.0934, "step": 7950 }, { "epoch": 1.353051164371919, "grad_norm": 14.031065940856934, "learning_rate": 2.0295767465578788e-05, "loss": 3.3011, "step": 7960 }, { "epoch": 1.3547509773924868, "grad_norm": 12.469574928283691, "learning_rate": 2.0321264660887302e-05, "loss": 3.0171, "step": 7970 }, { "epoch": 1.3564507904130545, "grad_norm": 12.283653259277344, "learning_rate": 2.0346761856195817e-05, "loss": 2.9324, "step": 7980 }, { "epoch": 1.3581506034336224, "grad_norm": 15.912969589233398, "learning_rate": 2.0372259051504336e-05, "loss": 3.1774, "step": 7990 }, { "epoch": 1.35985041645419, "grad_norm": 13.61784553527832, "learning_rate": 2.039775624681285e-05, "loss": 3.1607, "step": 8000 }, { "epoch": 1.3615502294747577, "grad_norm": 13.22695255279541, "learning_rate": 2.0423253442121365e-05, "loss": 3.102, "step": 8010 }, { "epoch": 1.3632500424953256, "grad_norm": 22.547258377075195, "learning_rate": 2.0448750637429884e-05, "loss": 3.0463, "step": 8020 }, { "epoch": 1.3649498555158932, "grad_norm": 30.61237907409668, "learning_rate": 2.04742478327384e-05, "loss": 2.8922, "step": 8030 }, { "epoch": 1.366649668536461, "grad_norm": 19.90154457092285, "learning_rate": 2.0499745028046917e-05, "loss": 3.103, "step": 8040 }, { "epoch": 1.3683494815570287, "grad_norm": 21.14404296875, "learning_rate": 2.0525242223355432e-05, "loss": 3.0243, "step": 8050 }, { "epoch": 1.3700492945775964, "grad_norm": 16.316911697387695, "learning_rate": 2.0550739418663947e-05, "loss": 2.9898, "step": 8060 }, { "epoch": 1.3717491075981643, "grad_norm": 14.796309471130371, "learning_rate": 2.0576236613972465e-05, "loss": 2.9907, "step": 8070 }, { "epoch": 1.373448920618732, "grad_norm": 10.7953462600708, "learning_rate": 2.060173380928098e-05, "loss": 2.9633, "step": 8080 }, { "epoch": 1.3751487336392998, "grad_norm": 15.130738258361816, "learning_rate": 2.0627231004589495e-05, "loss": 3.1096, "step": 8090 }, { "epoch": 1.3768485466598674, "grad_norm": 10.62794303894043, "learning_rate": 2.0652728199898013e-05, "loss": 3.034, "step": 8100 }, { "epoch": 1.378548359680435, "grad_norm": 12.66087532043457, "learning_rate": 2.0678225395206528e-05, "loss": 3.1855, "step": 8110 }, { "epoch": 1.3802481727010028, "grad_norm": 15.631053924560547, "learning_rate": 2.0703722590515047e-05, "loss": 3.0341, "step": 8120 }, { "epoch": 1.3819479857215706, "grad_norm": 14.039857864379883, "learning_rate": 2.072921978582356e-05, "loss": 3.2653, "step": 8130 }, { "epoch": 1.3836477987421385, "grad_norm": 15.475055694580078, "learning_rate": 2.0754716981132076e-05, "loss": 2.9539, "step": 8140 }, { "epoch": 1.3853476117627062, "grad_norm": 17.58354949951172, "learning_rate": 2.078021417644059e-05, "loss": 3.1304, "step": 8150 }, { "epoch": 1.3870474247832738, "grad_norm": 14.014993667602539, "learning_rate": 2.0805711371749106e-05, "loss": 3.087, "step": 8160 }, { "epoch": 1.3887472378038415, "grad_norm": 14.675745010375977, "learning_rate": 2.0831208567057624e-05, "loss": 2.9925, "step": 8170 }, { "epoch": 1.3904470508244093, "grad_norm": 12.922043800354004, "learning_rate": 2.085670576236614e-05, "loss": 3.0823, "step": 8180 }, { "epoch": 1.392146863844977, "grad_norm": 19.662792205810547, "learning_rate": 2.0882202957674654e-05, "loss": 3.124, "step": 8190 }, { "epoch": 1.3938466768655449, "grad_norm": 15.762495040893555, "learning_rate": 2.0907700152983173e-05, "loss": 3.038, "step": 8200 }, { "epoch": 1.3955464898861125, "grad_norm": 7.737033843994141, "learning_rate": 2.0933197348291688e-05, "loss": 3.1165, "step": 8210 }, { "epoch": 1.3972463029066802, "grad_norm": 20.177358627319336, "learning_rate": 2.0958694543600202e-05, "loss": 2.8769, "step": 8220 }, { "epoch": 1.398946115927248, "grad_norm": 20.954530715942383, "learning_rate": 2.098419173890872e-05, "loss": 3.1316, "step": 8230 }, { "epoch": 1.4006459289478157, "grad_norm": 13.44528865814209, "learning_rate": 2.1009688934217236e-05, "loss": 3.0505, "step": 8240 }, { "epoch": 1.4023457419683836, "grad_norm": 9.868990898132324, "learning_rate": 2.1035186129525754e-05, "loss": 3.0473, "step": 8250 }, { "epoch": 1.4040455549889512, "grad_norm": 11.001752853393555, "learning_rate": 2.106068332483427e-05, "loss": 3.1307, "step": 8260 }, { "epoch": 1.4057453680095189, "grad_norm": 21.834836959838867, "learning_rate": 2.1086180520142784e-05, "loss": 3.0165, "step": 8270 }, { "epoch": 1.4074451810300868, "grad_norm": 11.86886215209961, "learning_rate": 2.1111677715451302e-05, "loss": 3.3347, "step": 8280 }, { "epoch": 1.4091449940506544, "grad_norm": 17.726633071899414, "learning_rate": 2.1137174910759817e-05, "loss": 3.1155, "step": 8290 }, { "epoch": 1.4108448070712223, "grad_norm": 20.329246520996094, "learning_rate": 2.1162672106068332e-05, "loss": 3.0758, "step": 8300 }, { "epoch": 1.41254462009179, "grad_norm": 13.640406608581543, "learning_rate": 2.118816930137685e-05, "loss": 3.2184, "step": 8310 }, { "epoch": 1.4142444331123576, "grad_norm": 14.782703399658203, "learning_rate": 2.1213666496685365e-05, "loss": 2.9826, "step": 8320 }, { "epoch": 1.4159442461329252, "grad_norm": 9.962373733520508, "learning_rate": 2.1239163691993883e-05, "loss": 3.0632, "step": 8330 }, { "epoch": 1.4176440591534931, "grad_norm": 12.189846992492676, "learning_rate": 2.12646608873024e-05, "loss": 3.2155, "step": 8340 }, { "epoch": 1.4193438721740608, "grad_norm": 14.436613082885742, "learning_rate": 2.1290158082610913e-05, "loss": 3.1101, "step": 8350 }, { "epoch": 1.4210436851946286, "grad_norm": 16.37892723083496, "learning_rate": 2.131565527791943e-05, "loss": 3.1008, "step": 8360 }, { "epoch": 1.4227434982151963, "grad_norm": 14.486772537231445, "learning_rate": 2.1341152473227946e-05, "loss": 3.1352, "step": 8370 }, { "epoch": 1.424443311235764, "grad_norm": 10.904356002807617, "learning_rate": 2.1366649668536465e-05, "loss": 3.2587, "step": 8380 }, { "epoch": 1.4261431242563318, "grad_norm": 12.078149795532227, "learning_rate": 2.1392146863844976e-05, "loss": 3.062, "step": 8390 }, { "epoch": 1.4278429372768995, "grad_norm": 12.008806228637695, "learning_rate": 2.141764405915349e-05, "loss": 3.1847, "step": 8400 }, { "epoch": 1.4295427502974674, "grad_norm": 11.759283065795898, "learning_rate": 2.144314125446201e-05, "loss": 2.9522, "step": 8410 }, { "epoch": 1.431242563318035, "grad_norm": 13.322396278381348, "learning_rate": 2.1468638449770524e-05, "loss": 3.1565, "step": 8420 }, { "epoch": 1.4329423763386027, "grad_norm": 14.88284683227539, "learning_rate": 2.149413564507904e-05, "loss": 3.1211, "step": 8430 }, { "epoch": 1.4346421893591705, "grad_norm": 20.200000762939453, "learning_rate": 2.1519632840387558e-05, "loss": 3.1026, "step": 8440 }, { "epoch": 1.4363420023797382, "grad_norm": 11.946788787841797, "learning_rate": 2.1545130035696073e-05, "loss": 3.0136, "step": 8450 }, { "epoch": 1.438041815400306, "grad_norm": 10.828475952148438, "learning_rate": 2.157062723100459e-05, "loss": 3.052, "step": 8460 }, { "epoch": 1.4397416284208737, "grad_norm": 9.034709930419922, "learning_rate": 2.1596124426313106e-05, "loss": 3.2829, "step": 8470 }, { "epoch": 1.4414414414414414, "grad_norm": 15.506436347961426, "learning_rate": 2.162162162162162e-05, "loss": 2.9704, "step": 8480 }, { "epoch": 1.4431412544620092, "grad_norm": 12.725008964538574, "learning_rate": 2.164711881693014e-05, "loss": 2.9229, "step": 8490 }, { "epoch": 1.444841067482577, "grad_norm": 11.64745044708252, "learning_rate": 2.1672616012238654e-05, "loss": 3.1805, "step": 8500 }, { "epoch": 1.4465408805031448, "grad_norm": 10.070914268493652, "learning_rate": 2.169811320754717e-05, "loss": 3.1998, "step": 8510 }, { "epoch": 1.4482406935237124, "grad_norm": 12.921521186828613, "learning_rate": 2.1723610402855687e-05, "loss": 2.9301, "step": 8520 }, { "epoch": 1.44994050654428, "grad_norm": 12.510153770446777, "learning_rate": 2.1749107598164202e-05, "loss": 3.1665, "step": 8530 }, { "epoch": 1.4516403195648477, "grad_norm": 10.809615135192871, "learning_rate": 2.177460479347272e-05, "loss": 3.0722, "step": 8540 }, { "epoch": 1.4533401325854156, "grad_norm": 10.12088394165039, "learning_rate": 2.1800101988781235e-05, "loss": 3.0917, "step": 8550 }, { "epoch": 1.4550399456059833, "grad_norm": 37.011199951171875, "learning_rate": 2.182559918408975e-05, "loss": 3.1957, "step": 8560 }, { "epoch": 1.4567397586265511, "grad_norm": 12.212715148925781, "learning_rate": 2.185109637939827e-05, "loss": 3.375, "step": 8570 }, { "epoch": 1.4584395716471188, "grad_norm": 15.460896492004395, "learning_rate": 2.1876593574706783e-05, "loss": 2.8686, "step": 8580 }, { "epoch": 1.4601393846676864, "grad_norm": 15.197768211364746, "learning_rate": 2.19020907700153e-05, "loss": 3.2764, "step": 8590 }, { "epoch": 1.4618391976882543, "grad_norm": 15.658273696899414, "learning_rate": 2.1927587965323817e-05, "loss": 3.1296, "step": 8600 }, { "epoch": 1.463539010708822, "grad_norm": 14.4297513961792, "learning_rate": 2.195308516063233e-05, "loss": 3.0803, "step": 8610 }, { "epoch": 1.4652388237293898, "grad_norm": 11.141242980957031, "learning_rate": 2.197858235594085e-05, "loss": 3.0378, "step": 8620 }, { "epoch": 1.4669386367499575, "grad_norm": 12.657317161560059, "learning_rate": 2.2004079551249365e-05, "loss": 3.1714, "step": 8630 }, { "epoch": 1.4686384497705252, "grad_norm": 13.440191268920898, "learning_rate": 2.2029576746557876e-05, "loss": 3.1926, "step": 8640 }, { "epoch": 1.470338262791093, "grad_norm": 12.996848106384277, "learning_rate": 2.2055073941866395e-05, "loss": 3.0202, "step": 8650 }, { "epoch": 1.4720380758116607, "grad_norm": 12.40601634979248, "learning_rate": 2.208057113717491e-05, "loss": 2.8135, "step": 8660 }, { "epoch": 1.4737378888322286, "grad_norm": 9.624019622802734, "learning_rate": 2.2106068332483428e-05, "loss": 3.177, "step": 8670 }, { "epoch": 1.4754377018527962, "grad_norm": 16.426067352294922, "learning_rate": 2.2131565527791943e-05, "loss": 3.1741, "step": 8680 }, { "epoch": 1.4771375148733639, "grad_norm": 18.758520126342773, "learning_rate": 2.2157062723100458e-05, "loss": 3.1382, "step": 8690 }, { "epoch": 1.4788373278939317, "grad_norm": 16.009050369262695, "learning_rate": 2.2182559918408976e-05, "loss": 3.1591, "step": 8700 }, { "epoch": 1.4805371409144994, "grad_norm": 16.065515518188477, "learning_rate": 2.220805711371749e-05, "loss": 3.1225, "step": 8710 }, { "epoch": 1.4822369539350673, "grad_norm": 8.14550495147705, "learning_rate": 2.2233554309026006e-05, "loss": 3.1322, "step": 8720 }, { "epoch": 1.483936766955635, "grad_norm": 11.860363960266113, "learning_rate": 2.2259051504334524e-05, "loss": 3.0781, "step": 8730 }, { "epoch": 1.4856365799762026, "grad_norm": 18.73965835571289, "learning_rate": 2.228454869964304e-05, "loss": 3.2028, "step": 8740 }, { "epoch": 1.4873363929967702, "grad_norm": 13.070472717285156, "learning_rate": 2.2310045894951557e-05, "loss": 3.0779, "step": 8750 }, { "epoch": 1.489036206017338, "grad_norm": 10.734965324401855, "learning_rate": 2.2335543090260072e-05, "loss": 3.141, "step": 8760 }, { "epoch": 1.4907360190379058, "grad_norm": 13.652433395385742, "learning_rate": 2.2361040285568587e-05, "loss": 2.7494, "step": 8770 }, { "epoch": 1.4924358320584736, "grad_norm": 16.33147430419922, "learning_rate": 2.2386537480877105e-05, "loss": 2.9212, "step": 8780 }, { "epoch": 1.4941356450790413, "grad_norm": 16.899085998535156, "learning_rate": 2.241203467618562e-05, "loss": 3.1701, "step": 8790 }, { "epoch": 1.495835458099609, "grad_norm": 15.629868507385254, "learning_rate": 2.2437531871494135e-05, "loss": 2.9947, "step": 8800 }, { "epoch": 1.4975352711201768, "grad_norm": 13.467726707458496, "learning_rate": 2.2463029066802654e-05, "loss": 3.0021, "step": 8810 }, { "epoch": 1.4992350841407445, "grad_norm": 10.5439453125, "learning_rate": 2.248852626211117e-05, "loss": 3.0953, "step": 8820 }, { "epoch": 1.5009348971613123, "grad_norm": 12.284975051879883, "learning_rate": 2.2514023457419687e-05, "loss": 2.8334, "step": 8830 }, { "epoch": 1.50263471018188, "grad_norm": 10.325065612792969, "learning_rate": 2.2539520652728202e-05, "loss": 3.09, "step": 8840 }, { "epoch": 1.5043345232024476, "grad_norm": 13.635150909423828, "learning_rate": 2.2565017848036717e-05, "loss": 2.9894, "step": 8850 }, { "epoch": 1.5060343362230153, "grad_norm": 16.824420928955078, "learning_rate": 2.2590515043345235e-05, "loss": 2.9126, "step": 8860 }, { "epoch": 1.5077341492435832, "grad_norm": 22.245872497558594, "learning_rate": 2.261601223865375e-05, "loss": 3.146, "step": 8870 }, { "epoch": 1.509433962264151, "grad_norm": 7.902062892913818, "learning_rate": 2.2641509433962265e-05, "loss": 3.1016, "step": 8880 }, { "epoch": 1.5111337752847187, "grad_norm": 13.17785358428955, "learning_rate": 2.266700662927078e-05, "loss": 2.9965, "step": 8890 }, { "epoch": 1.5128335883052864, "grad_norm": 18.236968994140625, "learning_rate": 2.2692503824579295e-05, "loss": 3.0833, "step": 8900 }, { "epoch": 1.514533401325854, "grad_norm": 13.454537391662598, "learning_rate": 2.2718001019887813e-05, "loss": 3.175, "step": 8910 }, { "epoch": 1.5162332143464219, "grad_norm": 8.267361640930176, "learning_rate": 2.2743498215196328e-05, "loss": 3.0134, "step": 8920 }, { "epoch": 1.5179330273669898, "grad_norm": 12.939946174621582, "learning_rate": 2.2768995410504843e-05, "loss": 2.9473, "step": 8930 }, { "epoch": 1.5196328403875574, "grad_norm": 13.714632987976074, "learning_rate": 2.279449260581336e-05, "loss": 3.0711, "step": 8940 }, { "epoch": 1.521332653408125, "grad_norm": 15.263347625732422, "learning_rate": 2.2819989801121876e-05, "loss": 2.9899, "step": 8950 }, { "epoch": 1.5230324664286927, "grad_norm": 14.545839309692383, "learning_rate": 2.2845486996430394e-05, "loss": 3.0869, "step": 8960 }, { "epoch": 1.5247322794492606, "grad_norm": 14.119864463806152, "learning_rate": 2.287098419173891e-05, "loss": 3.036, "step": 8970 }, { "epoch": 1.5264320924698285, "grad_norm": 8.738968849182129, "learning_rate": 2.2896481387047424e-05, "loss": 2.9955, "step": 8980 }, { "epoch": 1.5281319054903961, "grad_norm": 13.439234733581543, "learning_rate": 2.2921978582355942e-05, "loss": 3.13, "step": 8990 }, { "epoch": 1.5298317185109638, "grad_norm": 15.679054260253906, "learning_rate": 2.2947475777664457e-05, "loss": 3.0977, "step": 9000 }, { "epoch": 1.5315315315315314, "grad_norm": 9.525003433227539, "learning_rate": 2.2972972972972972e-05, "loss": 3.1178, "step": 9010 }, { "epoch": 1.5332313445520993, "grad_norm": 11.532698631286621, "learning_rate": 2.299847016828149e-05, "loss": 2.9891, "step": 9020 }, { "epoch": 1.534931157572667, "grad_norm": 9.909059524536133, "learning_rate": 2.3023967363590005e-05, "loss": 3.1189, "step": 9030 }, { "epoch": 1.5366309705932348, "grad_norm": 12.237615585327148, "learning_rate": 2.3049464558898524e-05, "loss": 3.0543, "step": 9040 }, { "epoch": 1.5383307836138025, "grad_norm": 9.620635986328125, "learning_rate": 2.307496175420704e-05, "loss": 3.1177, "step": 9050 }, { "epoch": 1.5400305966343701, "grad_norm": 10.415854454040527, "learning_rate": 2.3100458949515554e-05, "loss": 2.9816, "step": 9060 }, { "epoch": 1.5417304096549378, "grad_norm": 21.93893051147461, "learning_rate": 2.3125956144824072e-05, "loss": 3.0006, "step": 9070 }, { "epoch": 1.5434302226755057, "grad_norm": 14.692625045776367, "learning_rate": 2.3151453340132587e-05, "loss": 2.9222, "step": 9080 }, { "epoch": 1.5451300356960735, "grad_norm": 19.11028289794922, "learning_rate": 2.3176950535441102e-05, "loss": 2.9743, "step": 9090 }, { "epoch": 1.5468298487166412, "grad_norm": 12.724831581115723, "learning_rate": 2.320244773074962e-05, "loss": 3.0559, "step": 9100 }, { "epoch": 1.5485296617372089, "grad_norm": 23.454076766967773, "learning_rate": 2.3227944926058135e-05, "loss": 3.142, "step": 9110 }, { "epoch": 1.5502294747577765, "grad_norm": 12.75560188293457, "learning_rate": 2.325344212136665e-05, "loss": 2.9886, "step": 9120 }, { "epoch": 1.5519292877783444, "grad_norm": 16.101659774780273, "learning_rate": 2.3278939316675165e-05, "loss": 3.099, "step": 9130 }, { "epoch": 1.5536291007989123, "grad_norm": 10.55557918548584, "learning_rate": 2.330443651198368e-05, "loss": 3.0109, "step": 9140 }, { "epoch": 1.55532891381948, "grad_norm": 8.483447074890137, "learning_rate": 2.3329933707292198e-05, "loss": 3.1191, "step": 9150 }, { "epoch": 1.5570287268400476, "grad_norm": 9.856464385986328, "learning_rate": 2.3355430902600713e-05, "loss": 3.1045, "step": 9160 }, { "epoch": 1.5587285398606152, "grad_norm": 12.279275894165039, "learning_rate": 2.338092809790923e-05, "loss": 3.1495, "step": 9170 }, { "epoch": 1.560428352881183, "grad_norm": 12.995061874389648, "learning_rate": 2.3406425293217746e-05, "loss": 3.1697, "step": 9180 }, { "epoch": 1.562128165901751, "grad_norm": 16.226409912109375, "learning_rate": 2.343192248852626e-05, "loss": 3.1473, "step": 9190 }, { "epoch": 1.5638279789223186, "grad_norm": 15.853055953979492, "learning_rate": 2.345741968383478e-05, "loss": 3.0008, "step": 9200 }, { "epoch": 1.5655277919428863, "grad_norm": 8.692983627319336, "learning_rate": 2.3482916879143294e-05, "loss": 3.0528, "step": 9210 }, { "epoch": 1.567227604963454, "grad_norm": 16.771629333496094, "learning_rate": 2.350841407445181e-05, "loss": 3.1725, "step": 9220 }, { "epoch": 1.5689274179840218, "grad_norm": 15.087244987487793, "learning_rate": 2.3533911269760327e-05, "loss": 2.9083, "step": 9230 }, { "epoch": 1.5706272310045895, "grad_norm": 12.088857650756836, "learning_rate": 2.3559408465068842e-05, "loss": 2.8321, "step": 9240 }, { "epoch": 1.5723270440251573, "grad_norm": 17.293304443359375, "learning_rate": 2.358490566037736e-05, "loss": 3.2994, "step": 9250 }, { "epoch": 1.574026857045725, "grad_norm": 13.236509323120117, "learning_rate": 2.3610402855685876e-05, "loss": 3.0677, "step": 9260 }, { "epoch": 1.5757266700662926, "grad_norm": 13.878504753112793, "learning_rate": 2.363590005099439e-05, "loss": 3.0326, "step": 9270 }, { "epoch": 1.5774264830868603, "grad_norm": 16.27393913269043, "learning_rate": 2.366139724630291e-05, "loss": 3.0995, "step": 9280 }, { "epoch": 1.5791262961074282, "grad_norm": 14.988511085510254, "learning_rate": 2.3686894441611424e-05, "loss": 3.2366, "step": 9290 }, { "epoch": 1.580826109127996, "grad_norm": 8.975615501403809, "learning_rate": 2.371239163691994e-05, "loss": 2.9638, "step": 9300 }, { "epoch": 1.5825259221485637, "grad_norm": 9.618557929992676, "learning_rate": 2.3737888832228457e-05, "loss": 3.0493, "step": 9310 }, { "epoch": 1.5842257351691313, "grad_norm": 11.818693161010742, "learning_rate": 2.3763386027536972e-05, "loss": 2.9928, "step": 9320 }, { "epoch": 1.585925548189699, "grad_norm": 10.354353904724121, "learning_rate": 2.378888322284549e-05, "loss": 3.2684, "step": 9330 }, { "epoch": 1.5876253612102669, "grad_norm": 12.327958106994629, "learning_rate": 2.3814380418154005e-05, "loss": 3.0907, "step": 9340 }, { "epoch": 1.5893251742308347, "grad_norm": 9.211414337158203, "learning_rate": 2.383987761346252e-05, "loss": 3.1251, "step": 9350 }, { "epoch": 1.5910249872514024, "grad_norm": 10.507803916931152, "learning_rate": 2.3865374808771035e-05, "loss": 2.9404, "step": 9360 }, { "epoch": 1.59272480027197, "grad_norm": 9.457756042480469, "learning_rate": 2.389087200407955e-05, "loss": 3.1824, "step": 9370 }, { "epoch": 1.5944246132925377, "grad_norm": 15.2610502243042, "learning_rate": 2.3916369199388068e-05, "loss": 3.1289, "step": 9380 }, { "epoch": 1.5961244263131056, "grad_norm": 17.74786376953125, "learning_rate": 2.3941866394696583e-05, "loss": 3.027, "step": 9390 }, { "epoch": 1.5978242393336735, "grad_norm": 14.728708267211914, "learning_rate": 2.3967363590005098e-05, "loss": 3.282, "step": 9400 }, { "epoch": 1.599524052354241, "grad_norm": 13.18935489654541, "learning_rate": 2.3992860785313616e-05, "loss": 3.0336, "step": 9410 }, { "epoch": 1.6012238653748088, "grad_norm": 22.41632080078125, "learning_rate": 2.401835798062213e-05, "loss": 3.0166, "step": 9420 }, { "epoch": 1.6029236783953764, "grad_norm": 8.655169486999512, "learning_rate": 2.4043855175930646e-05, "loss": 3.2189, "step": 9430 }, { "epoch": 1.6046234914159443, "grad_norm": 14.43822193145752, "learning_rate": 2.4069352371239164e-05, "loss": 2.9633, "step": 9440 }, { "epoch": 1.606323304436512, "grad_norm": 23.929428100585938, "learning_rate": 2.409484956654768e-05, "loss": 2.9646, "step": 9450 }, { "epoch": 1.6080231174570798, "grad_norm": 20.42289924621582, "learning_rate": 2.4120346761856198e-05, "loss": 3.1621, "step": 9460 }, { "epoch": 1.6097229304776475, "grad_norm": 12.508589744567871, "learning_rate": 2.4145843957164713e-05, "loss": 2.8942, "step": 9470 }, { "epoch": 1.6114227434982151, "grad_norm": 16.926647186279297, "learning_rate": 2.4171341152473227e-05, "loss": 2.9532, "step": 9480 }, { "epoch": 1.6131225565187828, "grad_norm": 11.688694953918457, "learning_rate": 2.4196838347781746e-05, "loss": 3.0919, "step": 9490 }, { "epoch": 1.6148223695393507, "grad_norm": 16.890682220458984, "learning_rate": 2.422233554309026e-05, "loss": 3.2033, "step": 9500 }, { "epoch": 1.6165221825599185, "grad_norm": 8.956343650817871, "learning_rate": 2.4247832738398776e-05, "loss": 3.066, "step": 9510 }, { "epoch": 1.6182219955804862, "grad_norm": 10.823946952819824, "learning_rate": 2.4273329933707294e-05, "loss": 3.2322, "step": 9520 }, { "epoch": 1.6199218086010538, "grad_norm": 16.82953453063965, "learning_rate": 2.429882712901581e-05, "loss": 3.2117, "step": 9530 }, { "epoch": 1.6216216216216215, "grad_norm": 14.498383522033691, "learning_rate": 2.4324324324324327e-05, "loss": 3.0084, "step": 9540 }, { "epoch": 1.6233214346421894, "grad_norm": 11.117939949035645, "learning_rate": 2.4349821519632842e-05, "loss": 3.2322, "step": 9550 }, { "epoch": 1.6250212476627572, "grad_norm": 10.250185012817383, "learning_rate": 2.4375318714941357e-05, "loss": 2.8855, "step": 9560 }, { "epoch": 1.626721060683325, "grad_norm": 12.66822624206543, "learning_rate": 2.4400815910249875e-05, "loss": 3.0842, "step": 9570 }, { "epoch": 1.6284208737038925, "grad_norm": 10.270153045654297, "learning_rate": 2.442631310555839e-05, "loss": 3.2431, "step": 9580 }, { "epoch": 1.6301206867244602, "grad_norm": 9.241272926330566, "learning_rate": 2.4451810300866905e-05, "loss": 2.8856, "step": 9590 }, { "epoch": 1.631820499745028, "grad_norm": 9.125298500061035, "learning_rate": 2.4477307496175423e-05, "loss": 2.8702, "step": 9600 }, { "epoch": 1.633520312765596, "grad_norm": 17.72060775756836, "learning_rate": 2.4502804691483935e-05, "loss": 3.122, "step": 9610 }, { "epoch": 1.6352201257861636, "grad_norm": 14.890719413757324, "learning_rate": 2.4528301886792453e-05, "loss": 3.2781, "step": 9620 }, { "epoch": 1.6369199388067313, "grad_norm": 13.4912691116333, "learning_rate": 2.4553799082100968e-05, "loss": 2.9239, "step": 9630 }, { "epoch": 1.638619751827299, "grad_norm": 14.433952331542969, "learning_rate": 2.4579296277409483e-05, "loss": 2.978, "step": 9640 }, { "epoch": 1.6403195648478668, "grad_norm": 13.481905937194824, "learning_rate": 2.4604793472718e-05, "loss": 3.0181, "step": 9650 }, { "epoch": 1.6420193778684344, "grad_norm": 14.872626304626465, "learning_rate": 2.4630290668026516e-05, "loss": 2.9878, "step": 9660 }, { "epoch": 1.6437191908890023, "grad_norm": 12.66200065612793, "learning_rate": 2.4655787863335035e-05, "loss": 3.0663, "step": 9670 }, { "epoch": 1.64541900390957, "grad_norm": 17.555879592895508, "learning_rate": 2.468128505864355e-05, "loss": 3.0183, "step": 9680 }, { "epoch": 1.6471188169301376, "grad_norm": 9.04166316986084, "learning_rate": 2.4706782253952064e-05, "loss": 3.0794, "step": 9690 }, { "epoch": 1.6488186299507053, "grad_norm": 8.414446830749512, "learning_rate": 2.4732279449260583e-05, "loss": 3.2936, "step": 9700 }, { "epoch": 1.6505184429712731, "grad_norm": 13.67115306854248, "learning_rate": 2.4757776644569098e-05, "loss": 2.9166, "step": 9710 }, { "epoch": 1.652218255991841, "grad_norm": 22.532974243164062, "learning_rate": 2.4783273839877613e-05, "loss": 2.9215, "step": 9720 }, { "epoch": 1.6539180690124087, "grad_norm": 26.578327178955078, "learning_rate": 2.480877103518613e-05, "loss": 2.9999, "step": 9730 }, { "epoch": 1.6556178820329763, "grad_norm": 20.17474937438965, "learning_rate": 2.4834268230494646e-05, "loss": 2.9945, "step": 9740 }, { "epoch": 1.657317695053544, "grad_norm": 15.339580535888672, "learning_rate": 2.4859765425803164e-05, "loss": 3.2904, "step": 9750 }, { "epoch": 1.6590175080741119, "grad_norm": 11.24197006225586, "learning_rate": 2.488526262111168e-05, "loss": 2.8743, "step": 9760 }, { "epoch": 1.6607173210946797, "grad_norm": 10.03195571899414, "learning_rate": 2.4910759816420194e-05, "loss": 3.1405, "step": 9770 }, { "epoch": 1.6624171341152474, "grad_norm": 16.137435913085938, "learning_rate": 2.4936257011728712e-05, "loss": 2.9659, "step": 9780 }, { "epoch": 1.664116947135815, "grad_norm": 12.558754920959473, "learning_rate": 2.4961754207037227e-05, "loss": 3.1015, "step": 9790 }, { "epoch": 1.6658167601563827, "grad_norm": 9.514764785766602, "learning_rate": 2.4987251402345742e-05, "loss": 2.9197, "step": 9800 }, { "epoch": 1.6675165731769506, "grad_norm": 64.32532501220703, "learning_rate": 2.501274859765426e-05, "loss": 3.045, "step": 9810 }, { "epoch": 1.6692163861975182, "grad_norm": 17.993694305419922, "learning_rate": 2.5038245792962775e-05, "loss": 3.0396, "step": 9820 }, { "epoch": 1.670916199218086, "grad_norm": 15.291401863098145, "learning_rate": 2.5063742988271294e-05, "loss": 3.0152, "step": 9830 }, { "epoch": 1.6726160122386537, "grad_norm": 16.732561111450195, "learning_rate": 2.508924018357981e-05, "loss": 3.0015, "step": 9840 }, { "epoch": 1.6743158252592214, "grad_norm": 12.280149459838867, "learning_rate": 2.511473737888832e-05, "loss": 2.9739, "step": 9850 }, { "epoch": 1.6760156382797893, "grad_norm": 12.673494338989258, "learning_rate": 2.514023457419684e-05, "loss": 3.2576, "step": 9860 }, { "epoch": 1.677715451300357, "grad_norm": 16.353870391845703, "learning_rate": 2.5165731769505353e-05, "loss": 3.0256, "step": 9870 }, { "epoch": 1.6794152643209248, "grad_norm": 13.80148983001709, "learning_rate": 2.519122896481387e-05, "loss": 2.9585, "step": 9880 }, { "epoch": 1.6811150773414925, "grad_norm": 12.019322395324707, "learning_rate": 2.5216726160122386e-05, "loss": 3.2657, "step": 9890 }, { "epoch": 1.6828148903620601, "grad_norm": 11.782510757446289, "learning_rate": 2.52422233554309e-05, "loss": 3.0861, "step": 9900 }, { "epoch": 1.6845147033826278, "grad_norm": 12.43201732635498, "learning_rate": 2.526772055073942e-05, "loss": 3.0792, "step": 9910 }, { "epoch": 1.6862145164031956, "grad_norm": 8.911182403564453, "learning_rate": 2.5293217746047935e-05, "loss": 3.0748, "step": 9920 }, { "epoch": 1.6879143294237635, "grad_norm": 12.666579246520996, "learning_rate": 2.531871494135645e-05, "loss": 3.0534, "step": 9930 }, { "epoch": 1.6896141424443312, "grad_norm": 12.415400505065918, "learning_rate": 2.5344212136664968e-05, "loss": 3.0489, "step": 9940 }, { "epoch": 1.6913139554648988, "grad_norm": 11.13748836517334, "learning_rate": 2.5369709331973483e-05, "loss": 3.1056, "step": 9950 }, { "epoch": 1.6930137684854665, "grad_norm": 7.347387313842773, "learning_rate": 2.5395206527282e-05, "loss": 3.0959, "step": 9960 }, { "epoch": 1.6947135815060343, "grad_norm": 9.008841514587402, "learning_rate": 2.5420703722590516e-05, "loss": 3.1674, "step": 9970 }, { "epoch": 1.6964133945266022, "grad_norm": 24.168949127197266, "learning_rate": 2.544620091789903e-05, "loss": 2.8582, "step": 9980 }, { "epoch": 1.6981132075471699, "grad_norm": 9.098365783691406, "learning_rate": 2.547169811320755e-05, "loss": 2.9733, "step": 9990 }, { "epoch": 1.6998130205677375, "grad_norm": 15.936201095581055, "learning_rate": 2.5497195308516064e-05, "loss": 3.0612, "step": 10000 }, { "epoch": 1.7015128335883052, "grad_norm": 11.394989013671875, "learning_rate": 2.552269250382458e-05, "loss": 3.1502, "step": 10010 }, { "epoch": 1.703212646608873, "grad_norm": 10.217010498046875, "learning_rate": 2.5548189699133097e-05, "loss": 2.9994, "step": 10020 }, { "epoch": 1.7049124596294407, "grad_norm": 12.912842750549316, "learning_rate": 2.5573686894441612e-05, "loss": 3.3233, "step": 10030 }, { "epoch": 1.7066122726500086, "grad_norm": 11.944778442382812, "learning_rate": 2.559918408975013e-05, "loss": 2.9674, "step": 10040 }, { "epoch": 1.7083120856705762, "grad_norm": 13.85303020477295, "learning_rate": 2.5624681285058645e-05, "loss": 3.0929, "step": 10050 }, { "epoch": 1.710011898691144, "grad_norm": 9.793182373046875, "learning_rate": 2.565017848036716e-05, "loss": 3.1764, "step": 10060 }, { "epoch": 1.7117117117117115, "grad_norm": 12.058687210083008, "learning_rate": 2.567567567567568e-05, "loss": 3.0991, "step": 10070 }, { "epoch": 1.7134115247322794, "grad_norm": 20.667728424072266, "learning_rate": 2.5701172870984194e-05, "loss": 3.1233, "step": 10080 }, { "epoch": 1.7151113377528473, "grad_norm": 17.827726364135742, "learning_rate": 2.572667006629271e-05, "loss": 2.9616, "step": 10090 }, { "epoch": 1.716811150773415, "grad_norm": 19.678693771362305, "learning_rate": 2.5752167261601223e-05, "loss": 3.1292, "step": 10100 }, { "epoch": 1.7185109637939826, "grad_norm": 21.549644470214844, "learning_rate": 2.577766445690974e-05, "loss": 2.9357, "step": 10110 }, { "epoch": 1.7202107768145503, "grad_norm": 11.769120216369629, "learning_rate": 2.5803161652218257e-05, "loss": 3.0685, "step": 10120 }, { "epoch": 1.7219105898351181, "grad_norm": 15.037254333496094, "learning_rate": 2.582865884752677e-05, "loss": 3.3615, "step": 10130 }, { "epoch": 1.723610402855686, "grad_norm": 10.696980476379395, "learning_rate": 2.5854156042835286e-05, "loss": 3.0553, "step": 10140 }, { "epoch": 1.7253102158762537, "grad_norm": 38.634700775146484, "learning_rate": 2.5879653238143805e-05, "loss": 2.9165, "step": 10150 }, { "epoch": 1.7270100288968213, "grad_norm": 11.326424598693848, "learning_rate": 2.590515043345232e-05, "loss": 3.1094, "step": 10160 }, { "epoch": 1.728709841917389, "grad_norm": 10.94000244140625, "learning_rate": 2.5930647628760838e-05, "loss": 3.3651, "step": 10170 }, { "epoch": 1.7304096549379568, "grad_norm": 16.26889419555664, "learning_rate": 2.5956144824069353e-05, "loss": 2.8055, "step": 10180 }, { "epoch": 1.7321094679585247, "grad_norm": 10.302118301391602, "learning_rate": 2.5981642019377868e-05, "loss": 3.1245, "step": 10190 }, { "epoch": 1.7338092809790924, "grad_norm": 15.514974594116211, "learning_rate": 2.6007139214686386e-05, "loss": 3.0726, "step": 10200 }, { "epoch": 1.73550909399966, "grad_norm": 11.697123527526855, "learning_rate": 2.60326364099949e-05, "loss": 2.992, "step": 10210 }, { "epoch": 1.7372089070202277, "grad_norm": 10.282608985900879, "learning_rate": 2.6058133605303416e-05, "loss": 3.1441, "step": 10220 }, { "epoch": 1.7389087200407956, "grad_norm": 13.142204284667969, "learning_rate": 2.6083630800611934e-05, "loss": 3.2075, "step": 10230 }, { "epoch": 1.7406085330613632, "grad_norm": 18.205181121826172, "learning_rate": 2.610912799592045e-05, "loss": 2.9126, "step": 10240 }, { "epoch": 1.742308346081931, "grad_norm": 11.395153045654297, "learning_rate": 2.6134625191228967e-05, "loss": 3.2134, "step": 10250 }, { "epoch": 1.7440081591024987, "grad_norm": 17.13705825805664, "learning_rate": 2.6160122386537482e-05, "loss": 3.1542, "step": 10260 }, { "epoch": 1.7457079721230664, "grad_norm": 10.764301300048828, "learning_rate": 2.6185619581845997e-05, "loss": 3.2281, "step": 10270 }, { "epoch": 1.747407785143634, "grad_norm": 11.020539283752441, "learning_rate": 2.6211116777154516e-05, "loss": 2.8658, "step": 10280 }, { "epoch": 1.749107598164202, "grad_norm": 13.097972869873047, "learning_rate": 2.623661397246303e-05, "loss": 3.1907, "step": 10290 }, { "epoch": 1.7508074111847698, "grad_norm": 15.172438621520996, "learning_rate": 2.6262111167771545e-05, "loss": 3.2517, "step": 10300 }, { "epoch": 1.7525072242053374, "grad_norm": 23.982074737548828, "learning_rate": 2.6287608363080064e-05, "loss": 2.897, "step": 10310 }, { "epoch": 1.754207037225905, "grad_norm": 11.554465293884277, "learning_rate": 2.631310555838858e-05, "loss": 2.7821, "step": 10320 }, { "epoch": 1.7559068502464728, "grad_norm": 10.759733200073242, "learning_rate": 2.6338602753697097e-05, "loss": 2.8871, "step": 10330 }, { "epoch": 1.7576066632670406, "grad_norm": 12.058603286743164, "learning_rate": 2.636409994900561e-05, "loss": 3.2842, "step": 10340 }, { "epoch": 1.7593064762876085, "grad_norm": 11.994332313537598, "learning_rate": 2.6389597144314123e-05, "loss": 2.9787, "step": 10350 }, { "epoch": 1.7610062893081762, "grad_norm": 14.308028221130371, "learning_rate": 2.641509433962264e-05, "loss": 2.9784, "step": 10360 }, { "epoch": 1.7627061023287438, "grad_norm": 10.300353050231934, "learning_rate": 2.6440591534931157e-05, "loss": 3.0796, "step": 10370 }, { "epoch": 1.7644059153493115, "grad_norm": 11.345549583435059, "learning_rate": 2.6466088730239675e-05, "loss": 3.1136, "step": 10380 }, { "epoch": 1.7661057283698793, "grad_norm": 32.1320686340332, "learning_rate": 2.649158592554819e-05, "loss": 2.8398, "step": 10390 }, { "epoch": 1.7678055413904472, "grad_norm": 14.721816062927246, "learning_rate": 2.6517083120856705e-05, "loss": 2.9746, "step": 10400 }, { "epoch": 1.7695053544110149, "grad_norm": 21.266986846923828, "learning_rate": 2.6542580316165223e-05, "loss": 3.2042, "step": 10410 }, { "epoch": 1.7712051674315825, "grad_norm": 14.275193214416504, "learning_rate": 2.6568077511473738e-05, "loss": 3.0421, "step": 10420 }, { "epoch": 1.7729049804521502, "grad_norm": 11.373222351074219, "learning_rate": 2.6593574706782253e-05, "loss": 3.0566, "step": 10430 }, { "epoch": 1.774604793472718, "grad_norm": 8.127288818359375, "learning_rate": 2.661907190209077e-05, "loss": 3.0767, "step": 10440 }, { "epoch": 1.7763046064932857, "grad_norm": 13.60678768157959, "learning_rate": 2.6644569097399286e-05, "loss": 3.3167, "step": 10450 }, { "epoch": 1.7780044195138536, "grad_norm": 12.122283935546875, "learning_rate": 2.6670066292707804e-05, "loss": 3.0882, "step": 10460 }, { "epoch": 1.7797042325344212, "grad_norm": 19.52994728088379, "learning_rate": 2.669556348801632e-05, "loss": 3.1002, "step": 10470 }, { "epoch": 1.7814040455549889, "grad_norm": 12.169440269470215, "learning_rate": 2.6721060683324834e-05, "loss": 2.9183, "step": 10480 }, { "epoch": 1.7831038585755565, "grad_norm": 11.62039852142334, "learning_rate": 2.6746557878633353e-05, "loss": 3.1596, "step": 10490 }, { "epoch": 1.7848036715961244, "grad_norm": 12.235337257385254, "learning_rate": 2.6772055073941867e-05, "loss": 2.9644, "step": 10500 }, { "epoch": 1.7865034846166923, "grad_norm": 14.475881576538086, "learning_rate": 2.6797552269250382e-05, "loss": 2.9825, "step": 10510 }, { "epoch": 1.78820329763726, "grad_norm": 30.871639251708984, "learning_rate": 2.68230494645589e-05, "loss": 3.1642, "step": 10520 }, { "epoch": 1.7899031106578276, "grad_norm": 17.65963363647461, "learning_rate": 2.6848546659867416e-05, "loss": 3.0555, "step": 10530 }, { "epoch": 1.7916029236783952, "grad_norm": 13.783432960510254, "learning_rate": 2.6874043855175934e-05, "loss": 2.7079, "step": 10540 }, { "epoch": 1.7933027366989631, "grad_norm": 12.940052032470703, "learning_rate": 2.689954105048445e-05, "loss": 3.0871, "step": 10550 }, { "epoch": 1.795002549719531, "grad_norm": 19.161487579345703, "learning_rate": 2.6925038245792964e-05, "loss": 2.862, "step": 10560 }, { "epoch": 1.7967023627400986, "grad_norm": 13.911806106567383, "learning_rate": 2.6950535441101482e-05, "loss": 3.1748, "step": 10570 }, { "epoch": 1.7984021757606663, "grad_norm": 9.570555686950684, "learning_rate": 2.6976032636409994e-05, "loss": 2.9016, "step": 10580 }, { "epoch": 1.800101988781234, "grad_norm": 13.897273063659668, "learning_rate": 2.7001529831718512e-05, "loss": 3.1598, "step": 10590 }, { "epoch": 1.8018018018018018, "grad_norm": 10.44074821472168, "learning_rate": 2.7027027027027027e-05, "loss": 3.0022, "step": 10600 }, { "epoch": 1.8035016148223697, "grad_norm": 12.89862060546875, "learning_rate": 2.705252422233554e-05, "loss": 2.8838, "step": 10610 }, { "epoch": 1.8052014278429374, "grad_norm": 16.25481414794922, "learning_rate": 2.707802141764406e-05, "loss": 2.8314, "step": 10620 }, { "epoch": 1.806901240863505, "grad_norm": 11.072563171386719, "learning_rate": 2.7103518612952575e-05, "loss": 3.1239, "step": 10630 }, { "epoch": 1.8086010538840727, "grad_norm": 8.108997344970703, "learning_rate": 2.712901580826109e-05, "loss": 3.0596, "step": 10640 }, { "epoch": 1.8103008669046405, "grad_norm": 11.663498878479004, "learning_rate": 2.7154513003569608e-05, "loss": 3.154, "step": 10650 }, { "epoch": 1.8120006799252082, "grad_norm": 13.18729019165039, "learning_rate": 2.7180010198878123e-05, "loss": 2.9232, "step": 10660 }, { "epoch": 1.813700492945776, "grad_norm": 17.385366439819336, "learning_rate": 2.720550739418664e-05, "loss": 3.3772, "step": 10670 }, { "epoch": 1.8154003059663437, "grad_norm": 15.720002174377441, "learning_rate": 2.7231004589495156e-05, "loss": 3.0788, "step": 10680 }, { "epoch": 1.8171001189869114, "grad_norm": 13.382634162902832, "learning_rate": 2.725650178480367e-05, "loss": 3.1322, "step": 10690 }, { "epoch": 1.818799932007479, "grad_norm": 22.140411376953125, "learning_rate": 2.728199898011219e-05, "loss": 2.7896, "step": 10700 }, { "epoch": 1.820499745028047, "grad_norm": 9.080121994018555, "learning_rate": 2.7307496175420704e-05, "loss": 3.1521, "step": 10710 }, { "epoch": 1.8221995580486148, "grad_norm": 13.225481033325195, "learning_rate": 2.733299337072922e-05, "loss": 2.9932, "step": 10720 }, { "epoch": 1.8238993710691824, "grad_norm": 12.550629615783691, "learning_rate": 2.7358490566037738e-05, "loss": 3.3243, "step": 10730 }, { "epoch": 1.82559918408975, "grad_norm": 12.914381980895996, "learning_rate": 2.7383987761346253e-05, "loss": 3.1611, "step": 10740 }, { "epoch": 1.8272989971103177, "grad_norm": 12.797728538513184, "learning_rate": 2.740948495665477e-05, "loss": 2.8778, "step": 10750 }, { "epoch": 1.8289988101308856, "grad_norm": 11.922527313232422, "learning_rate": 2.7434982151963286e-05, "loss": 2.8066, "step": 10760 }, { "epoch": 1.8306986231514535, "grad_norm": 11.116738319396973, "learning_rate": 2.74604793472718e-05, "loss": 3.0192, "step": 10770 }, { "epoch": 1.8323984361720211, "grad_norm": 13.908686637878418, "learning_rate": 2.748597654258032e-05, "loss": 2.9237, "step": 10780 }, { "epoch": 1.8340982491925888, "grad_norm": 19.420122146606445, "learning_rate": 2.7511473737888834e-05, "loss": 3.0423, "step": 10790 }, { "epoch": 1.8357980622131564, "grad_norm": 11.174956321716309, "learning_rate": 2.753697093319735e-05, "loss": 3.182, "step": 10800 }, { "epoch": 1.8374978752337243, "grad_norm": 11.500720024108887, "learning_rate": 2.7562468128505867e-05, "loss": 3.0178, "step": 10810 }, { "epoch": 1.839197688254292, "grad_norm": 18.878711700439453, "learning_rate": 2.758796532381438e-05, "loss": 3.1473, "step": 10820 }, { "epoch": 1.8408975012748598, "grad_norm": 10.75930404663086, "learning_rate": 2.7613462519122897e-05, "loss": 3.0789, "step": 10830 }, { "epoch": 1.8425973142954275, "grad_norm": 14.654841423034668, "learning_rate": 2.7638959714431412e-05, "loss": 3.0527, "step": 10840 }, { "epoch": 1.8442971273159952, "grad_norm": 15.402490615844727, "learning_rate": 2.7664456909739927e-05, "loss": 3.0818, "step": 10850 }, { "epoch": 1.845996940336563, "grad_norm": 14.511011123657227, "learning_rate": 2.7689954105048445e-05, "loss": 3.0071, "step": 10860 }, { "epoch": 1.8476967533571307, "grad_norm": 8.163779258728027, "learning_rate": 2.771545130035696e-05, "loss": 3.1128, "step": 10870 }, { "epoch": 1.8493965663776986, "grad_norm": 18.835479736328125, "learning_rate": 2.7740948495665478e-05, "loss": 3.1945, "step": 10880 }, { "epoch": 1.8510963793982662, "grad_norm": 10.195968627929688, "learning_rate": 2.7766445690973993e-05, "loss": 3.079, "step": 10890 }, { "epoch": 1.8527961924188339, "grad_norm": 11.1441011428833, "learning_rate": 2.7791942886282508e-05, "loss": 3.2217, "step": 10900 }, { "epoch": 1.8544960054394015, "grad_norm": 11.745121955871582, "learning_rate": 2.7817440081591026e-05, "loss": 3.1571, "step": 10910 }, { "epoch": 1.8561958184599694, "grad_norm": 13.270892143249512, "learning_rate": 2.784293727689954e-05, "loss": 3.0323, "step": 10920 }, { "epoch": 1.8578956314805373, "grad_norm": 11.599114418029785, "learning_rate": 2.7868434472208056e-05, "loss": 3.26, "step": 10930 }, { "epoch": 1.859595444501105, "grad_norm": 11.212349891662598, "learning_rate": 2.7893931667516575e-05, "loss": 2.9323, "step": 10940 }, { "epoch": 1.8612952575216726, "grad_norm": 9.489967346191406, "learning_rate": 2.791942886282509e-05, "loss": 3.0791, "step": 10950 }, { "epoch": 1.8629950705422402, "grad_norm": 32.320064544677734, "learning_rate": 2.7944926058133608e-05, "loss": 2.8098, "step": 10960 }, { "epoch": 1.864694883562808, "grad_norm": 16.496370315551758, "learning_rate": 2.7970423253442123e-05, "loss": 2.9431, "step": 10970 }, { "epoch": 1.866394696583376, "grad_norm": 14.174517631530762, "learning_rate": 2.7995920448750638e-05, "loss": 3.0168, "step": 10980 }, { "epoch": 1.8680945096039436, "grad_norm": 13.390837669372559, "learning_rate": 2.8021417644059156e-05, "loss": 2.9159, "step": 10990 }, { "epoch": 1.8697943226245113, "grad_norm": 16.728439331054688, "learning_rate": 2.804691483936767e-05, "loss": 3.0546, "step": 11000 }, { "epoch": 1.871494135645079, "grad_norm": 12.51033878326416, "learning_rate": 2.8072412034676186e-05, "loss": 3.1619, "step": 11010 }, { "epoch": 1.8731939486656468, "grad_norm": 23.473873138427734, "learning_rate": 2.8097909229984704e-05, "loss": 3.0844, "step": 11020 }, { "epoch": 1.8748937616862145, "grad_norm": 13.670848846435547, "learning_rate": 2.812340642529322e-05, "loss": 3.0705, "step": 11030 }, { "epoch": 1.8765935747067823, "grad_norm": 13.73306941986084, "learning_rate": 2.8148903620601737e-05, "loss": 3.1691, "step": 11040 }, { "epoch": 1.87829338772735, "grad_norm": 13.307453155517578, "learning_rate": 2.8174400815910252e-05, "loss": 3.07, "step": 11050 }, { "epoch": 1.8799932007479176, "grad_norm": 11.27647876739502, "learning_rate": 2.8199898011218767e-05, "loss": 2.9465, "step": 11060 }, { "epoch": 1.8816930137684853, "grad_norm": 8.150965690612793, "learning_rate": 2.8225395206527282e-05, "loss": 3.15, "step": 11070 }, { "epoch": 1.8833928267890532, "grad_norm": 10.206308364868164, "learning_rate": 2.8250892401835797e-05, "loss": 2.8848, "step": 11080 }, { "epoch": 1.885092639809621, "grad_norm": 10.754135131835938, "learning_rate": 2.8276389597144315e-05, "loss": 2.9864, "step": 11090 }, { "epoch": 1.8867924528301887, "grad_norm": 16.21930694580078, "learning_rate": 2.830188679245283e-05, "loss": 2.8869, "step": 11100 }, { "epoch": 1.8884922658507564, "grad_norm": 17.4716854095459, "learning_rate": 2.8327383987761345e-05, "loss": 3.0435, "step": 11110 }, { "epoch": 1.890192078871324, "grad_norm": 10.021828651428223, "learning_rate": 2.8352881183069863e-05, "loss": 3.0933, "step": 11120 }, { "epoch": 1.8918918918918919, "grad_norm": 17.883981704711914, "learning_rate": 2.8378378378378378e-05, "loss": 3.3079, "step": 11130 }, { "epoch": 1.8935917049124598, "grad_norm": 9.11367130279541, "learning_rate": 2.8403875573686893e-05, "loss": 2.9601, "step": 11140 }, { "epoch": 1.8952915179330274, "grad_norm": 11.470654487609863, "learning_rate": 2.842937276899541e-05, "loss": 3.1623, "step": 11150 }, { "epoch": 1.896991330953595, "grad_norm": 11.341806411743164, "learning_rate": 2.8454869964303926e-05, "loss": 3.0573, "step": 11160 }, { "epoch": 1.8986911439741627, "grad_norm": 14.870643615722656, "learning_rate": 2.8480367159612445e-05, "loss": 3.1604, "step": 11170 }, { "epoch": 1.9003909569947306, "grad_norm": 10.923785209655762, "learning_rate": 2.850586435492096e-05, "loss": 2.9066, "step": 11180 }, { "epoch": 1.9020907700152985, "grad_norm": 11.490958213806152, "learning_rate": 2.8531361550229475e-05, "loss": 3.1059, "step": 11190 }, { "epoch": 1.9037905830358661, "grad_norm": 13.28293228149414, "learning_rate": 2.8556858745537993e-05, "loss": 3.0651, "step": 11200 }, { "epoch": 1.9054903960564338, "grad_norm": 10.672990798950195, "learning_rate": 2.8582355940846508e-05, "loss": 2.8128, "step": 11210 }, { "epoch": 1.9071902090770014, "grad_norm": 19.645780563354492, "learning_rate": 2.8607853136155023e-05, "loss": 2.8501, "step": 11220 }, { "epoch": 1.9088900220975693, "grad_norm": 14.914410591125488, "learning_rate": 2.863335033146354e-05, "loss": 3.133, "step": 11230 }, { "epoch": 1.910589835118137, "grad_norm": 12.474550247192383, "learning_rate": 2.8658847526772056e-05, "loss": 3.0015, "step": 11240 }, { "epoch": 1.9122896481387048, "grad_norm": 14.144326210021973, "learning_rate": 2.8684344722080574e-05, "loss": 3.0226, "step": 11250 }, { "epoch": 1.9139894611592725, "grad_norm": 21.955459594726562, "learning_rate": 2.870984191738909e-05, "loss": 2.9912, "step": 11260 }, { "epoch": 1.9156892741798401, "grad_norm": 10.635844230651855, "learning_rate": 2.8735339112697604e-05, "loss": 3.045, "step": 11270 }, { "epoch": 1.9173890872004078, "grad_norm": 11.418295860290527, "learning_rate": 2.8760836308006122e-05, "loss": 3.2107, "step": 11280 }, { "epoch": 1.9190889002209757, "grad_norm": 14.690167427062988, "learning_rate": 2.8786333503314637e-05, "loss": 2.9902, "step": 11290 }, { "epoch": 1.9207887132415435, "grad_norm": 16.5681095123291, "learning_rate": 2.8811830698623152e-05, "loss": 3.0248, "step": 11300 }, { "epoch": 1.9224885262621112, "grad_norm": 12.974201202392578, "learning_rate": 2.8837327893931667e-05, "loss": 3.0562, "step": 11310 }, { "epoch": 1.9241883392826789, "grad_norm": 16.653783798217773, "learning_rate": 2.8862825089240182e-05, "loss": 3.0971, "step": 11320 }, { "epoch": 1.9258881523032465, "grad_norm": 10.937787055969238, "learning_rate": 2.88883222845487e-05, "loss": 3.0045, "step": 11330 }, { "epoch": 1.9275879653238144, "grad_norm": 9.910713195800781, "learning_rate": 2.8913819479857215e-05, "loss": 3.0136, "step": 11340 }, { "epoch": 1.9292877783443823, "grad_norm": 15.553596496582031, "learning_rate": 2.893931667516573e-05, "loss": 3.1552, "step": 11350 }, { "epoch": 1.93098759136495, "grad_norm": 14.930377006530762, "learning_rate": 2.896481387047425e-05, "loss": 2.9216, "step": 11360 }, { "epoch": 1.9326874043855176, "grad_norm": 10.934012413024902, "learning_rate": 2.8990311065782763e-05, "loss": 3.0785, "step": 11370 }, { "epoch": 1.9343872174060852, "grad_norm": 8.611066818237305, "learning_rate": 2.901580826109128e-05, "loss": 2.9676, "step": 11380 }, { "epoch": 1.936087030426653, "grad_norm": 10.206334114074707, "learning_rate": 2.9041305456399797e-05, "loss": 2.8133, "step": 11390 }, { "epoch": 1.937786843447221, "grad_norm": 10.415631294250488, "learning_rate": 2.906680265170831e-05, "loss": 3.0878, "step": 11400 }, { "epoch": 1.9394866564677886, "grad_norm": 12.258246421813965, "learning_rate": 2.909229984701683e-05, "loss": 2.9457, "step": 11410 }, { "epoch": 1.9411864694883563, "grad_norm": 10.988842964172363, "learning_rate": 2.9117797042325345e-05, "loss": 2.9544, "step": 11420 }, { "epoch": 1.942886282508924, "grad_norm": 15.027336120605469, "learning_rate": 2.914329423763386e-05, "loss": 3.1624, "step": 11430 }, { "epoch": 1.9445860955294918, "grad_norm": 10.959155082702637, "learning_rate": 2.9168791432942378e-05, "loss": 3.1548, "step": 11440 }, { "epoch": 1.9462859085500595, "grad_norm": 18.436216354370117, "learning_rate": 2.9194288628250893e-05, "loss": 3.0698, "step": 11450 }, { "epoch": 1.9479857215706273, "grad_norm": 17.731115341186523, "learning_rate": 2.921978582355941e-05, "loss": 3.3212, "step": 11460 }, { "epoch": 1.949685534591195, "grad_norm": 11.892354965209961, "learning_rate": 2.9245283018867926e-05, "loss": 3.0756, "step": 11470 }, { "epoch": 1.9513853476117626, "grad_norm": 23.029308319091797, "learning_rate": 2.927078021417644e-05, "loss": 2.9157, "step": 11480 }, { "epoch": 1.9530851606323303, "grad_norm": 27.032081604003906, "learning_rate": 2.929627740948496e-05, "loss": 2.9717, "step": 11490 }, { "epoch": 1.9547849736528982, "grad_norm": 14.409347534179688, "learning_rate": 2.9321774604793474e-05, "loss": 2.9883, "step": 11500 }, { "epoch": 1.956484786673466, "grad_norm": 12.167057991027832, "learning_rate": 2.934727180010199e-05, "loss": 3.0125, "step": 11510 }, { "epoch": 1.9581845996940337, "grad_norm": 11.785877227783203, "learning_rate": 2.9372768995410507e-05, "loss": 3.1191, "step": 11520 }, { "epoch": 1.9598844127146013, "grad_norm": 12.767763137817383, "learning_rate": 2.9398266190719022e-05, "loss": 2.9244, "step": 11530 }, { "epoch": 1.961584225735169, "grad_norm": 11.866793632507324, "learning_rate": 2.942376338602754e-05, "loss": 3.1329, "step": 11540 }, { "epoch": 1.9632840387557369, "grad_norm": 10.458184242248535, "learning_rate": 2.9449260581336052e-05, "loss": 2.9319, "step": 11550 }, { "epoch": 1.9649838517763047, "grad_norm": 18.729665756225586, "learning_rate": 2.9474757776644567e-05, "loss": 3.008, "step": 11560 }, { "epoch": 1.9666836647968724, "grad_norm": 13.468454360961914, "learning_rate": 2.9500254971953085e-05, "loss": 3.3233, "step": 11570 }, { "epoch": 1.96838347781744, "grad_norm": 13.87508773803711, "learning_rate": 2.95257521672616e-05, "loss": 2.9031, "step": 11580 }, { "epoch": 1.9700832908380077, "grad_norm": 10.62235164642334, "learning_rate": 2.955124936257012e-05, "loss": 2.9259, "step": 11590 }, { "epoch": 1.9717831038585756, "grad_norm": 11.317169189453125, "learning_rate": 2.9576746557878634e-05, "loss": 2.9433, "step": 11600 }, { "epoch": 1.9734829168791435, "grad_norm": 9.546210289001465, "learning_rate": 2.960224375318715e-05, "loss": 3.0039, "step": 11610 }, { "epoch": 1.975182729899711, "grad_norm": 10.268428802490234, "learning_rate": 2.9627740948495667e-05, "loss": 3.1848, "step": 11620 }, { "epoch": 1.9768825429202788, "grad_norm": 14.274723052978516, "learning_rate": 2.965323814380418e-05, "loss": 3.0462, "step": 11630 }, { "epoch": 1.9785823559408464, "grad_norm": 14.115768432617188, "learning_rate": 2.9678735339112697e-05, "loss": 3.0722, "step": 11640 }, { "epoch": 1.9802821689614143, "grad_norm": 12.530299186706543, "learning_rate": 2.9704232534421215e-05, "loss": 3.3013, "step": 11650 }, { "epoch": 1.981981981981982, "grad_norm": 16.787250518798828, "learning_rate": 2.972972972972973e-05, "loss": 3.0988, "step": 11660 }, { "epoch": 1.9836817950025498, "grad_norm": 18.97042465209961, "learning_rate": 2.9755226925038248e-05, "loss": 3.1104, "step": 11670 }, { "epoch": 1.9853816080231175, "grad_norm": 24.20513153076172, "learning_rate": 2.9780724120346763e-05, "loss": 3.0891, "step": 11680 }, { "epoch": 1.9870814210436851, "grad_norm": 10.024070739746094, "learning_rate": 2.9806221315655278e-05, "loss": 3.2187, "step": 11690 }, { "epoch": 1.9887812340642528, "grad_norm": 13.916494369506836, "learning_rate": 2.9831718510963796e-05, "loss": 3.0381, "step": 11700 }, { "epoch": 1.9904810470848207, "grad_norm": 13.86467456817627, "learning_rate": 2.985721570627231e-05, "loss": 3.1375, "step": 11710 }, { "epoch": 1.9921808601053885, "grad_norm": 14.30278205871582, "learning_rate": 2.9882712901580826e-05, "loss": 2.8557, "step": 11720 }, { "epoch": 1.9938806731259562, "grad_norm": 9.79979419708252, "learning_rate": 2.9908210096889344e-05, "loss": 3.2721, "step": 11730 }, { "epoch": 1.9955804861465238, "grad_norm": 9.374077796936035, "learning_rate": 2.993370729219786e-05, "loss": 3.0876, "step": 11740 }, { "epoch": 1.9972802991670915, "grad_norm": 19.786325454711914, "learning_rate": 2.9959204487506378e-05, "loss": 3.07, "step": 11750 }, { "epoch": 1.9989801121876594, "grad_norm": 9.891304016113281, "learning_rate": 2.9984701682814892e-05, "loss": 3.271, "step": 11760 }, { "epoch": 2.0, "eval_cer": 1.000118371212121, "eval_loss": 3.2031567096710205, "eval_runtime": 1955.7675, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.241, "step": 11766 }, { "epoch": 2.0006799252082272, "grad_norm": 18.791025161743164, "learning_rate": 2.999886679131962e-05, "loss": 2.7198, "step": 11770 }, { "epoch": 2.002379738228795, "grad_norm": 12.94577407836914, "learning_rate": 2.9996033769618675e-05, "loss": 2.954, "step": 11780 }, { "epoch": 2.0040795512493625, "grad_norm": 25.000930786132812, "learning_rate": 2.9993200747917732e-05, "loss": 2.9818, "step": 11790 }, { "epoch": 2.00577936426993, "grad_norm": 11.790266990661621, "learning_rate": 2.9990367726216785e-05, "loss": 2.9089, "step": 11800 }, { "epoch": 2.007479177290498, "grad_norm": 8.775589942932129, "learning_rate": 2.9987534704515836e-05, "loss": 2.9278, "step": 11810 }, { "epoch": 2.009178990311066, "grad_norm": 12.04806137084961, "learning_rate": 2.9984701682814892e-05, "loss": 2.875, "step": 11820 }, { "epoch": 2.0108788033316336, "grad_norm": 17.967151641845703, "learning_rate": 2.9981868661113946e-05, "loss": 2.7544, "step": 11830 }, { "epoch": 2.0125786163522013, "grad_norm": 15.540682792663574, "learning_rate": 2.9979035639412996e-05, "loss": 2.7626, "step": 11840 }, { "epoch": 2.014278429372769, "grad_norm": 12.079930305480957, "learning_rate": 2.9976202617712053e-05, "loss": 2.8885, "step": 11850 }, { "epoch": 2.0159782423933366, "grad_norm": 7.694283485412598, "learning_rate": 2.9973369596011107e-05, "loss": 3.1008, "step": 11860 }, { "epoch": 2.0176780554139047, "grad_norm": 14.91864013671875, "learning_rate": 2.997053657431016e-05, "loss": 2.7119, "step": 11870 }, { "epoch": 2.0193778684344723, "grad_norm": 12.421855926513672, "learning_rate": 2.9967703552609214e-05, "loss": 3.0226, "step": 11880 }, { "epoch": 2.02107768145504, "grad_norm": 8.494792938232422, "learning_rate": 2.9964870530908267e-05, "loss": 2.9196, "step": 11890 }, { "epoch": 2.0227774944756076, "grad_norm": 10.394904136657715, "learning_rate": 2.996203750920732e-05, "loss": 2.7419, "step": 11900 }, { "epoch": 2.0244773074961753, "grad_norm": 18.54844856262207, "learning_rate": 2.9959204487506378e-05, "loss": 2.8374, "step": 11910 }, { "epoch": 2.0261771205167434, "grad_norm": 11.473125457763672, "learning_rate": 2.9956371465805428e-05, "loss": 2.7071, "step": 11920 }, { "epoch": 2.027876933537311, "grad_norm": 15.622718811035156, "learning_rate": 2.995353844410448e-05, "loss": 2.9231, "step": 11930 }, { "epoch": 2.0295767465578787, "grad_norm": 10.664665222167969, "learning_rate": 2.9950705422403538e-05, "loss": 2.8722, "step": 11940 }, { "epoch": 2.0312765595784463, "grad_norm": 13.929644584655762, "learning_rate": 2.994787240070259e-05, "loss": 2.941, "step": 11950 }, { "epoch": 2.032976372599014, "grad_norm": 11.930373191833496, "learning_rate": 2.9945039379001642e-05, "loss": 3.0739, "step": 11960 }, { "epoch": 2.0346761856195816, "grad_norm": 12.8961820602417, "learning_rate": 2.99422063573007e-05, "loss": 3.1096, "step": 11970 }, { "epoch": 2.0363759986401497, "grad_norm": 10.14059066772461, "learning_rate": 2.9939373335599752e-05, "loss": 2.8439, "step": 11980 }, { "epoch": 2.0380758116607174, "grad_norm": 16.64179229736328, "learning_rate": 2.9936540313898802e-05, "loss": 2.6129, "step": 11990 }, { "epoch": 2.039775624681285, "grad_norm": 13.018667221069336, "learning_rate": 2.993370729219786e-05, "loss": 2.8835, "step": 12000 }, { "epoch": 2.0414754377018527, "grad_norm": 11.047811508178711, "learning_rate": 2.9930874270496913e-05, "loss": 3.0064, "step": 12010 }, { "epoch": 2.0431752507224203, "grad_norm": 9.939908027648926, "learning_rate": 2.9928041248795966e-05, "loss": 2.963, "step": 12020 }, { "epoch": 2.0448750637429884, "grad_norm": 14.737626075744629, "learning_rate": 2.992520822709502e-05, "loss": 2.9272, "step": 12030 }, { "epoch": 2.046574876763556, "grad_norm": 12.254145622253418, "learning_rate": 2.9922375205394073e-05, "loss": 2.8978, "step": 12040 }, { "epoch": 2.0482746897841237, "grad_norm": 10.261839866638184, "learning_rate": 2.9919542183693127e-05, "loss": 2.9104, "step": 12050 }, { "epoch": 2.0499745028046914, "grad_norm": 10.069540977478027, "learning_rate": 2.9916709161992184e-05, "loss": 2.9646, "step": 12060 }, { "epoch": 2.051674315825259, "grad_norm": 12.539708137512207, "learning_rate": 2.9913876140291234e-05, "loss": 2.9326, "step": 12070 }, { "epoch": 2.053374128845827, "grad_norm": 11.15083122253418, "learning_rate": 2.9911043118590287e-05, "loss": 2.773, "step": 12080 }, { "epoch": 2.055073941866395, "grad_norm": 15.086181640625, "learning_rate": 2.9908210096889344e-05, "loss": 2.9603, "step": 12090 }, { "epoch": 2.0567737548869625, "grad_norm": 11.350518226623535, "learning_rate": 2.9905377075188398e-05, "loss": 2.9716, "step": 12100 }, { "epoch": 2.05847356790753, "grad_norm": 11.22713565826416, "learning_rate": 2.9902544053487448e-05, "loss": 2.9099, "step": 12110 }, { "epoch": 2.0601733809280978, "grad_norm": 13.275711059570312, "learning_rate": 2.9899711031786505e-05, "loss": 2.8633, "step": 12120 }, { "epoch": 2.061873193948666, "grad_norm": 8.776313781738281, "learning_rate": 2.989687801008556e-05, "loss": 2.9141, "step": 12130 }, { "epoch": 2.0635730069692335, "grad_norm": 19.555875778198242, "learning_rate": 2.9894044988384612e-05, "loss": 2.8122, "step": 12140 }, { "epoch": 2.065272819989801, "grad_norm": 10.110665321350098, "learning_rate": 2.9891211966683665e-05, "loss": 2.8583, "step": 12150 }, { "epoch": 2.066972633010369, "grad_norm": 14.92020034790039, "learning_rate": 2.988837894498272e-05, "loss": 2.8048, "step": 12160 }, { "epoch": 2.0686724460309365, "grad_norm": 11.524712562561035, "learning_rate": 2.9885545923281773e-05, "loss": 2.9128, "step": 12170 }, { "epoch": 2.070372259051504, "grad_norm": 13.57174015045166, "learning_rate": 2.9882712901580826e-05, "loss": 2.9171, "step": 12180 }, { "epoch": 2.0720720720720722, "grad_norm": 14.572854042053223, "learning_rate": 2.987987987987988e-05, "loss": 3.1253, "step": 12190 }, { "epoch": 2.07377188509264, "grad_norm": 12.003131866455078, "learning_rate": 2.9877046858178933e-05, "loss": 3.1395, "step": 12200 }, { "epoch": 2.0754716981132075, "grad_norm": 8.87679386138916, "learning_rate": 2.987421383647799e-05, "loss": 2.942, "step": 12210 }, { "epoch": 2.077171511133775, "grad_norm": 17.95876121520996, "learning_rate": 2.987138081477704e-05, "loss": 2.9201, "step": 12220 }, { "epoch": 2.078871324154343, "grad_norm": 13.916251182556152, "learning_rate": 2.9868547793076094e-05, "loss": 2.9005, "step": 12230 }, { "epoch": 2.080571137174911, "grad_norm": 14.881242752075195, "learning_rate": 2.986571477137515e-05, "loss": 2.8399, "step": 12240 }, { "epoch": 2.0822709501954786, "grad_norm": 10.302447319030762, "learning_rate": 2.9862881749674204e-05, "loss": 2.913, "step": 12250 }, { "epoch": 2.0839707632160462, "grad_norm": 10.645849227905273, "learning_rate": 2.9860048727973254e-05, "loss": 2.9498, "step": 12260 }, { "epoch": 2.085670576236614, "grad_norm": 10.130167961120605, "learning_rate": 2.985721570627231e-05, "loss": 2.9184, "step": 12270 }, { "epoch": 2.0873703892571815, "grad_norm": 19.192617416381836, "learning_rate": 2.9854382684571365e-05, "loss": 2.9002, "step": 12280 }, { "epoch": 2.0890702022777496, "grad_norm": 12.608309745788574, "learning_rate": 2.9851549662870418e-05, "loss": 3.0198, "step": 12290 }, { "epoch": 2.0907700152983173, "grad_norm": 12.441994667053223, "learning_rate": 2.9848716641169472e-05, "loss": 2.9101, "step": 12300 }, { "epoch": 2.092469828318885, "grad_norm": 41.70418930053711, "learning_rate": 2.9845883619468525e-05, "loss": 3.0072, "step": 12310 }, { "epoch": 2.0941696413394526, "grad_norm": 20.397340774536133, "learning_rate": 2.9843050597767582e-05, "loss": 2.5505, "step": 12320 }, { "epoch": 2.0958694543600203, "grad_norm": 18.28340721130371, "learning_rate": 2.9840217576066632e-05, "loss": 2.8982, "step": 12330 }, { "epoch": 2.097569267380588, "grad_norm": 17.02174949645996, "learning_rate": 2.9837384554365686e-05, "loss": 2.7497, "step": 12340 }, { "epoch": 2.099269080401156, "grad_norm": 11.128389358520508, "learning_rate": 2.9834551532664743e-05, "loss": 2.8945, "step": 12350 }, { "epoch": 2.1009688934217237, "grad_norm": 14.179865837097168, "learning_rate": 2.9831718510963796e-05, "loss": 2.6377, "step": 12360 }, { "epoch": 2.1026687064422913, "grad_norm": 13.79997730255127, "learning_rate": 2.9828885489262846e-05, "loss": 3.0294, "step": 12370 }, { "epoch": 2.104368519462859, "grad_norm": 12.344956398010254, "learning_rate": 2.9826052467561903e-05, "loss": 2.9506, "step": 12380 }, { "epoch": 2.1060683324834266, "grad_norm": 22.15533447265625, "learning_rate": 2.9823219445860957e-05, "loss": 2.8274, "step": 12390 }, { "epoch": 2.1077681455039947, "grad_norm": 12.9762601852417, "learning_rate": 2.982038642416001e-05, "loss": 2.8868, "step": 12400 }, { "epoch": 2.1094679585245624, "grad_norm": 17.745149612426758, "learning_rate": 2.9817553402459064e-05, "loss": 2.9567, "step": 12410 }, { "epoch": 2.11116777154513, "grad_norm": 12.818342208862305, "learning_rate": 2.9814720380758117e-05, "loss": 3.0225, "step": 12420 }, { "epoch": 2.1128675845656977, "grad_norm": 13.69707202911377, "learning_rate": 2.981188735905717e-05, "loss": 2.8389, "step": 12430 }, { "epoch": 2.1145673975862653, "grad_norm": 11.536348342895508, "learning_rate": 2.9809054337356228e-05, "loss": 2.8309, "step": 12440 }, { "epoch": 2.1162672106068334, "grad_norm": 17.38291358947754, "learning_rate": 2.9806221315655278e-05, "loss": 2.9323, "step": 12450 }, { "epoch": 2.117967023627401, "grad_norm": 21.668764114379883, "learning_rate": 2.980338829395433e-05, "loss": 2.9584, "step": 12460 }, { "epoch": 2.1196668366479687, "grad_norm": 24.522512435913086, "learning_rate": 2.980055527225339e-05, "loss": 3.114, "step": 12470 }, { "epoch": 2.1213666496685364, "grad_norm": 12.700536727905273, "learning_rate": 2.9797722250552442e-05, "loss": 2.7686, "step": 12480 }, { "epoch": 2.123066462689104, "grad_norm": 46.13813400268555, "learning_rate": 2.9794889228851492e-05, "loss": 2.8069, "step": 12490 }, { "epoch": 2.124766275709672, "grad_norm": 14.141912460327148, "learning_rate": 2.979205620715055e-05, "loss": 3.0234, "step": 12500 }, { "epoch": 2.12646608873024, "grad_norm": 23.995065689086914, "learning_rate": 2.9789223185449602e-05, "loss": 2.8229, "step": 12510 }, { "epoch": 2.1281659017508074, "grad_norm": 10.902254104614258, "learning_rate": 2.9786390163748653e-05, "loss": 2.8597, "step": 12520 }, { "epoch": 2.129865714771375, "grad_norm": 14.649701118469238, "learning_rate": 2.978355714204771e-05, "loss": 2.8191, "step": 12530 }, { "epoch": 2.1315655277919427, "grad_norm": 11.017483711242676, "learning_rate": 2.9780724120346763e-05, "loss": 2.8763, "step": 12540 }, { "epoch": 2.1332653408125104, "grad_norm": 25.47491455078125, "learning_rate": 2.9777891098645817e-05, "loss": 2.9185, "step": 12550 }, { "epoch": 2.1349651538330785, "grad_norm": 13.110555648803711, "learning_rate": 2.977505807694487e-05, "loss": 2.854, "step": 12560 }, { "epoch": 2.136664966853646, "grad_norm": 12.696219444274902, "learning_rate": 2.9772225055243924e-05, "loss": 2.8953, "step": 12570 }, { "epoch": 2.138364779874214, "grad_norm": 11.840238571166992, "learning_rate": 2.9769392033542977e-05, "loss": 3.0445, "step": 12580 }, { "epoch": 2.1400645928947815, "grad_norm": 20.288719177246094, "learning_rate": 2.9766559011842034e-05, "loss": 2.9758, "step": 12590 }, { "epoch": 2.141764405915349, "grad_norm": 12.110265731811523, "learning_rate": 2.9763725990141084e-05, "loss": 2.8967, "step": 12600 }, { "epoch": 2.143464218935917, "grad_norm": 14.235834121704102, "learning_rate": 2.9760892968440138e-05, "loss": 3.0239, "step": 12610 }, { "epoch": 2.145164031956485, "grad_norm": 10.826822280883789, "learning_rate": 2.9758059946739195e-05, "loss": 2.9309, "step": 12620 }, { "epoch": 2.1468638449770525, "grad_norm": 10.807554244995117, "learning_rate": 2.9755226925038248e-05, "loss": 2.9364, "step": 12630 }, { "epoch": 2.14856365799762, "grad_norm": 14.857220649719238, "learning_rate": 2.9752393903337298e-05, "loss": 2.7673, "step": 12640 }, { "epoch": 2.150263471018188, "grad_norm": 16.179155349731445, "learning_rate": 2.9749560881636355e-05, "loss": 3.0569, "step": 12650 }, { "epoch": 2.151963284038756, "grad_norm": 13.328742980957031, "learning_rate": 2.974672785993541e-05, "loss": 3.0001, "step": 12660 }, { "epoch": 2.1536630970593236, "grad_norm": 12.574847221374512, "learning_rate": 2.974389483823446e-05, "loss": 2.9702, "step": 12670 }, { "epoch": 2.1553629100798912, "grad_norm": 14.10012149810791, "learning_rate": 2.9741061816533516e-05, "loss": 2.9054, "step": 12680 }, { "epoch": 2.157062723100459, "grad_norm": 13.554207801818848, "learning_rate": 2.973822879483257e-05, "loss": 2.992, "step": 12690 }, { "epoch": 2.1587625361210265, "grad_norm": 10.912349700927734, "learning_rate": 2.9735395773131623e-05, "loss": 2.8161, "step": 12700 }, { "epoch": 2.1604623491415946, "grad_norm": 14.654378890991211, "learning_rate": 2.9732562751430676e-05, "loss": 2.5565, "step": 12710 }, { "epoch": 2.1621621621621623, "grad_norm": 19.732797622680664, "learning_rate": 2.972972972972973e-05, "loss": 2.7681, "step": 12720 }, { "epoch": 2.16386197518273, "grad_norm": 15.819750785827637, "learning_rate": 2.9726896708028783e-05, "loss": 2.8774, "step": 12730 }, { "epoch": 2.1655617882032976, "grad_norm": 17.500751495361328, "learning_rate": 2.972406368632784e-05, "loss": 2.7715, "step": 12740 }, { "epoch": 2.1672616012238652, "grad_norm": 16.41480827331543, "learning_rate": 2.972123066462689e-05, "loss": 2.8203, "step": 12750 }, { "epoch": 2.168961414244433, "grad_norm": 12.016535758972168, "learning_rate": 2.9718397642925944e-05, "loss": 2.7809, "step": 12760 }, { "epoch": 2.170661227265001, "grad_norm": 11.61678409576416, "learning_rate": 2.9715564621225e-05, "loss": 3.0422, "step": 12770 }, { "epoch": 2.1723610402855686, "grad_norm": 13.161426544189453, "learning_rate": 2.9712731599524054e-05, "loss": 2.8823, "step": 12780 }, { "epoch": 2.1740608533061363, "grad_norm": 33.33045959472656, "learning_rate": 2.9709898577823104e-05, "loss": 2.7983, "step": 12790 }, { "epoch": 2.175760666326704, "grad_norm": 16.75160026550293, "learning_rate": 2.970706555612216e-05, "loss": 3.0978, "step": 12800 }, { "epoch": 2.1774604793472716, "grad_norm": 11.842535972595215, "learning_rate": 2.9704232534421215e-05, "loss": 3.0131, "step": 12810 }, { "epoch": 2.1791602923678397, "grad_norm": 12.515992164611816, "learning_rate": 2.970139951272027e-05, "loss": 2.8463, "step": 12820 }, { "epoch": 2.1808601053884074, "grad_norm": 11.856687545776367, "learning_rate": 2.9698566491019322e-05, "loss": 3.1295, "step": 12830 }, { "epoch": 2.182559918408975, "grad_norm": 14.35540771484375, "learning_rate": 2.9695733469318375e-05, "loss": 2.772, "step": 12840 }, { "epoch": 2.1842597314295427, "grad_norm": 13.17465591430664, "learning_rate": 2.969290044761743e-05, "loss": 3.0302, "step": 12850 }, { "epoch": 2.1859595444501103, "grad_norm": 10.442919731140137, "learning_rate": 2.9690067425916482e-05, "loss": 2.8085, "step": 12860 }, { "epoch": 2.1876593574706784, "grad_norm": 10.493829727172852, "learning_rate": 2.9687234404215536e-05, "loss": 2.7355, "step": 12870 }, { "epoch": 2.189359170491246, "grad_norm": 11.260954856872559, "learning_rate": 2.968440138251459e-05, "loss": 2.5852, "step": 12880 }, { "epoch": 2.1910589835118137, "grad_norm": 14.764145851135254, "learning_rate": 2.9681568360813646e-05, "loss": 2.8231, "step": 12890 }, { "epoch": 2.1927587965323814, "grad_norm": 14.138676643371582, "learning_rate": 2.9678735339112697e-05, "loss": 2.8293, "step": 12900 }, { "epoch": 2.194458609552949, "grad_norm": 16.835590362548828, "learning_rate": 2.967590231741175e-05, "loss": 2.8758, "step": 12910 }, { "epoch": 2.196158422573517, "grad_norm": 12.319572448730469, "learning_rate": 2.9673069295710807e-05, "loss": 2.8213, "step": 12920 }, { "epoch": 2.1978582355940848, "grad_norm": 15.319928169250488, "learning_rate": 2.967023627400986e-05, "loss": 2.9385, "step": 12930 }, { "epoch": 2.1995580486146524, "grad_norm": 19.753456115722656, "learning_rate": 2.966740325230891e-05, "loss": 2.7608, "step": 12940 }, { "epoch": 2.20125786163522, "grad_norm": 14.360424041748047, "learning_rate": 2.9664570230607968e-05, "loss": 2.9277, "step": 12950 }, { "epoch": 2.2029576746557877, "grad_norm": 20.91707420349121, "learning_rate": 2.966173720890702e-05, "loss": 2.633, "step": 12960 }, { "epoch": 2.2046574876763554, "grad_norm": 8.788515090942383, "learning_rate": 2.9658904187206075e-05, "loss": 3.0117, "step": 12970 }, { "epoch": 2.2063573006969235, "grad_norm": 13.58300495147705, "learning_rate": 2.9656071165505128e-05, "loss": 2.9914, "step": 12980 }, { "epoch": 2.208057113717491, "grad_norm": 9.093235969543457, "learning_rate": 2.965323814380418e-05, "loss": 2.9662, "step": 12990 }, { "epoch": 2.209756926738059, "grad_norm": 10.837757110595703, "learning_rate": 2.9650405122103235e-05, "loss": 2.9829, "step": 13000 }, { "epoch": 2.2114567397586264, "grad_norm": 13.679630279541016, "learning_rate": 2.9647572100402292e-05, "loss": 2.9148, "step": 13010 }, { "epoch": 2.213156552779194, "grad_norm": 16.878189086914062, "learning_rate": 2.9644739078701342e-05, "loss": 2.8689, "step": 13020 }, { "epoch": 2.214856365799762, "grad_norm": 18.214683532714844, "learning_rate": 2.96419060570004e-05, "loss": 2.938, "step": 13030 }, { "epoch": 2.21655617882033, "grad_norm": 10.539591789245605, "learning_rate": 2.9639073035299453e-05, "loss": 2.8476, "step": 13040 }, { "epoch": 2.2182559918408975, "grad_norm": 9.061704635620117, "learning_rate": 2.9636240013598503e-05, "loss": 2.9252, "step": 13050 }, { "epoch": 2.219955804861465, "grad_norm": 12.330977439880371, "learning_rate": 2.963340699189756e-05, "loss": 2.7742, "step": 13060 }, { "epoch": 2.221655617882033, "grad_norm": 14.0772705078125, "learning_rate": 2.9630573970196613e-05, "loss": 2.9355, "step": 13070 }, { "epoch": 2.223355430902601, "grad_norm": 11.289369583129883, "learning_rate": 2.9627740948495667e-05, "loss": 2.8782, "step": 13080 }, { "epoch": 2.2250552439231686, "grad_norm": 13.039993286132812, "learning_rate": 2.962490792679472e-05, "loss": 3.01, "step": 13090 }, { "epoch": 2.226755056943736, "grad_norm": 20.06501007080078, "learning_rate": 2.9622074905093774e-05, "loss": 2.6444, "step": 13100 }, { "epoch": 2.228454869964304, "grad_norm": 16.824369430541992, "learning_rate": 2.9619241883392827e-05, "loss": 2.7747, "step": 13110 }, { "epoch": 2.2301546829848715, "grad_norm": 7.981271743774414, "learning_rate": 2.9616408861691884e-05, "loss": 3.0279, "step": 13120 }, { "epoch": 2.2318544960054396, "grad_norm": 11.092769622802734, "learning_rate": 2.9613575839990934e-05, "loss": 3.0843, "step": 13130 }, { "epoch": 2.2335543090260073, "grad_norm": 12.716200828552246, "learning_rate": 2.9610742818289988e-05, "loss": 2.7365, "step": 13140 }, { "epoch": 2.235254122046575, "grad_norm": 19.846881866455078, "learning_rate": 2.9607909796589045e-05, "loss": 3.1751, "step": 13150 }, { "epoch": 2.2369539350671426, "grad_norm": 19.89596939086914, "learning_rate": 2.9605076774888098e-05, "loss": 2.9958, "step": 13160 }, { "epoch": 2.2386537480877102, "grad_norm": 24.260433197021484, "learning_rate": 2.960224375318715e-05, "loss": 2.7171, "step": 13170 }, { "epoch": 2.240353561108278, "grad_norm": 11.528610229492188, "learning_rate": 2.9599410731486205e-05, "loss": 2.8498, "step": 13180 }, { "epoch": 2.242053374128846, "grad_norm": 15.128596305847168, "learning_rate": 2.959657770978526e-05, "loss": 2.5615, "step": 13190 }, { "epoch": 2.2437531871494136, "grad_norm": 16.367246627807617, "learning_rate": 2.959374468808431e-05, "loss": 3.1516, "step": 13200 }, { "epoch": 2.2454530001699813, "grad_norm": 19.39472007751465, "learning_rate": 2.9590911666383366e-05, "loss": 3.0614, "step": 13210 }, { "epoch": 2.247152813190549, "grad_norm": 9.580227851867676, "learning_rate": 2.958807864468242e-05, "loss": 2.902, "step": 13220 }, { "epoch": 2.2488526262111166, "grad_norm": 12.191386222839355, "learning_rate": 2.9585245622981473e-05, "loss": 2.9881, "step": 13230 }, { "epoch": 2.2505524392316847, "grad_norm": 13.339408874511719, "learning_rate": 2.9582412601280526e-05, "loss": 2.6206, "step": 13240 }, { "epoch": 2.2522522522522523, "grad_norm": 10.12553882598877, "learning_rate": 2.957957957957958e-05, "loss": 2.8719, "step": 13250 }, { "epoch": 2.25395206527282, "grad_norm": 10.633309364318848, "learning_rate": 2.9576746557878634e-05, "loss": 3.0523, "step": 13260 }, { "epoch": 2.2556518782933876, "grad_norm": 13.985868453979492, "learning_rate": 2.957391353617769e-05, "loss": 2.9282, "step": 13270 }, { "epoch": 2.2573516913139553, "grad_norm": 15.152982711791992, "learning_rate": 2.957108051447674e-05, "loss": 2.8198, "step": 13280 }, { "epoch": 2.259051504334523, "grad_norm": 16.309091567993164, "learning_rate": 2.9568247492775794e-05, "loss": 2.6641, "step": 13290 }, { "epoch": 2.260751317355091, "grad_norm": 6.626944541931152, "learning_rate": 2.956541447107485e-05, "loss": 2.5204, "step": 13300 }, { "epoch": 2.2624511303756587, "grad_norm": 16.82219696044922, "learning_rate": 2.9562581449373905e-05, "loss": 2.8261, "step": 13310 }, { "epoch": 2.2641509433962264, "grad_norm": 13.15018367767334, "learning_rate": 2.9559748427672955e-05, "loss": 2.8516, "step": 13320 }, { "epoch": 2.265850756416794, "grad_norm": 11.281691551208496, "learning_rate": 2.955691540597201e-05, "loss": 2.7151, "step": 13330 }, { "epoch": 2.267550569437362, "grad_norm": 22.7078914642334, "learning_rate": 2.9554082384271065e-05, "loss": 2.6932, "step": 13340 }, { "epoch": 2.2692503824579298, "grad_norm": 18.381790161132812, "learning_rate": 2.955124936257012e-05, "loss": 3.2206, "step": 13350 }, { "epoch": 2.2709501954784974, "grad_norm": 21.310625076293945, "learning_rate": 2.9548416340869172e-05, "loss": 2.9053, "step": 13360 }, { "epoch": 2.272650008499065, "grad_norm": 24.139928817749023, "learning_rate": 2.9545583319168226e-05, "loss": 2.8567, "step": 13370 }, { "epoch": 2.2743498215196327, "grad_norm": 13.932974815368652, "learning_rate": 2.954275029746728e-05, "loss": 2.9517, "step": 13380 }, { "epoch": 2.2760496345402004, "grad_norm": 21.07594108581543, "learning_rate": 2.9539917275766333e-05, "loss": 2.7673, "step": 13390 }, { "epoch": 2.2777494475607685, "grad_norm": 9.656015396118164, "learning_rate": 2.9537084254065386e-05, "loss": 2.6981, "step": 13400 }, { "epoch": 2.279449260581336, "grad_norm": 13.45248794555664, "learning_rate": 2.953425123236444e-05, "loss": 2.8354, "step": 13410 }, { "epoch": 2.2811490736019038, "grad_norm": 17.58300018310547, "learning_rate": 2.9531418210663497e-05, "loss": 2.7334, "step": 13420 }, { "epoch": 2.2828488866224714, "grad_norm": 21.964534759521484, "learning_rate": 2.9528585188962547e-05, "loss": 2.9206, "step": 13430 }, { "epoch": 2.284548699643039, "grad_norm": 16.74259376525879, "learning_rate": 2.95257521672616e-05, "loss": 2.6978, "step": 13440 }, { "epoch": 2.286248512663607, "grad_norm": 14.855996131896973, "learning_rate": 2.9522919145560657e-05, "loss": 2.8279, "step": 13450 }, { "epoch": 2.287948325684175, "grad_norm": 18.915973663330078, "learning_rate": 2.952008612385971e-05, "loss": 2.7317, "step": 13460 }, { "epoch": 2.2896481387047425, "grad_norm": 18.386489868164062, "learning_rate": 2.951725310215876e-05, "loss": 2.7726, "step": 13470 }, { "epoch": 2.29134795172531, "grad_norm": 17.66826629638672, "learning_rate": 2.9514420080457818e-05, "loss": 3.0096, "step": 13480 }, { "epoch": 2.293047764745878, "grad_norm": 8.599526405334473, "learning_rate": 2.951158705875687e-05, "loss": 2.9645, "step": 13490 }, { "epoch": 2.2947475777664454, "grad_norm": 13.809362411499023, "learning_rate": 2.9508754037055925e-05, "loss": 2.9605, "step": 13500 }, { "epoch": 2.2964473907870135, "grad_norm": 14.301692962646484, "learning_rate": 2.950592101535498e-05, "loss": 2.8986, "step": 13510 }, { "epoch": 2.298147203807581, "grad_norm": 9.920862197875977, "learning_rate": 2.9503087993654032e-05, "loss": 2.9228, "step": 13520 }, { "epoch": 2.299847016828149, "grad_norm": 24.96611976623535, "learning_rate": 2.9500254971953085e-05, "loss": 2.9322, "step": 13530 }, { "epoch": 2.3015468298487165, "grad_norm": 14.745706558227539, "learning_rate": 2.949742195025214e-05, "loss": 2.7663, "step": 13540 }, { "epoch": 2.3032466428692846, "grad_norm": 14.40487003326416, "learning_rate": 2.9494588928551192e-05, "loss": 2.7911, "step": 13550 }, { "epoch": 2.3049464558898523, "grad_norm": 14.716666221618652, "learning_rate": 2.9491755906850246e-05, "loss": 2.7573, "step": 13560 }, { "epoch": 2.30664626891042, "grad_norm": 13.50830078125, "learning_rate": 2.9488922885149303e-05, "loss": 3.0231, "step": 13570 }, { "epoch": 2.3083460819309876, "grad_norm": 15.560547828674316, "learning_rate": 2.9486089863448353e-05, "loss": 2.9285, "step": 13580 }, { "epoch": 2.310045894951555, "grad_norm": 17.93254852294922, "learning_rate": 2.9483256841747407e-05, "loss": 2.7583, "step": 13590 }, { "epoch": 2.311745707972123, "grad_norm": 19.777273178100586, "learning_rate": 2.9480423820046463e-05, "loss": 2.7727, "step": 13600 }, { "epoch": 2.313445520992691, "grad_norm": 13.937402725219727, "learning_rate": 2.9477590798345517e-05, "loss": 2.9216, "step": 13610 }, { "epoch": 2.3151453340132586, "grad_norm": 15.250155448913574, "learning_rate": 2.9474757776644567e-05, "loss": 2.6196, "step": 13620 }, { "epoch": 2.3168451470338263, "grad_norm": 11.92586612701416, "learning_rate": 2.9471924754943624e-05, "loss": 2.5974, "step": 13630 }, { "epoch": 2.318544960054394, "grad_norm": 10.633994102478027, "learning_rate": 2.9469091733242678e-05, "loss": 2.974, "step": 13640 }, { "epoch": 2.3202447730749616, "grad_norm": 9.002276420593262, "learning_rate": 2.946625871154173e-05, "loss": 2.953, "step": 13650 }, { "epoch": 2.3219445860955297, "grad_norm": 12.703737258911133, "learning_rate": 2.9463425689840785e-05, "loss": 2.8093, "step": 13660 }, { "epoch": 2.3236443991160973, "grad_norm": 16.226848602294922, "learning_rate": 2.9460592668139838e-05, "loss": 2.9033, "step": 13670 }, { "epoch": 2.325344212136665, "grad_norm": 9.444445610046387, "learning_rate": 2.945775964643889e-05, "loss": 2.864, "step": 13680 }, { "epoch": 2.3270440251572326, "grad_norm": 14.8817777633667, "learning_rate": 2.945492662473795e-05, "loss": 2.7907, "step": 13690 }, { "epoch": 2.3287438381778003, "grad_norm": 11.429038047790527, "learning_rate": 2.9452093603037e-05, "loss": 2.8744, "step": 13700 }, { "epoch": 2.330443651198368, "grad_norm": 10.990591049194336, "learning_rate": 2.9449260581336052e-05, "loss": 3.0286, "step": 13710 }, { "epoch": 2.332143464218936, "grad_norm": 11.84371280670166, "learning_rate": 2.944642755963511e-05, "loss": 2.9729, "step": 13720 }, { "epoch": 2.3338432772395037, "grad_norm": 13.697321891784668, "learning_rate": 2.944359453793416e-05, "loss": 2.9305, "step": 13730 }, { "epoch": 2.3355430902600713, "grad_norm": 24.61151123046875, "learning_rate": 2.9440761516233216e-05, "loss": 2.8382, "step": 13740 }, { "epoch": 2.337242903280639, "grad_norm": 27.80831527709961, "learning_rate": 2.943792849453227e-05, "loss": 2.7963, "step": 13750 }, { "epoch": 2.338942716301207, "grad_norm": 17.473905563354492, "learning_rate": 2.9435095472831323e-05, "loss": 2.9928, "step": 13760 }, { "epoch": 2.3406425293217747, "grad_norm": 12.555267333984375, "learning_rate": 2.9432262451130377e-05, "loss": 3.0269, "step": 13770 }, { "epoch": 2.3423423423423424, "grad_norm": 15.71702766418457, "learning_rate": 2.942942942942943e-05, "loss": 2.6276, "step": 13780 }, { "epoch": 2.34404215536291, "grad_norm": 20.00925064086914, "learning_rate": 2.9426596407728484e-05, "loss": 2.966, "step": 13790 }, { "epoch": 2.3457419683834777, "grad_norm": 18.329755783081055, "learning_rate": 2.942376338602754e-05, "loss": 2.8344, "step": 13800 }, { "epoch": 2.3474417814040454, "grad_norm": 13.392374992370605, "learning_rate": 2.942093036432659e-05, "loss": 2.7418, "step": 13810 }, { "epoch": 2.3491415944246135, "grad_norm": 12.525157928466797, "learning_rate": 2.9418097342625644e-05, "loss": 3.0558, "step": 13820 }, { "epoch": 2.350841407445181, "grad_norm": 23.50555992126465, "learning_rate": 2.94152643209247e-05, "loss": 2.8675, "step": 13830 }, { "epoch": 2.3525412204657488, "grad_norm": 12.542020797729492, "learning_rate": 2.9412431299223755e-05, "loss": 2.9251, "step": 13840 }, { "epoch": 2.3542410334863164, "grad_norm": 16.825746536254883, "learning_rate": 2.9409598277522805e-05, "loss": 2.7679, "step": 13850 }, { "epoch": 2.355940846506884, "grad_norm": 9.92492961883545, "learning_rate": 2.9406765255821862e-05, "loss": 2.7671, "step": 13860 }, { "epoch": 2.357640659527452, "grad_norm": 20.372913360595703, "learning_rate": 2.9403932234120915e-05, "loss": 2.9517, "step": 13870 }, { "epoch": 2.35934047254802, "grad_norm": 12.595799446105957, "learning_rate": 2.9401099212419965e-05, "loss": 2.8571, "step": 13880 }, { "epoch": 2.3610402855685875, "grad_norm": 12.23437786102295, "learning_rate": 2.9398266190719022e-05, "loss": 2.8097, "step": 13890 }, { "epoch": 2.362740098589155, "grad_norm": 10.448447227478027, "learning_rate": 2.9395433169018076e-05, "loss": 2.7431, "step": 13900 }, { "epoch": 2.3644399116097228, "grad_norm": 15.633070945739746, "learning_rate": 2.939260014731713e-05, "loss": 2.9023, "step": 13910 }, { "epoch": 2.3661397246302904, "grad_norm": 13.686700820922852, "learning_rate": 2.9389767125616183e-05, "loss": 2.9679, "step": 13920 }, { "epoch": 2.3678395376508585, "grad_norm": 12.425289154052734, "learning_rate": 2.9386934103915236e-05, "loss": 2.8248, "step": 13930 }, { "epoch": 2.369539350671426, "grad_norm": 8.83059024810791, "learning_rate": 2.938410108221429e-05, "loss": 2.866, "step": 13940 }, { "epoch": 2.371239163691994, "grad_norm": 13.601810455322266, "learning_rate": 2.9381268060513347e-05, "loss": 2.7595, "step": 13950 }, { "epoch": 2.3729389767125615, "grad_norm": 16.083162307739258, "learning_rate": 2.9378435038812397e-05, "loss": 2.887, "step": 13960 }, { "epoch": 2.3746387897331296, "grad_norm": 10.859299659729004, "learning_rate": 2.937560201711145e-05, "loss": 2.8973, "step": 13970 }, { "epoch": 2.3763386027536972, "grad_norm": 15.424132347106934, "learning_rate": 2.9372768995410507e-05, "loss": 2.9835, "step": 13980 }, { "epoch": 2.378038415774265, "grad_norm": 14.21430778503418, "learning_rate": 2.936993597370956e-05, "loss": 2.795, "step": 13990 }, { "epoch": 2.3797382287948325, "grad_norm": 13.469867706298828, "learning_rate": 2.936710295200861e-05, "loss": 3.0364, "step": 14000 }, { "epoch": 2.3814380418154, "grad_norm": 12.24038028717041, "learning_rate": 2.9364269930307668e-05, "loss": 2.8298, "step": 14010 }, { "epoch": 2.383137854835968, "grad_norm": 13.775562286376953, "learning_rate": 2.936143690860672e-05, "loss": 2.6882, "step": 14020 }, { "epoch": 2.384837667856536, "grad_norm": 9.86568546295166, "learning_rate": 2.9358603886905775e-05, "loss": 2.9891, "step": 14030 }, { "epoch": 2.3865374808771036, "grad_norm": 10.732856750488281, "learning_rate": 2.935577086520483e-05, "loss": 2.662, "step": 14040 }, { "epoch": 2.3882372938976713, "grad_norm": 13.800535202026367, "learning_rate": 2.9352937843503882e-05, "loss": 2.759, "step": 14050 }, { "epoch": 2.389937106918239, "grad_norm": 11.122281074523926, "learning_rate": 2.9350104821802936e-05, "loss": 3.0387, "step": 14060 }, { "epoch": 2.3916369199388066, "grad_norm": 13.22276496887207, "learning_rate": 2.934727180010199e-05, "loss": 2.6823, "step": 14070 }, { "epoch": 2.3933367329593747, "grad_norm": 17.293289184570312, "learning_rate": 2.9344438778401043e-05, "loss": 2.8456, "step": 14080 }, { "epoch": 2.3950365459799423, "grad_norm": 11.877317428588867, "learning_rate": 2.9341605756700096e-05, "loss": 2.8573, "step": 14090 }, { "epoch": 2.39673635900051, "grad_norm": 10.067644119262695, "learning_rate": 2.9338772734999153e-05, "loss": 2.8883, "step": 14100 }, { "epoch": 2.3984361720210776, "grad_norm": 14.496647834777832, "learning_rate": 2.9335939713298203e-05, "loss": 2.7959, "step": 14110 }, { "epoch": 2.4001359850416453, "grad_norm": 9.10915756225586, "learning_rate": 2.9333106691597257e-05, "loss": 2.8877, "step": 14120 }, { "epoch": 2.401835798062213, "grad_norm": 23.20372772216797, "learning_rate": 2.9330273669896314e-05, "loss": 2.8912, "step": 14130 }, { "epoch": 2.403535611082781, "grad_norm": 19.039255142211914, "learning_rate": 2.9327440648195367e-05, "loss": 2.867, "step": 14140 }, { "epoch": 2.4052354241033487, "grad_norm": 13.149199485778809, "learning_rate": 2.9324607626494417e-05, "loss": 3.0786, "step": 14150 }, { "epoch": 2.4069352371239163, "grad_norm": 14.291259765625, "learning_rate": 2.9321774604793474e-05, "loss": 2.6151, "step": 14160 }, { "epoch": 2.408635050144484, "grad_norm": 12.241497993469238, "learning_rate": 2.9318941583092528e-05, "loss": 3.1147, "step": 14170 }, { "epoch": 2.410334863165052, "grad_norm": 8.408692359924316, "learning_rate": 2.931610856139158e-05, "loss": 2.885, "step": 14180 }, { "epoch": 2.4120346761856197, "grad_norm": 11.224897384643555, "learning_rate": 2.9313275539690635e-05, "loss": 2.788, "step": 14190 }, { "epoch": 2.4137344892061874, "grad_norm": 9.727250099182129, "learning_rate": 2.9310442517989688e-05, "loss": 2.7053, "step": 14200 }, { "epoch": 2.415434302226755, "grad_norm": 14.965228080749512, "learning_rate": 2.9307609496288742e-05, "loss": 2.7817, "step": 14210 }, { "epoch": 2.4171341152473227, "grad_norm": 19.396984100341797, "learning_rate": 2.93047764745878e-05, "loss": 2.8867, "step": 14220 }, { "epoch": 2.4188339282678903, "grad_norm": 13.568034172058105, "learning_rate": 2.930194345288685e-05, "loss": 2.7873, "step": 14230 }, { "epoch": 2.4205337412884584, "grad_norm": 18.129867553710938, "learning_rate": 2.9299110431185902e-05, "loss": 2.9571, "step": 14240 }, { "epoch": 2.422233554309026, "grad_norm": 9.672052383422852, "learning_rate": 2.929627740948496e-05, "loss": 2.8442, "step": 14250 }, { "epoch": 2.4239333673295937, "grad_norm": 20.799455642700195, "learning_rate": 2.929344438778401e-05, "loss": 2.8166, "step": 14260 }, { "epoch": 2.4256331803501614, "grad_norm": 10.902298927307129, "learning_rate": 2.9290611366083063e-05, "loss": 2.7941, "step": 14270 }, { "epoch": 2.427332993370729, "grad_norm": 43.365535736083984, "learning_rate": 2.928777834438212e-05, "loss": 2.8613, "step": 14280 }, { "epoch": 2.429032806391297, "grad_norm": 9.135724067687988, "learning_rate": 2.9284945322681173e-05, "loss": 2.7652, "step": 14290 }, { "epoch": 2.430732619411865, "grad_norm": 10.571609497070312, "learning_rate": 2.9282112300980223e-05, "loss": 2.8811, "step": 14300 }, { "epoch": 2.4324324324324325, "grad_norm": 12.01801872253418, "learning_rate": 2.927927927927928e-05, "loss": 2.9816, "step": 14310 }, { "epoch": 2.434132245453, "grad_norm": 12.8327054977417, "learning_rate": 2.9276446257578334e-05, "loss": 2.7311, "step": 14320 }, { "epoch": 2.4358320584735678, "grad_norm": 14.213878631591797, "learning_rate": 2.9273613235877387e-05, "loss": 2.7515, "step": 14330 }, { "epoch": 2.4375318714941354, "grad_norm": 15.836729049682617, "learning_rate": 2.927078021417644e-05, "loss": 2.6995, "step": 14340 }, { "epoch": 2.4392316845147035, "grad_norm": 11.212095260620117, "learning_rate": 2.9267947192475494e-05, "loss": 2.7911, "step": 14350 }, { "epoch": 2.440931497535271, "grad_norm": 15.150433540344238, "learning_rate": 2.9265114170774548e-05, "loss": 2.9805, "step": 14360 }, { "epoch": 2.442631310555839, "grad_norm": 17.546390533447266, "learning_rate": 2.9262281149073605e-05, "loss": 2.8396, "step": 14370 }, { "epoch": 2.4443311235764065, "grad_norm": 9.193673133850098, "learning_rate": 2.9259448127372655e-05, "loss": 2.9176, "step": 14380 }, { "epoch": 2.4460309365969746, "grad_norm": 17.76991844177246, "learning_rate": 2.925661510567171e-05, "loss": 2.8723, "step": 14390 }, { "epoch": 2.4477307496175422, "grad_norm": 12.987401962280273, "learning_rate": 2.9253782083970765e-05, "loss": 2.6756, "step": 14400 }, { "epoch": 2.44943056263811, "grad_norm": 14.982175827026367, "learning_rate": 2.9250949062269816e-05, "loss": 2.8633, "step": 14410 }, { "epoch": 2.4511303756586775, "grad_norm": 13.489433288574219, "learning_rate": 2.924811604056887e-05, "loss": 2.7569, "step": 14420 }, { "epoch": 2.452830188679245, "grad_norm": 11.871108055114746, "learning_rate": 2.9245283018867926e-05, "loss": 2.7296, "step": 14430 }, { "epoch": 2.454530001699813, "grad_norm": 11.42324447631836, "learning_rate": 2.924244999716698e-05, "loss": 2.9036, "step": 14440 }, { "epoch": 2.456229814720381, "grad_norm": 45.60625076293945, "learning_rate": 2.9239616975466033e-05, "loss": 2.6544, "step": 14450 }, { "epoch": 2.4579296277409486, "grad_norm": 13.438541412353516, "learning_rate": 2.9236783953765087e-05, "loss": 2.6808, "step": 14460 }, { "epoch": 2.4596294407615162, "grad_norm": 20.196945190429688, "learning_rate": 2.923395093206414e-05, "loss": 3.0021, "step": 14470 }, { "epoch": 2.461329253782084, "grad_norm": 14.438969612121582, "learning_rate": 2.9231117910363197e-05, "loss": 2.8753, "step": 14480 }, { "epoch": 2.4630290668026515, "grad_norm": 21.705339431762695, "learning_rate": 2.9228284888662247e-05, "loss": 2.997, "step": 14490 }, { "epoch": 2.4647288798232196, "grad_norm": 10.646504402160645, "learning_rate": 2.92254518669613e-05, "loss": 2.9409, "step": 14500 }, { "epoch": 2.4664286928437873, "grad_norm": 14.595433235168457, "learning_rate": 2.9222618845260358e-05, "loss": 3.0234, "step": 14510 }, { "epoch": 2.468128505864355, "grad_norm": 11.865845680236816, "learning_rate": 2.921978582355941e-05, "loss": 2.8835, "step": 14520 }, { "epoch": 2.4698283188849226, "grad_norm": 11.04707145690918, "learning_rate": 2.921695280185846e-05, "loss": 2.8815, "step": 14530 }, { "epoch": 2.4715281319054903, "grad_norm": 14.353449821472168, "learning_rate": 2.9214119780157518e-05, "loss": 2.925, "step": 14540 }, { "epoch": 2.473227944926058, "grad_norm": 11.420317649841309, "learning_rate": 2.9211286758456572e-05, "loss": 2.9759, "step": 14550 }, { "epoch": 2.474927757946626, "grad_norm": 7.674433708190918, "learning_rate": 2.9208453736755625e-05, "loss": 3.0752, "step": 14560 }, { "epoch": 2.4766275709671937, "grad_norm": 11.472408294677734, "learning_rate": 2.920562071505468e-05, "loss": 2.7529, "step": 14570 }, { "epoch": 2.4783273839877613, "grad_norm": 8.356481552124023, "learning_rate": 2.9202787693353732e-05, "loss": 2.5103, "step": 14580 }, { "epoch": 2.480027197008329, "grad_norm": 15.05471420288086, "learning_rate": 2.9199954671652786e-05, "loss": 2.7358, "step": 14590 }, { "epoch": 2.481727010028897, "grad_norm": 10.65091323852539, "learning_rate": 2.919712164995184e-05, "loss": 2.715, "step": 14600 }, { "epoch": 2.4834268230494647, "grad_norm": 10.738676071166992, "learning_rate": 2.9194288628250893e-05, "loss": 2.7927, "step": 14610 }, { "epoch": 2.4851266360700324, "grad_norm": 13.772838592529297, "learning_rate": 2.9191455606549946e-05, "loss": 2.855, "step": 14620 }, { "epoch": 2.4868264490906, "grad_norm": 15.618683815002441, "learning_rate": 2.9188622584849003e-05, "loss": 2.6286, "step": 14630 }, { "epoch": 2.4885262621111677, "grad_norm": 11.716090202331543, "learning_rate": 2.9185789563148053e-05, "loss": 2.7632, "step": 14640 }, { "epoch": 2.4902260751317353, "grad_norm": 19.45037078857422, "learning_rate": 2.9182956541447107e-05, "loss": 2.7727, "step": 14650 }, { "epoch": 2.4919258881523034, "grad_norm": 14.004674911499023, "learning_rate": 2.9180123519746164e-05, "loss": 2.7557, "step": 14660 }, { "epoch": 2.493625701172871, "grad_norm": 13.466872215270996, "learning_rate": 2.9177290498045217e-05, "loss": 2.5386, "step": 14670 }, { "epoch": 2.4953255141934387, "grad_norm": 10.292778968811035, "learning_rate": 2.9174457476344267e-05, "loss": 2.8948, "step": 14680 }, { "epoch": 2.4970253272140064, "grad_norm": 63.96037673950195, "learning_rate": 2.9171624454643324e-05, "loss": 2.5958, "step": 14690 }, { "epoch": 2.498725140234574, "grad_norm": 17.872432708740234, "learning_rate": 2.9168791432942378e-05, "loss": 2.6173, "step": 14700 }, { "epoch": 2.500424953255142, "grad_norm": 16.719440460205078, "learning_rate": 2.916595841124143e-05, "loss": 2.789, "step": 14710 }, { "epoch": 2.50212476627571, "grad_norm": 11.73709774017334, "learning_rate": 2.9163125389540485e-05, "loss": 2.8763, "step": 14720 }, { "epoch": 2.5038245792962774, "grad_norm": 12.291711807250977, "learning_rate": 2.916029236783954e-05, "loss": 2.9017, "step": 14730 }, { "epoch": 2.505524392316845, "grad_norm": 19.050195693969727, "learning_rate": 2.9157459346138592e-05, "loss": 3.1181, "step": 14740 }, { "epoch": 2.5072242053374127, "grad_norm": 17.847089767456055, "learning_rate": 2.9154626324437646e-05, "loss": 2.9235, "step": 14750 }, { "epoch": 2.5089240183579804, "grad_norm": 12.514519691467285, "learning_rate": 2.91517933027367e-05, "loss": 2.7227, "step": 14760 }, { "epoch": 2.5106238313785485, "grad_norm": 16.373729705810547, "learning_rate": 2.9148960281035753e-05, "loss": 3.0519, "step": 14770 }, { "epoch": 2.512323644399116, "grad_norm": 9.776836395263672, "learning_rate": 2.914612725933481e-05, "loss": 2.7604, "step": 14780 }, { "epoch": 2.514023457419684, "grad_norm": 12.371933937072754, "learning_rate": 2.914329423763386e-05, "loss": 2.8873, "step": 14790 }, { "epoch": 2.5157232704402515, "grad_norm": 11.051037788391113, "learning_rate": 2.9140461215932913e-05, "loss": 2.7493, "step": 14800 }, { "epoch": 2.5174230834608196, "grad_norm": 10.920777320861816, "learning_rate": 2.913762819423197e-05, "loss": 2.7554, "step": 14810 }, { "epoch": 2.519122896481387, "grad_norm": 11.581474304199219, "learning_rate": 2.9134795172531024e-05, "loss": 2.792, "step": 14820 }, { "epoch": 2.520822709501955, "grad_norm": 14.469858169555664, "learning_rate": 2.9131962150830074e-05, "loss": 2.8267, "step": 14830 }, { "epoch": 2.5225225225225225, "grad_norm": 13.794657707214355, "learning_rate": 2.912912912912913e-05, "loss": 2.7227, "step": 14840 }, { "epoch": 2.52422233554309, "grad_norm": 63.120670318603516, "learning_rate": 2.9126296107428184e-05, "loss": 3.079, "step": 14850 }, { "epoch": 2.525922148563658, "grad_norm": 14.440424919128418, "learning_rate": 2.9123463085727238e-05, "loss": 2.8597, "step": 14860 }, { "epoch": 2.5276219615842255, "grad_norm": 11.273499488830566, "learning_rate": 2.912063006402629e-05, "loss": 2.9523, "step": 14870 }, { "epoch": 2.5293217746047936, "grad_norm": 11.83707332611084, "learning_rate": 2.9117797042325345e-05, "loss": 2.716, "step": 14880 }, { "epoch": 2.5310215876253612, "grad_norm": 10.893123626708984, "learning_rate": 2.9114964020624398e-05, "loss": 2.8152, "step": 14890 }, { "epoch": 2.532721400645929, "grad_norm": 16.430400848388672, "learning_rate": 2.9112130998923455e-05, "loss": 2.839, "step": 14900 }, { "epoch": 2.5344212136664965, "grad_norm": 12.214009284973145, "learning_rate": 2.9109297977222505e-05, "loss": 2.8641, "step": 14910 }, { "epoch": 2.5361210266870646, "grad_norm": 15.415000915527344, "learning_rate": 2.910646495552156e-05, "loss": 2.838, "step": 14920 }, { "epoch": 2.5378208397076323, "grad_norm": 13.341418266296387, "learning_rate": 2.9103631933820616e-05, "loss": 2.8556, "step": 14930 }, { "epoch": 2.5395206527282, "grad_norm": 13.596390724182129, "learning_rate": 2.9100798912119666e-05, "loss": 2.6416, "step": 14940 }, { "epoch": 2.5412204657487676, "grad_norm": 13.357632637023926, "learning_rate": 2.909796589041872e-05, "loss": 2.8682, "step": 14950 }, { "epoch": 2.5429202787693352, "grad_norm": 16.16724967956543, "learning_rate": 2.9095132868717776e-05, "loss": 2.7367, "step": 14960 }, { "epoch": 2.544620091789903, "grad_norm": 15.156610488891602, "learning_rate": 2.909229984701683e-05, "loss": 2.7254, "step": 14970 }, { "epoch": 2.546319904810471, "grad_norm": 22.1298885345459, "learning_rate": 2.908946682531588e-05, "loss": 2.8862, "step": 14980 }, { "epoch": 2.5480197178310386, "grad_norm": 19.958324432373047, "learning_rate": 2.9086633803614937e-05, "loss": 2.7765, "step": 14990 }, { "epoch": 2.5497195308516063, "grad_norm": 19.06591033935547, "learning_rate": 2.908380078191399e-05, "loss": 2.9585, "step": 15000 }, { "epoch": 2.551419343872174, "grad_norm": 11.102466583251953, "learning_rate": 2.9080967760213044e-05, "loss": 2.6014, "step": 15010 }, { "epoch": 2.553119156892742, "grad_norm": 12.911395072937012, "learning_rate": 2.9078134738512097e-05, "loss": 2.9437, "step": 15020 }, { "epoch": 2.5548189699133097, "grad_norm": 13.718931198120117, "learning_rate": 2.907530171681115e-05, "loss": 3.087, "step": 15030 }, { "epoch": 2.5565187829338774, "grad_norm": 21.815622329711914, "learning_rate": 2.9072468695110204e-05, "loss": 2.833, "step": 15040 }, { "epoch": 2.558218595954445, "grad_norm": 16.40778923034668, "learning_rate": 2.906963567340926e-05, "loss": 2.6959, "step": 15050 }, { "epoch": 2.5599184089750127, "grad_norm": 19.967376708984375, "learning_rate": 2.906680265170831e-05, "loss": 2.8473, "step": 15060 }, { "epoch": 2.5616182219955803, "grad_norm": 19.7569522857666, "learning_rate": 2.9063969630007365e-05, "loss": 2.8208, "step": 15070 }, { "epoch": 2.563318035016148, "grad_norm": 24.606426239013672, "learning_rate": 2.9061136608306422e-05, "loss": 3.1514, "step": 15080 }, { "epoch": 2.565017848036716, "grad_norm": 14.308367729187012, "learning_rate": 2.9058303586605472e-05, "loss": 2.8872, "step": 15090 }, { "epoch": 2.5667176610572837, "grad_norm": 8.737726211547852, "learning_rate": 2.9055470564904526e-05, "loss": 3.0695, "step": 15100 }, { "epoch": 2.5684174740778514, "grad_norm": 16.464885711669922, "learning_rate": 2.9052637543203582e-05, "loss": 2.7601, "step": 15110 }, { "epoch": 2.570117287098419, "grad_norm": 10.405939102172852, "learning_rate": 2.9049804521502636e-05, "loss": 2.7705, "step": 15120 }, { "epoch": 2.571817100118987, "grad_norm": 10.639328002929688, "learning_rate": 2.9046971499801686e-05, "loss": 2.6887, "step": 15130 }, { "epoch": 2.5735169131395548, "grad_norm": 16.111753463745117, "learning_rate": 2.9044138478100743e-05, "loss": 2.7157, "step": 15140 }, { "epoch": 2.5752167261601224, "grad_norm": 12.603726387023926, "learning_rate": 2.9041305456399797e-05, "loss": 2.9126, "step": 15150 }, { "epoch": 2.57691653918069, "grad_norm": 12.637350082397461, "learning_rate": 2.903847243469885e-05, "loss": 2.8368, "step": 15160 }, { "epoch": 2.5786163522012577, "grad_norm": 13.611834526062012, "learning_rate": 2.9035639412997904e-05, "loss": 2.9717, "step": 15170 }, { "epoch": 2.5803161652218254, "grad_norm": 16.01730728149414, "learning_rate": 2.9032806391296957e-05, "loss": 2.6575, "step": 15180 }, { "epoch": 2.5820159782423935, "grad_norm": 39.920448303222656, "learning_rate": 2.902997336959601e-05, "loss": 2.591, "step": 15190 }, { "epoch": 2.583715791262961, "grad_norm": 34.913448333740234, "learning_rate": 2.9027140347895068e-05, "loss": 2.9046, "step": 15200 }, { "epoch": 2.585415604283529, "grad_norm": 13.228766441345215, "learning_rate": 2.9024307326194118e-05, "loss": 2.9046, "step": 15210 }, { "epoch": 2.5871154173040964, "grad_norm": 15.633023262023926, "learning_rate": 2.9021474304493175e-05, "loss": 2.8792, "step": 15220 }, { "epoch": 2.5888152303246645, "grad_norm": 11.833178520202637, "learning_rate": 2.9018641282792228e-05, "loss": 2.8699, "step": 15230 }, { "epoch": 2.590515043345232, "grad_norm": 12.282950401306152, "learning_rate": 2.901580826109128e-05, "loss": 2.7355, "step": 15240 }, { "epoch": 2.5922148563658, "grad_norm": 14.521644592285156, "learning_rate": 2.9012975239390335e-05, "loss": 2.7525, "step": 15250 }, { "epoch": 2.5939146693863675, "grad_norm": 16.469783782958984, "learning_rate": 2.901014221768939e-05, "loss": 2.8877, "step": 15260 }, { "epoch": 2.595614482406935, "grad_norm": 16.35924530029297, "learning_rate": 2.9007309195988442e-05, "loss": 2.5056, "step": 15270 }, { "epoch": 2.597314295427503, "grad_norm": 12.969645500183105, "learning_rate": 2.9004476174287496e-05, "loss": 3.026, "step": 15280 }, { "epoch": 2.5990141084480705, "grad_norm": 17.675249099731445, "learning_rate": 2.900164315258655e-05, "loss": 2.5261, "step": 15290 }, { "epoch": 2.6007139214686386, "grad_norm": 18.917381286621094, "learning_rate": 2.8998810130885603e-05, "loss": 2.9214, "step": 15300 }, { "epoch": 2.602413734489206, "grad_norm": 15.683756828308105, "learning_rate": 2.899597710918466e-05, "loss": 2.7203, "step": 15310 }, { "epoch": 2.604113547509774, "grad_norm": 24.733116149902344, "learning_rate": 2.899314408748371e-05, "loss": 2.7359, "step": 15320 }, { "epoch": 2.6058133605303415, "grad_norm": 12.244647979736328, "learning_rate": 2.8990311065782763e-05, "loss": 2.8523, "step": 15330 }, { "epoch": 2.6075131735509096, "grad_norm": 12.167634010314941, "learning_rate": 2.898747804408182e-05, "loss": 2.5541, "step": 15340 }, { "epoch": 2.6092129865714773, "grad_norm": 12.391260147094727, "learning_rate": 2.8984645022380874e-05, "loss": 2.5801, "step": 15350 }, { "epoch": 2.610912799592045, "grad_norm": 15.736270904541016, "learning_rate": 2.8981812000679924e-05, "loss": 2.9572, "step": 15360 }, { "epoch": 2.6126126126126126, "grad_norm": 14.830798149108887, "learning_rate": 2.897897897897898e-05, "loss": 2.8873, "step": 15370 }, { "epoch": 2.6143124256331802, "grad_norm": 20.948596954345703, "learning_rate": 2.8976145957278034e-05, "loss": 2.7402, "step": 15380 }, { "epoch": 2.616012238653748, "grad_norm": 15.689900398254395, "learning_rate": 2.8973312935577088e-05, "loss": 2.5427, "step": 15390 }, { "epoch": 2.617712051674316, "grad_norm": 13.316709518432617, "learning_rate": 2.897047991387614e-05, "loss": 2.8718, "step": 15400 }, { "epoch": 2.6194118646948836, "grad_norm": 10.19112777709961, "learning_rate": 2.8967646892175195e-05, "loss": 2.8067, "step": 15410 }, { "epoch": 2.6211116777154513, "grad_norm": 15.438132286071777, "learning_rate": 2.896481387047425e-05, "loss": 3.0028, "step": 15420 }, { "epoch": 2.622811490736019, "grad_norm": 16.293241500854492, "learning_rate": 2.8961980848773305e-05, "loss": 2.6964, "step": 15430 }, { "epoch": 2.624511303756587, "grad_norm": 19.465682983398438, "learning_rate": 2.8959147827072355e-05, "loss": 2.7759, "step": 15440 }, { "epoch": 2.6262111167771547, "grad_norm": 17.215930938720703, "learning_rate": 2.895631480537141e-05, "loss": 2.6075, "step": 15450 }, { "epoch": 2.6279109297977223, "grad_norm": 12.450313568115234, "learning_rate": 2.8953481783670466e-05, "loss": 2.8143, "step": 15460 }, { "epoch": 2.62961074281829, "grad_norm": 16.188499450683594, "learning_rate": 2.8950648761969516e-05, "loss": 2.8798, "step": 15470 }, { "epoch": 2.6313105558388576, "grad_norm": 12.066252708435059, "learning_rate": 2.894781574026857e-05, "loss": 2.8536, "step": 15480 }, { "epoch": 2.6330103688594253, "grad_norm": 10.903970718383789, "learning_rate": 2.8944982718567626e-05, "loss": 2.7568, "step": 15490 }, { "epoch": 2.634710181879993, "grad_norm": 14.463434219360352, "learning_rate": 2.894214969686668e-05, "loss": 2.7678, "step": 15500 }, { "epoch": 2.636409994900561, "grad_norm": 16.448637008666992, "learning_rate": 2.893931667516573e-05, "loss": 2.9037, "step": 15510 }, { "epoch": 2.6381098079211287, "grad_norm": 9.949262619018555, "learning_rate": 2.8936483653464787e-05, "loss": 2.7347, "step": 15520 }, { "epoch": 2.6398096209416964, "grad_norm": 15.99428653717041, "learning_rate": 2.893365063176384e-05, "loss": 2.9563, "step": 15530 }, { "epoch": 2.641509433962264, "grad_norm": 23.197071075439453, "learning_rate": 2.8930817610062894e-05, "loss": 2.5133, "step": 15540 }, { "epoch": 2.643209246982832, "grad_norm": 10.508752822875977, "learning_rate": 2.8927984588361948e-05, "loss": 2.8306, "step": 15550 }, { "epoch": 2.6449090600033998, "grad_norm": 15.742966651916504, "learning_rate": 2.8925151566661e-05, "loss": 2.7204, "step": 15560 }, { "epoch": 2.6466088730239674, "grad_norm": 13.65090274810791, "learning_rate": 2.8922318544960055e-05, "loss": 2.8305, "step": 15570 }, { "epoch": 2.648308686044535, "grad_norm": 10.796582221984863, "learning_rate": 2.891948552325911e-05, "loss": 2.8226, "step": 15580 }, { "epoch": 2.6500084990651027, "grad_norm": 10.641902923583984, "learning_rate": 2.891665250155816e-05, "loss": 2.839, "step": 15590 }, { "epoch": 2.6517083120856704, "grad_norm": 12.692667961120605, "learning_rate": 2.8913819479857215e-05, "loss": 2.7702, "step": 15600 }, { "epoch": 2.6534081251062385, "grad_norm": 15.219027519226074, "learning_rate": 2.8910986458156272e-05, "loss": 2.7254, "step": 15610 }, { "epoch": 2.655107938126806, "grad_norm": 11.108316421508789, "learning_rate": 2.8908153436455322e-05, "loss": 2.6954, "step": 15620 }, { "epoch": 2.6568077511473738, "grad_norm": 10.162047386169434, "learning_rate": 2.8905320414754376e-05, "loss": 2.7657, "step": 15630 }, { "epoch": 2.6585075641679414, "grad_norm": 10.46260929107666, "learning_rate": 2.8902487393053433e-05, "loss": 2.8135, "step": 15640 }, { "epoch": 2.6602073771885095, "grad_norm": 31.048229217529297, "learning_rate": 2.8899654371352486e-05, "loss": 2.7626, "step": 15650 }, { "epoch": 2.661907190209077, "grad_norm": 12.72147274017334, "learning_rate": 2.8896821349651536e-05, "loss": 2.7684, "step": 15660 }, { "epoch": 2.663607003229645, "grad_norm": 20.557939529418945, "learning_rate": 2.8893988327950593e-05, "loss": 2.8782, "step": 15670 }, { "epoch": 2.6653068162502125, "grad_norm": 10.947854995727539, "learning_rate": 2.8891155306249647e-05, "loss": 2.7753, "step": 15680 }, { "epoch": 2.66700662927078, "grad_norm": 16.962989807128906, "learning_rate": 2.88883222845487e-05, "loss": 2.5883, "step": 15690 }, { "epoch": 2.668706442291348, "grad_norm": 10.200950622558594, "learning_rate": 2.8885489262847754e-05, "loss": 2.6782, "step": 15700 }, { "epoch": 2.6704062553119154, "grad_norm": 11.635366439819336, "learning_rate": 2.8882656241146807e-05, "loss": 2.9329, "step": 15710 }, { "epoch": 2.6721060683324835, "grad_norm": 12.160969734191895, "learning_rate": 2.887982321944586e-05, "loss": 2.806, "step": 15720 }, { "epoch": 2.673805881353051, "grad_norm": 11.37773323059082, "learning_rate": 2.8876990197744918e-05, "loss": 2.9601, "step": 15730 }, { "epoch": 2.675505694373619, "grad_norm": 13.506568908691406, "learning_rate": 2.8874157176043968e-05, "loss": 2.8927, "step": 15740 }, { "epoch": 2.6772055073941865, "grad_norm": 11.044944763183594, "learning_rate": 2.887132415434302e-05, "loss": 2.7521, "step": 15750 }, { "epoch": 2.6789053204147546, "grad_norm": 21.964590072631836, "learning_rate": 2.886849113264208e-05, "loss": 2.8649, "step": 15760 }, { "epoch": 2.6806051334353223, "grad_norm": 32.65195083618164, "learning_rate": 2.8865658110941132e-05, "loss": 2.5443, "step": 15770 }, { "epoch": 2.68230494645589, "grad_norm": 10.580744743347168, "learning_rate": 2.8862825089240182e-05, "loss": 2.871, "step": 15780 }, { "epoch": 2.6840047594764576, "grad_norm": 12.477234840393066, "learning_rate": 2.885999206753924e-05, "loss": 2.9758, "step": 15790 }, { "epoch": 2.685704572497025, "grad_norm": 12.054101943969727, "learning_rate": 2.8857159045838292e-05, "loss": 2.7697, "step": 15800 }, { "epoch": 2.687404385517593, "grad_norm": 11.658247947692871, "learning_rate": 2.8854326024137343e-05, "loss": 2.9711, "step": 15810 }, { "epoch": 2.689104198538161, "grad_norm": 7.931090354919434, "learning_rate": 2.88514930024364e-05, "loss": 2.7452, "step": 15820 }, { "epoch": 2.6908040115587286, "grad_norm": 12.757265090942383, "learning_rate": 2.8848659980735453e-05, "loss": 2.8762, "step": 15830 }, { "epoch": 2.6925038245792963, "grad_norm": 14.50382137298584, "learning_rate": 2.8845826959034507e-05, "loss": 2.7683, "step": 15840 }, { "epoch": 2.694203637599864, "grad_norm": 14.683700561523438, "learning_rate": 2.884299393733356e-05, "loss": 2.8357, "step": 15850 }, { "epoch": 2.695903450620432, "grad_norm": 13.572397232055664, "learning_rate": 2.8840160915632614e-05, "loss": 2.519, "step": 15860 }, { "epoch": 2.6976032636409997, "grad_norm": 12.335494995117188, "learning_rate": 2.8837327893931667e-05, "loss": 2.7949, "step": 15870 }, { "epoch": 2.6993030766615673, "grad_norm": 8.309657096862793, "learning_rate": 2.8834494872230724e-05, "loss": 2.6517, "step": 15880 }, { "epoch": 2.701002889682135, "grad_norm": 14.911039352416992, "learning_rate": 2.8831661850529774e-05, "loss": 2.8813, "step": 15890 }, { "epoch": 2.7027027027027026, "grad_norm": 17.246809005737305, "learning_rate": 2.8828828828828828e-05, "loss": 2.5125, "step": 15900 }, { "epoch": 2.7044025157232703, "grad_norm": 10.117753982543945, "learning_rate": 2.8825995807127885e-05, "loss": 2.7099, "step": 15910 }, { "epoch": 2.706102328743838, "grad_norm": 14.5825834274292, "learning_rate": 2.8823162785426938e-05, "loss": 2.8316, "step": 15920 }, { "epoch": 2.707802141764406, "grad_norm": 19.04536247253418, "learning_rate": 2.882032976372599e-05, "loss": 2.6693, "step": 15930 }, { "epoch": 2.7095019547849737, "grad_norm": 11.582018852233887, "learning_rate": 2.8817496742025045e-05, "loss": 2.729, "step": 15940 }, { "epoch": 2.7112017678055413, "grad_norm": 11.946005821228027, "learning_rate": 2.88146637203241e-05, "loss": 3.0645, "step": 15950 }, { "epoch": 2.712901580826109, "grad_norm": 12.80201244354248, "learning_rate": 2.8811830698623152e-05, "loss": 2.3152, "step": 15960 }, { "epoch": 2.714601393846677, "grad_norm": 15.092326164245605, "learning_rate": 2.8808997676922206e-05, "loss": 2.8812, "step": 15970 }, { "epoch": 2.7163012068672447, "grad_norm": 20.004053115844727, "learning_rate": 2.880616465522126e-05, "loss": 2.8861, "step": 15980 }, { "epoch": 2.7180010198878124, "grad_norm": 14.334980010986328, "learning_rate": 2.8803331633520316e-05, "loss": 2.6828, "step": 15990 }, { "epoch": 2.71970083290838, "grad_norm": 11.706005096435547, "learning_rate": 2.8800498611819366e-05, "loss": 2.816, "step": 16000 }, { "epoch": 2.7214006459289477, "grad_norm": 15.17907428741455, "learning_rate": 2.879766559011842e-05, "loss": 2.6396, "step": 16010 }, { "epoch": 2.7231004589495154, "grad_norm": 18.56740951538086, "learning_rate": 2.8794832568417477e-05, "loss": 2.7229, "step": 16020 }, { "epoch": 2.7248002719700835, "grad_norm": 13.411874771118164, "learning_rate": 2.879199954671653e-05, "loss": 2.8091, "step": 16030 }, { "epoch": 2.726500084990651, "grad_norm": 48.96397018432617, "learning_rate": 2.878916652501558e-05, "loss": 2.7964, "step": 16040 }, { "epoch": 2.7281998980112188, "grad_norm": 16.312105178833008, "learning_rate": 2.8786333503314637e-05, "loss": 2.6406, "step": 16050 }, { "epoch": 2.7298997110317864, "grad_norm": 13.806829452514648, "learning_rate": 2.878350048161369e-05, "loss": 2.7733, "step": 16060 }, { "epoch": 2.7315995240523545, "grad_norm": 12.021053314208984, "learning_rate": 2.8780667459912744e-05, "loss": 2.8165, "step": 16070 }, { "epoch": 2.733299337072922, "grad_norm": 20.696191787719727, "learning_rate": 2.8777834438211798e-05, "loss": 2.8119, "step": 16080 }, { "epoch": 2.73499915009349, "grad_norm": 20.91849136352539, "learning_rate": 2.877500141651085e-05, "loss": 2.5261, "step": 16090 }, { "epoch": 2.7366989631140575, "grad_norm": 14.927297592163086, "learning_rate": 2.8772168394809905e-05, "loss": 2.6605, "step": 16100 }, { "epoch": 2.738398776134625, "grad_norm": 13.946223258972168, "learning_rate": 2.8769335373108962e-05, "loss": 2.8848, "step": 16110 }, { "epoch": 2.7400985891551928, "grad_norm": 11.487722396850586, "learning_rate": 2.8766502351408012e-05, "loss": 2.9153, "step": 16120 }, { "epoch": 2.7417984021757604, "grad_norm": 14.017670631408691, "learning_rate": 2.8763669329707065e-05, "loss": 2.6114, "step": 16130 }, { "epoch": 2.7434982151963285, "grad_norm": 12.269607543945312, "learning_rate": 2.8760836308006122e-05, "loss": 2.9128, "step": 16140 }, { "epoch": 2.745198028216896, "grad_norm": 11.884027481079102, "learning_rate": 2.8758003286305172e-05, "loss": 2.86, "step": 16150 }, { "epoch": 2.746897841237464, "grad_norm": 13.846290588378906, "learning_rate": 2.8755170264604226e-05, "loss": 2.7431, "step": 16160 }, { "epoch": 2.7485976542580315, "grad_norm": 14.165824890136719, "learning_rate": 2.8752337242903283e-05, "loss": 2.8408, "step": 16170 }, { "epoch": 2.7502974672785996, "grad_norm": 16.20914649963379, "learning_rate": 2.8749504221202336e-05, "loss": 2.6577, "step": 16180 }, { "epoch": 2.7519972802991672, "grad_norm": 17.47696876525879, "learning_rate": 2.8746671199501387e-05, "loss": 2.9257, "step": 16190 }, { "epoch": 2.753697093319735, "grad_norm": 10.191811561584473, "learning_rate": 2.8743838177800443e-05, "loss": 2.4183, "step": 16200 }, { "epoch": 2.7553969063403025, "grad_norm": 15.531316757202148, "learning_rate": 2.8741005156099497e-05, "loss": 2.8873, "step": 16210 }, { "epoch": 2.75709671936087, "grad_norm": 14.913631439208984, "learning_rate": 2.873817213439855e-05, "loss": 2.606, "step": 16220 }, { "epoch": 2.758796532381438, "grad_norm": 12.55640983581543, "learning_rate": 2.8735339112697604e-05, "loss": 2.5672, "step": 16230 }, { "epoch": 2.7604963454020055, "grad_norm": 11.368587493896484, "learning_rate": 2.8732506090996658e-05, "loss": 2.8402, "step": 16240 }, { "epoch": 2.7621961584225736, "grad_norm": 15.936051368713379, "learning_rate": 2.872967306929571e-05, "loss": 2.7245, "step": 16250 }, { "epoch": 2.7638959714431413, "grad_norm": 14.614996910095215, "learning_rate": 2.8726840047594768e-05, "loss": 2.7223, "step": 16260 }, { "epoch": 2.765595784463709, "grad_norm": 17.26681137084961, "learning_rate": 2.8724007025893818e-05, "loss": 2.7429, "step": 16270 }, { "epoch": 2.767295597484277, "grad_norm": 9.911303520202637, "learning_rate": 2.872117400419287e-05, "loss": 2.4554, "step": 16280 }, { "epoch": 2.7689954105048447, "grad_norm": 12.456637382507324, "learning_rate": 2.871834098249193e-05, "loss": 2.8545, "step": 16290 }, { "epoch": 2.7706952235254123, "grad_norm": 15.595967292785645, "learning_rate": 2.871550796079098e-05, "loss": 2.7552, "step": 16300 }, { "epoch": 2.77239503654598, "grad_norm": 12.553180694580078, "learning_rate": 2.8712674939090032e-05, "loss": 2.821, "step": 16310 }, { "epoch": 2.7740948495665476, "grad_norm": 7.944934844970703, "learning_rate": 2.870984191738909e-05, "loss": 2.8966, "step": 16320 }, { "epoch": 2.7757946625871153, "grad_norm": 13.280707359313965, "learning_rate": 2.8707008895688143e-05, "loss": 2.8412, "step": 16330 }, { "epoch": 2.777494475607683, "grad_norm": 13.754220962524414, "learning_rate": 2.8704175873987193e-05, "loss": 2.9241, "step": 16340 }, { "epoch": 2.779194288628251, "grad_norm": 10.269477844238281, "learning_rate": 2.870134285228625e-05, "loss": 2.7798, "step": 16350 }, { "epoch": 2.7808941016488187, "grad_norm": 10.21303653717041, "learning_rate": 2.8698509830585303e-05, "loss": 2.7638, "step": 16360 }, { "epoch": 2.7825939146693863, "grad_norm": 9.385387420654297, "learning_rate": 2.8695676808884357e-05, "loss": 2.5741, "step": 16370 }, { "epoch": 2.784293727689954, "grad_norm": 15.679076194763184, "learning_rate": 2.869284378718341e-05, "loss": 2.8182, "step": 16380 }, { "epoch": 2.785993540710522, "grad_norm": 9.848913192749023, "learning_rate": 2.8690010765482464e-05, "loss": 2.6527, "step": 16390 }, { "epoch": 2.7876933537310897, "grad_norm": 10.618515014648438, "learning_rate": 2.8687177743781517e-05, "loss": 2.5328, "step": 16400 }, { "epoch": 2.7893931667516574, "grad_norm": 13.602495193481445, "learning_rate": 2.8684344722080574e-05, "loss": 2.9776, "step": 16410 }, { "epoch": 2.791092979772225, "grad_norm": 13.935531616210938, "learning_rate": 2.8681511700379624e-05, "loss": 2.6589, "step": 16420 }, { "epoch": 2.7927927927927927, "grad_norm": 14.173949241638184, "learning_rate": 2.8678678678678678e-05, "loss": 2.7182, "step": 16430 }, { "epoch": 2.7944926058133603, "grad_norm": 12.830388069152832, "learning_rate": 2.8675845656977735e-05, "loss": 2.8028, "step": 16440 }, { "epoch": 2.796192418833928, "grad_norm": 11.526257514953613, "learning_rate": 2.8673012635276788e-05, "loss": 2.7658, "step": 16450 }, { "epoch": 2.797892231854496, "grad_norm": 10.906553268432617, "learning_rate": 2.867017961357584e-05, "loss": 2.9058, "step": 16460 }, { "epoch": 2.7995920448750637, "grad_norm": 10.394835472106934, "learning_rate": 2.8667346591874895e-05, "loss": 2.9599, "step": 16470 }, { "epoch": 2.8012918578956314, "grad_norm": 17.589014053344727, "learning_rate": 2.866451357017395e-05, "loss": 2.7752, "step": 16480 }, { "epoch": 2.802991670916199, "grad_norm": 21.779001235961914, "learning_rate": 2.8661680548473e-05, "loss": 2.961, "step": 16490 }, { "epoch": 2.804691483936767, "grad_norm": 14.868515968322754, "learning_rate": 2.8658847526772056e-05, "loss": 2.7508, "step": 16500 }, { "epoch": 2.806391296957335, "grad_norm": 15.76915454864502, "learning_rate": 2.865601450507111e-05, "loss": 2.7547, "step": 16510 }, { "epoch": 2.8080911099779025, "grad_norm": 12.315926551818848, "learning_rate": 2.8653181483370163e-05, "loss": 2.9375, "step": 16520 }, { "epoch": 2.80979092299847, "grad_norm": 12.550084114074707, "learning_rate": 2.8650348461669216e-05, "loss": 2.7194, "step": 16530 }, { "epoch": 2.8114907360190378, "grad_norm": 13.733428955078125, "learning_rate": 2.864751543996827e-05, "loss": 2.7525, "step": 16540 }, { "epoch": 2.8131905490396054, "grad_norm": 10.573352813720703, "learning_rate": 2.8644682418267324e-05, "loss": 2.8359, "step": 16550 }, { "epoch": 2.8148903620601735, "grad_norm": 13.854256629943848, "learning_rate": 2.864184939656638e-05, "loss": 2.578, "step": 16560 }, { "epoch": 2.816590175080741, "grad_norm": 11.894034385681152, "learning_rate": 2.863901637486543e-05, "loss": 2.606, "step": 16570 }, { "epoch": 2.818289988101309, "grad_norm": 17.31732749938965, "learning_rate": 2.8636183353164484e-05, "loss": 2.7342, "step": 16580 }, { "epoch": 2.8199898011218765, "grad_norm": 10.895216941833496, "learning_rate": 2.863335033146354e-05, "loss": 2.7663, "step": 16590 }, { "epoch": 2.8216896141424446, "grad_norm": 16.875749588012695, "learning_rate": 2.8630517309762595e-05, "loss": 2.7139, "step": 16600 }, { "epoch": 2.823389427163012, "grad_norm": 23.40958023071289, "learning_rate": 2.8627684288061645e-05, "loss": 2.705, "step": 16610 }, { "epoch": 2.82508924018358, "grad_norm": 10.868183135986328, "learning_rate": 2.86248512663607e-05, "loss": 2.9003, "step": 16620 }, { "epoch": 2.8267890532041475, "grad_norm": 13.735189437866211, "learning_rate": 2.8622018244659755e-05, "loss": 2.5167, "step": 16630 }, { "epoch": 2.828488866224715, "grad_norm": 19.563745498657227, "learning_rate": 2.861918522295881e-05, "loss": 2.5962, "step": 16640 }, { "epoch": 2.830188679245283, "grad_norm": 13.512946128845215, "learning_rate": 2.8616352201257862e-05, "loss": 2.4323, "step": 16650 }, { "epoch": 2.8318884922658505, "grad_norm": 9.571099281311035, "learning_rate": 2.8613519179556916e-05, "loss": 2.7377, "step": 16660 }, { "epoch": 2.8335883052864186, "grad_norm": 11.738482475280762, "learning_rate": 2.8610686157855973e-05, "loss": 2.7413, "step": 16670 }, { "epoch": 2.8352881183069862, "grad_norm": 10.31972599029541, "learning_rate": 2.8607853136155023e-05, "loss": 2.6725, "step": 16680 }, { "epoch": 2.836987931327554, "grad_norm": 18.105436325073242, "learning_rate": 2.8605020114454076e-05, "loss": 2.8055, "step": 16690 }, { "epoch": 2.8386877443481215, "grad_norm": 13.007699966430664, "learning_rate": 2.8602187092753133e-05, "loss": 2.8742, "step": 16700 }, { "epoch": 2.8403875573686896, "grad_norm": 13.742009162902832, "learning_rate": 2.8599354071052187e-05, "loss": 2.9718, "step": 16710 }, { "epoch": 2.8420873703892573, "grad_norm": 23.100309371948242, "learning_rate": 2.8596521049351237e-05, "loss": 2.8293, "step": 16720 }, { "epoch": 2.843787183409825, "grad_norm": 13.903480529785156, "learning_rate": 2.8593688027650294e-05, "loss": 2.6936, "step": 16730 }, { "epoch": 2.8454869964303926, "grad_norm": 20.720117568969727, "learning_rate": 2.8590855005949347e-05, "loss": 2.7281, "step": 16740 }, { "epoch": 2.8471868094509603, "grad_norm": 9.757444381713867, "learning_rate": 2.85880219842484e-05, "loss": 2.8452, "step": 16750 }, { "epoch": 2.848886622471528, "grad_norm": 15.566572189331055, "learning_rate": 2.8585188962547454e-05, "loss": 2.6488, "step": 16760 }, { "epoch": 2.850586435492096, "grad_norm": 12.645938873291016, "learning_rate": 2.8582355940846508e-05, "loss": 2.6136, "step": 16770 }, { "epoch": 2.8522862485126637, "grad_norm": 12.666802406311035, "learning_rate": 2.857952291914556e-05, "loss": 2.7193, "step": 16780 }, { "epoch": 2.8539860615332313, "grad_norm": 14.974541664123535, "learning_rate": 2.8576689897444618e-05, "loss": 2.8867, "step": 16790 }, { "epoch": 2.855685874553799, "grad_norm": 9.867441177368164, "learning_rate": 2.857385687574367e-05, "loss": 2.8232, "step": 16800 }, { "epoch": 2.857385687574367, "grad_norm": 10.934699058532715, "learning_rate": 2.8571023854042722e-05, "loss": 2.8525, "step": 16810 }, { "epoch": 2.8590855005949347, "grad_norm": 8.675003051757812, "learning_rate": 2.856819083234178e-05, "loss": 2.921, "step": 16820 }, { "epoch": 2.8607853136155024, "grad_norm": 8.245339393615723, "learning_rate": 2.856535781064083e-05, "loss": 2.7014, "step": 16830 }, { "epoch": 2.86248512663607, "grad_norm": 11.899715423583984, "learning_rate": 2.8562524788939882e-05, "loss": 2.7578, "step": 16840 }, { "epoch": 2.8641849396566377, "grad_norm": 10.684896469116211, "learning_rate": 2.855969176723894e-05, "loss": 2.9002, "step": 16850 }, { "epoch": 2.8658847526772053, "grad_norm": 13.697254180908203, "learning_rate": 2.8556858745537993e-05, "loss": 2.5716, "step": 16860 }, { "epoch": 2.867584565697773, "grad_norm": 11.468740463256836, "learning_rate": 2.8554025723837043e-05, "loss": 2.5108, "step": 16870 }, { "epoch": 2.869284378718341, "grad_norm": 17.409488677978516, "learning_rate": 2.85511927021361e-05, "loss": 2.9821, "step": 16880 }, { "epoch": 2.8709841917389087, "grad_norm": 9.801129341125488, "learning_rate": 2.8548359680435153e-05, "loss": 2.5621, "step": 16890 }, { "epoch": 2.8726840047594764, "grad_norm": 12.777430534362793, "learning_rate": 2.8545526658734207e-05, "loss": 2.8359, "step": 16900 }, { "epoch": 2.874383817780044, "grad_norm": 15.956353187561035, "learning_rate": 2.854269363703326e-05, "loss": 2.9506, "step": 16910 }, { "epoch": 2.876083630800612, "grad_norm": 19.108848571777344, "learning_rate": 2.8539860615332314e-05, "loss": 2.7765, "step": 16920 }, { "epoch": 2.87778344382118, "grad_norm": 9.955090522766113, "learning_rate": 2.8537027593631367e-05, "loss": 2.5924, "step": 16930 }, { "epoch": 2.8794832568417474, "grad_norm": 21.250228881835938, "learning_rate": 2.8534194571930424e-05, "loss": 2.8185, "step": 16940 }, { "epoch": 2.881183069862315, "grad_norm": 11.907524108886719, "learning_rate": 2.8531361550229475e-05, "loss": 2.7449, "step": 16950 }, { "epoch": 2.8828828828828827, "grad_norm": 13.655138969421387, "learning_rate": 2.8528528528528528e-05, "loss": 2.7446, "step": 16960 }, { "epoch": 2.8845826959034504, "grad_norm": 17.330461502075195, "learning_rate": 2.8525695506827585e-05, "loss": 2.6761, "step": 16970 }, { "epoch": 2.8862825089240185, "grad_norm": 13.43913745880127, "learning_rate": 2.852286248512664e-05, "loss": 2.6031, "step": 16980 }, { "epoch": 2.887982321944586, "grad_norm": 8.561664581298828, "learning_rate": 2.852002946342569e-05, "loss": 2.9594, "step": 16990 }, { "epoch": 2.889682134965154, "grad_norm": 8.831459999084473, "learning_rate": 2.8517196441724746e-05, "loss": 2.6785, "step": 17000 }, { "epoch": 2.8913819479857215, "grad_norm": 8.302952766418457, "learning_rate": 2.85143634200238e-05, "loss": 2.8913, "step": 17010 }, { "epoch": 2.8930817610062896, "grad_norm": 16.493562698364258, "learning_rate": 2.851153039832285e-05, "loss": 2.768, "step": 17020 }, { "epoch": 2.894781574026857, "grad_norm": 18.8092098236084, "learning_rate": 2.8508697376621906e-05, "loss": 2.9697, "step": 17030 }, { "epoch": 2.896481387047425, "grad_norm": 26.17414093017578, "learning_rate": 2.850586435492096e-05, "loss": 2.8126, "step": 17040 }, { "epoch": 2.8981812000679925, "grad_norm": 13.431203842163086, "learning_rate": 2.8503031333220013e-05, "loss": 2.884, "step": 17050 }, { "epoch": 2.89988101308856, "grad_norm": 17.15241813659668, "learning_rate": 2.8500198311519067e-05, "loss": 2.6889, "step": 17060 }, { "epoch": 2.901580826109128, "grad_norm": 13.865191459655762, "learning_rate": 2.849736528981812e-05, "loss": 2.8479, "step": 17070 }, { "epoch": 2.9032806391296955, "grad_norm": 18.616567611694336, "learning_rate": 2.8494532268117174e-05, "loss": 2.8848, "step": 17080 }, { "epoch": 2.9049804521502636, "grad_norm": 26.50035285949707, "learning_rate": 2.849169924641623e-05, "loss": 2.6122, "step": 17090 }, { "epoch": 2.9066802651708312, "grad_norm": 16.0516414642334, "learning_rate": 2.848886622471528e-05, "loss": 2.6432, "step": 17100 }, { "epoch": 2.908380078191399, "grad_norm": 13.082794189453125, "learning_rate": 2.8486033203014334e-05, "loss": 2.8232, "step": 17110 }, { "epoch": 2.9100798912119665, "grad_norm": 40.25065994262695, "learning_rate": 2.848320018131339e-05, "loss": 2.7981, "step": 17120 }, { "epoch": 2.9117797042325346, "grad_norm": 8.122076988220215, "learning_rate": 2.8480367159612445e-05, "loss": 2.726, "step": 17130 }, { "epoch": 2.9134795172531023, "grad_norm": 15.702239036560059, "learning_rate": 2.8477534137911495e-05, "loss": 2.7887, "step": 17140 }, { "epoch": 2.91517933027367, "grad_norm": 12.543558120727539, "learning_rate": 2.8474701116210552e-05, "loss": 2.6436, "step": 17150 }, { "epoch": 2.9168791432942376, "grad_norm": 19.48953628540039, "learning_rate": 2.8471868094509605e-05, "loss": 2.6882, "step": 17160 }, { "epoch": 2.9185789563148052, "grad_norm": 12.257553100585938, "learning_rate": 2.8469035072808655e-05, "loss": 2.8709, "step": 17170 }, { "epoch": 2.920278769335373, "grad_norm": 17.737577438354492, "learning_rate": 2.8466202051107712e-05, "loss": 2.5382, "step": 17180 }, { "epoch": 2.921978582355941, "grad_norm": 41.558311462402344, "learning_rate": 2.8463369029406766e-05, "loss": 2.6663, "step": 17190 }, { "epoch": 2.9236783953765086, "grad_norm": 20.28525733947754, "learning_rate": 2.846053600770582e-05, "loss": 2.7178, "step": 17200 }, { "epoch": 2.9253782083970763, "grad_norm": 11.744444847106934, "learning_rate": 2.8457702986004873e-05, "loss": 2.7517, "step": 17210 }, { "epoch": 2.927078021417644, "grad_norm": 13.137141227722168, "learning_rate": 2.8454869964303926e-05, "loss": 2.9128, "step": 17220 }, { "epoch": 2.928777834438212, "grad_norm": 10.2074613571167, "learning_rate": 2.845203694260298e-05, "loss": 2.6926, "step": 17230 }, { "epoch": 2.9304776474587797, "grad_norm": 28.9866886138916, "learning_rate": 2.8449203920902037e-05, "loss": 2.7695, "step": 17240 }, { "epoch": 2.9321774604793474, "grad_norm": 12.17042064666748, "learning_rate": 2.8446370899201087e-05, "loss": 2.9827, "step": 17250 }, { "epoch": 2.933877273499915, "grad_norm": 14.732719421386719, "learning_rate": 2.844353787750014e-05, "loss": 2.6977, "step": 17260 }, { "epoch": 2.9355770865204827, "grad_norm": 13.628813743591309, "learning_rate": 2.8440704855799197e-05, "loss": 2.7878, "step": 17270 }, { "epoch": 2.9372768995410503, "grad_norm": 13.665791511535645, "learning_rate": 2.843787183409825e-05, "loss": 2.8094, "step": 17280 }, { "epoch": 2.938976712561618, "grad_norm": 9.55314826965332, "learning_rate": 2.84350388123973e-05, "loss": 2.7402, "step": 17290 }, { "epoch": 2.940676525582186, "grad_norm": 18.210834503173828, "learning_rate": 2.8432205790696358e-05, "loss": 2.7584, "step": 17300 }, { "epoch": 2.9423763386027537, "grad_norm": 13.318185806274414, "learning_rate": 2.842937276899541e-05, "loss": 2.8097, "step": 17310 }, { "epoch": 2.9440761516233214, "grad_norm": 8.078121185302734, "learning_rate": 2.8426539747294465e-05, "loss": 2.8919, "step": 17320 }, { "epoch": 2.945775964643889, "grad_norm": 12.787391662597656, "learning_rate": 2.842370672559352e-05, "loss": 2.7217, "step": 17330 }, { "epoch": 2.947475777664457, "grad_norm": 8.066301345825195, "learning_rate": 2.8420873703892572e-05, "loss": 2.7394, "step": 17340 }, { "epoch": 2.9491755906850248, "grad_norm": 13.947998046875, "learning_rate": 2.8418040682191626e-05, "loss": 2.4612, "step": 17350 }, { "epoch": 2.9508754037055924, "grad_norm": 23.37230682373047, "learning_rate": 2.841520766049068e-05, "loss": 2.7866, "step": 17360 }, { "epoch": 2.95257521672616, "grad_norm": 10.061360359191895, "learning_rate": 2.8412374638789733e-05, "loss": 2.6752, "step": 17370 }, { "epoch": 2.9542750297467277, "grad_norm": 20.476701736450195, "learning_rate": 2.8409541617088786e-05, "loss": 2.8052, "step": 17380 }, { "epoch": 2.9559748427672954, "grad_norm": 12.361953735351562, "learning_rate": 2.8406708595387843e-05, "loss": 2.6332, "step": 17390 }, { "epoch": 2.9576746557878635, "grad_norm": 14.232239723205566, "learning_rate": 2.8403875573686893e-05, "loss": 2.6807, "step": 17400 }, { "epoch": 2.959374468808431, "grad_norm": 15.677905082702637, "learning_rate": 2.840104255198595e-05, "loss": 2.7869, "step": 17410 }, { "epoch": 2.961074281828999, "grad_norm": 27.745607376098633, "learning_rate": 2.8398209530285004e-05, "loss": 2.7454, "step": 17420 }, { "epoch": 2.9627740948495664, "grad_norm": 20.698274612426758, "learning_rate": 2.8395376508584057e-05, "loss": 2.5761, "step": 17430 }, { "epoch": 2.9644739078701345, "grad_norm": 14.664587020874023, "learning_rate": 2.839254348688311e-05, "loss": 2.9742, "step": 17440 }, { "epoch": 2.966173720890702, "grad_norm": 21.283777236938477, "learning_rate": 2.8389710465182164e-05, "loss": 2.7902, "step": 17450 }, { "epoch": 2.96787353391127, "grad_norm": 8.326885223388672, "learning_rate": 2.8386877443481218e-05, "loss": 2.7857, "step": 17460 }, { "epoch": 2.9695733469318375, "grad_norm": 120.24752807617188, "learning_rate": 2.8384044421780275e-05, "loss": 2.6754, "step": 17470 }, { "epoch": 2.971273159952405, "grad_norm": 15.73685073852539, "learning_rate": 2.8381211400079325e-05, "loss": 2.7919, "step": 17480 }, { "epoch": 2.972972972972973, "grad_norm": 12.222221374511719, "learning_rate": 2.8378378378378378e-05, "loss": 2.7574, "step": 17490 }, { "epoch": 2.9746727859935405, "grad_norm": 13.957406044006348, "learning_rate": 2.8375545356677435e-05, "loss": 2.8282, "step": 17500 }, { "epoch": 2.9763725990141086, "grad_norm": 15.21127986907959, "learning_rate": 2.8372712334976485e-05, "loss": 2.6057, "step": 17510 }, { "epoch": 2.978072412034676, "grad_norm": 15.818826675415039, "learning_rate": 2.836987931327554e-05, "loss": 2.6939, "step": 17520 }, { "epoch": 2.979772225055244, "grad_norm": 18.877592086791992, "learning_rate": 2.8367046291574596e-05, "loss": 2.6489, "step": 17530 }, { "epoch": 2.9814720380758115, "grad_norm": 19.9813175201416, "learning_rate": 2.836421326987365e-05, "loss": 2.9562, "step": 17540 }, { "epoch": 2.9831718510963796, "grad_norm": 12.806788444519043, "learning_rate": 2.83613802481727e-05, "loss": 2.7764, "step": 17550 }, { "epoch": 2.9848716641169473, "grad_norm": 10.421432495117188, "learning_rate": 2.8358547226471756e-05, "loss": 2.7264, "step": 17560 }, { "epoch": 2.986571477137515, "grad_norm": 20.780668258666992, "learning_rate": 2.835571420477081e-05, "loss": 2.5395, "step": 17570 }, { "epoch": 2.9882712901580826, "grad_norm": 17.732887268066406, "learning_rate": 2.8352881183069863e-05, "loss": 2.8405, "step": 17580 }, { "epoch": 2.9899711031786502, "grad_norm": 9.881047248840332, "learning_rate": 2.8350048161368917e-05, "loss": 2.8976, "step": 17590 }, { "epoch": 2.991670916199218, "grad_norm": 19.34345817565918, "learning_rate": 2.834721513966797e-05, "loss": 2.7159, "step": 17600 }, { "epoch": 2.993370729219786, "grad_norm": 12.467839241027832, "learning_rate": 2.8344382117967024e-05, "loss": 2.865, "step": 17610 }, { "epoch": 2.9950705422403536, "grad_norm": 14.320940971374512, "learning_rate": 2.834154909626608e-05, "loss": 2.8112, "step": 17620 }, { "epoch": 2.9967703552609213, "grad_norm": 18.489004135131836, "learning_rate": 2.833871607456513e-05, "loss": 2.4055, "step": 17630 }, { "epoch": 2.998470168281489, "grad_norm": 10.95834732055664, "learning_rate": 2.8335883052864184e-05, "loss": 2.9587, "step": 17640 }, { "epoch": 3.0, "eval_cer": 1.0, "eval_loss": 3.0626986026763916, "eval_runtime": 1964.8324, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.24, "step": 17649 }, { "epoch": 3.0001699813020566, "grad_norm": 11.960775375366211, "learning_rate": 2.833305003116324e-05, "loss": 2.8729, "step": 17650 }, { "epoch": 3.0018697943226247, "grad_norm": 18.382959365844727, "learning_rate": 2.8330217009462295e-05, "loss": 2.6096, "step": 17660 }, { "epoch": 3.0035696073431923, "grad_norm": 19.13520050048828, "learning_rate": 2.8327383987761345e-05, "loss": 2.6607, "step": 17670 }, { "epoch": 3.00526942036376, "grad_norm": 12.5165433883667, "learning_rate": 2.8324550966060402e-05, "loss": 2.6506, "step": 17680 }, { "epoch": 3.0069692333843276, "grad_norm": 11.825764656066895, "learning_rate": 2.8321717944359455e-05, "loss": 2.6856, "step": 17690 }, { "epoch": 3.0086690464048953, "grad_norm": 18.700942993164062, "learning_rate": 2.8318884922658506e-05, "loss": 2.6177, "step": 17700 }, { "epoch": 3.0103688594254634, "grad_norm": 12.335508346557617, "learning_rate": 2.8316051900957563e-05, "loss": 2.457, "step": 17710 }, { "epoch": 3.012068672446031, "grad_norm": 15.803837776184082, "learning_rate": 2.8313218879256616e-05, "loss": 2.4457, "step": 17720 }, { "epoch": 3.0137684854665987, "grad_norm": 20.033796310424805, "learning_rate": 2.831038585755567e-05, "loss": 2.5378, "step": 17730 }, { "epoch": 3.0154682984871664, "grad_norm": 16.347166061401367, "learning_rate": 2.8307552835854723e-05, "loss": 2.3706, "step": 17740 }, { "epoch": 3.017168111507734, "grad_norm": 16.90055274963379, "learning_rate": 2.8304719814153777e-05, "loss": 2.6214, "step": 17750 }, { "epoch": 3.018867924528302, "grad_norm": 10.944138526916504, "learning_rate": 2.830188679245283e-05, "loss": 2.6892, "step": 17760 }, { "epoch": 3.0205677375488698, "grad_norm": 19.659269332885742, "learning_rate": 2.8299053770751887e-05, "loss": 2.6472, "step": 17770 }, { "epoch": 3.0222675505694374, "grad_norm": 13.537118911743164, "learning_rate": 2.8296220749050937e-05, "loss": 2.6142, "step": 17780 }, { "epoch": 3.023967363590005, "grad_norm": 14.245484352111816, "learning_rate": 2.829338772734999e-05, "loss": 2.4066, "step": 17790 }, { "epoch": 3.0256671766105727, "grad_norm": 12.913665771484375, "learning_rate": 2.8290554705649048e-05, "loss": 2.6349, "step": 17800 }, { "epoch": 3.0273669896311404, "grad_norm": 17.097455978393555, "learning_rate": 2.82877216839481e-05, "loss": 2.6112, "step": 17810 }, { "epoch": 3.0290668026517085, "grad_norm": 10.17308521270752, "learning_rate": 2.828488866224715e-05, "loss": 2.7577, "step": 17820 }, { "epoch": 3.030766615672276, "grad_norm": 17.82527732849121, "learning_rate": 2.8282055640546208e-05, "loss": 2.6178, "step": 17830 }, { "epoch": 3.0324664286928438, "grad_norm": 14.4093656539917, "learning_rate": 2.8279222618845262e-05, "loss": 2.5296, "step": 17840 }, { "epoch": 3.0341662417134114, "grad_norm": 13.026564598083496, "learning_rate": 2.8276389597144315e-05, "loss": 2.6071, "step": 17850 }, { "epoch": 3.035866054733979, "grad_norm": 15.589499473571777, "learning_rate": 2.827355657544337e-05, "loss": 2.3043, "step": 17860 }, { "epoch": 3.037565867754547, "grad_norm": 11.102391242980957, "learning_rate": 2.8270723553742422e-05, "loss": 2.4842, "step": 17870 }, { "epoch": 3.039265680775115, "grad_norm": 16.27799415588379, "learning_rate": 2.8267890532041476e-05, "loss": 2.5015, "step": 17880 }, { "epoch": 3.0409654937956825, "grad_norm": 13.32157039642334, "learning_rate": 2.826505751034053e-05, "loss": 2.4144, "step": 17890 }, { "epoch": 3.04266530681625, "grad_norm": 14.651309967041016, "learning_rate": 2.8262224488639583e-05, "loss": 2.6494, "step": 17900 }, { "epoch": 3.044365119836818, "grad_norm": 15.366731643676758, "learning_rate": 2.8259391466938636e-05, "loss": 2.531, "step": 17910 }, { "epoch": 3.046064932857386, "grad_norm": 50.80433654785156, "learning_rate": 2.8256558445237693e-05, "loss": 2.4261, "step": 17920 }, { "epoch": 3.0477647458779535, "grad_norm": 13.36491870880127, "learning_rate": 2.8253725423536743e-05, "loss": 2.3595, "step": 17930 }, { "epoch": 3.049464558898521, "grad_norm": 14.305244445800781, "learning_rate": 2.8250892401835797e-05, "loss": 2.6229, "step": 17940 }, { "epoch": 3.051164371919089, "grad_norm": 10.127382278442383, "learning_rate": 2.8248059380134854e-05, "loss": 2.7622, "step": 17950 }, { "epoch": 3.0528641849396565, "grad_norm": 14.266302108764648, "learning_rate": 2.8245226358433907e-05, "loss": 2.5786, "step": 17960 }, { "epoch": 3.0545639979602246, "grad_norm": 13.879210472106934, "learning_rate": 2.8242393336732957e-05, "loss": 2.6901, "step": 17970 }, { "epoch": 3.0562638109807923, "grad_norm": 14.153107643127441, "learning_rate": 2.8239560315032014e-05, "loss": 2.7423, "step": 17980 }, { "epoch": 3.05796362400136, "grad_norm": 13.262338638305664, "learning_rate": 2.8236727293331068e-05, "loss": 2.5019, "step": 17990 }, { "epoch": 3.0596634370219276, "grad_norm": 12.25165843963623, "learning_rate": 2.823389427163012e-05, "loss": 2.7132, "step": 18000 }, { "epoch": 3.061363250042495, "grad_norm": 16.764745712280273, "learning_rate": 2.8231061249929175e-05, "loss": 2.3627, "step": 18010 }, { "epoch": 3.063063063063063, "grad_norm": 12.314844131469727, "learning_rate": 2.822822822822823e-05, "loss": 2.5866, "step": 18020 }, { "epoch": 3.064762876083631, "grad_norm": 20.075586318969727, "learning_rate": 2.8225395206527282e-05, "loss": 2.4793, "step": 18030 }, { "epoch": 3.0664626891041986, "grad_norm": 12.063908576965332, "learning_rate": 2.8222562184826336e-05, "loss": 2.5695, "step": 18040 }, { "epoch": 3.0681625021247663, "grad_norm": 14.587801933288574, "learning_rate": 2.821972916312539e-05, "loss": 2.6383, "step": 18050 }, { "epoch": 3.069862315145334, "grad_norm": 16.202346801757812, "learning_rate": 2.8216896141424443e-05, "loss": 2.3684, "step": 18060 }, { "epoch": 3.0715621281659016, "grad_norm": 21.232540130615234, "learning_rate": 2.82140631197235e-05, "loss": 2.673, "step": 18070 }, { "epoch": 3.0732619411864697, "grad_norm": 10.955320358276367, "learning_rate": 2.821123009802255e-05, "loss": 2.6337, "step": 18080 }, { "epoch": 3.0749617542070373, "grad_norm": 10.452777862548828, "learning_rate": 2.8208397076321603e-05, "loss": 2.5293, "step": 18090 }, { "epoch": 3.076661567227605, "grad_norm": 8.091347694396973, "learning_rate": 2.820556405462066e-05, "loss": 2.7647, "step": 18100 }, { "epoch": 3.0783613802481726, "grad_norm": 13.047364234924316, "learning_rate": 2.8202731032919714e-05, "loss": 2.6046, "step": 18110 }, { "epoch": 3.0800611932687403, "grad_norm": 18.480260848999023, "learning_rate": 2.8199898011218767e-05, "loss": 2.4708, "step": 18120 }, { "epoch": 3.0817610062893084, "grad_norm": 12.680754661560059, "learning_rate": 2.819706498951782e-05, "loss": 2.4848, "step": 18130 }, { "epoch": 3.083460819309876, "grad_norm": 8.14178466796875, "learning_rate": 2.8194231967816874e-05, "loss": 2.3657, "step": 18140 }, { "epoch": 3.0851606323304437, "grad_norm": 19.6214599609375, "learning_rate": 2.819139894611593e-05, "loss": 2.374, "step": 18150 }, { "epoch": 3.0868604453510113, "grad_norm": 88.5333023071289, "learning_rate": 2.818856592441498e-05, "loss": 2.655, "step": 18160 }, { "epoch": 3.088560258371579, "grad_norm": 14.821093559265137, "learning_rate": 2.8185732902714035e-05, "loss": 2.4402, "step": 18170 }, { "epoch": 3.090260071392147, "grad_norm": 14.3113374710083, "learning_rate": 2.818289988101309e-05, "loss": 2.4205, "step": 18180 }, { "epoch": 3.0919598844127147, "grad_norm": 13.385046005249023, "learning_rate": 2.8180066859312145e-05, "loss": 2.5633, "step": 18190 }, { "epoch": 3.0936596974332824, "grad_norm": 12.09650993347168, "learning_rate": 2.8177233837611195e-05, "loss": 2.8125, "step": 18200 }, { "epoch": 3.09535951045385, "grad_norm": 13.876068115234375, "learning_rate": 2.8174400815910252e-05, "loss": 2.5383, "step": 18210 }, { "epoch": 3.0970593234744177, "grad_norm": 17.068199157714844, "learning_rate": 2.8171567794209306e-05, "loss": 2.6839, "step": 18220 }, { "epoch": 3.0987591364949854, "grad_norm": 23.089492797851562, "learning_rate": 2.8168734772508356e-05, "loss": 2.4929, "step": 18230 }, { "epoch": 3.1004589495155535, "grad_norm": 10.830326080322266, "learning_rate": 2.8165901750807413e-05, "loss": 2.678, "step": 18240 }, { "epoch": 3.102158762536121, "grad_norm": 19.599878311157227, "learning_rate": 2.8163068729106466e-05, "loss": 2.4885, "step": 18250 }, { "epoch": 3.1038585755566888, "grad_norm": 12.377781867980957, "learning_rate": 2.816023570740552e-05, "loss": 2.7369, "step": 18260 }, { "epoch": 3.1055583885772564, "grad_norm": 11.76950740814209, "learning_rate": 2.8157402685704573e-05, "loss": 2.798, "step": 18270 }, { "epoch": 3.107258201597824, "grad_norm": 13.996922492980957, "learning_rate": 2.8154569664003627e-05, "loss": 2.3364, "step": 18280 }, { "epoch": 3.108958014618392, "grad_norm": 15.109862327575684, "learning_rate": 2.815173664230268e-05, "loss": 2.3464, "step": 18290 }, { "epoch": 3.11065782763896, "grad_norm": 17.516691207885742, "learning_rate": 2.8148903620601737e-05, "loss": 2.3789, "step": 18300 }, { "epoch": 3.1123576406595275, "grad_norm": 19.186437606811523, "learning_rate": 2.8146070598900787e-05, "loss": 2.4455, "step": 18310 }, { "epoch": 3.114057453680095, "grad_norm": 17.638757705688477, "learning_rate": 2.814323757719984e-05, "loss": 2.5902, "step": 18320 }, { "epoch": 3.1157572667006628, "grad_norm": 10.353005409240723, "learning_rate": 2.8140404555498898e-05, "loss": 2.6049, "step": 18330 }, { "epoch": 3.117457079721231, "grad_norm": 22.720260620117188, "learning_rate": 2.813757153379795e-05, "loss": 2.538, "step": 18340 }, { "epoch": 3.1191568927417985, "grad_norm": 19.533191680908203, "learning_rate": 2.8134738512097e-05, "loss": 2.6817, "step": 18350 }, { "epoch": 3.120856705762366, "grad_norm": 13.397592544555664, "learning_rate": 2.813190549039606e-05, "loss": 2.7985, "step": 18360 }, { "epoch": 3.122556518782934, "grad_norm": 13.252196311950684, "learning_rate": 2.8129072468695112e-05, "loss": 2.649, "step": 18370 }, { "epoch": 3.1242563318035015, "grad_norm": 10.510647773742676, "learning_rate": 2.8126239446994162e-05, "loss": 2.5969, "step": 18380 }, { "epoch": 3.1259561448240696, "grad_norm": 15.01638126373291, "learning_rate": 2.812340642529322e-05, "loss": 2.6636, "step": 18390 }, { "epoch": 3.1276559578446372, "grad_norm": 12.275396347045898, "learning_rate": 2.8120573403592272e-05, "loss": 2.3972, "step": 18400 }, { "epoch": 3.129355770865205, "grad_norm": 31.560585021972656, "learning_rate": 2.8117740381891326e-05, "loss": 2.606, "step": 18410 }, { "epoch": 3.1310555838857725, "grad_norm": 11.178109169006348, "learning_rate": 2.811490736019038e-05, "loss": 2.5613, "step": 18420 }, { "epoch": 3.13275539690634, "grad_norm": 17.630615234375, "learning_rate": 2.8112074338489433e-05, "loss": 2.6156, "step": 18430 }, { "epoch": 3.134455209926908, "grad_norm": 14.215181350708008, "learning_rate": 2.8109241316788487e-05, "loss": 2.6377, "step": 18440 }, { "epoch": 3.136155022947476, "grad_norm": 17.624876022338867, "learning_rate": 2.8106408295087543e-05, "loss": 2.724, "step": 18450 }, { "epoch": 3.1378548359680436, "grad_norm": 13.768404960632324, "learning_rate": 2.8103575273386594e-05, "loss": 2.3577, "step": 18460 }, { "epoch": 3.1395546489886113, "grad_norm": 12.94959545135498, "learning_rate": 2.8100742251685647e-05, "loss": 2.5937, "step": 18470 }, { "epoch": 3.141254462009179, "grad_norm": 13.335476875305176, "learning_rate": 2.8097909229984704e-05, "loss": 2.8398, "step": 18480 }, { "epoch": 3.1429542750297466, "grad_norm": 12.023576736450195, "learning_rate": 2.8095076208283758e-05, "loss": 2.4593, "step": 18490 }, { "epoch": 3.1446540880503147, "grad_norm": 14.270596504211426, "learning_rate": 2.8092243186582808e-05, "loss": 2.5731, "step": 18500 }, { "epoch": 3.1463539010708823, "grad_norm": 12.433355331420898, "learning_rate": 2.8089410164881865e-05, "loss": 2.591, "step": 18510 }, { "epoch": 3.14805371409145, "grad_norm": 12.94585132598877, "learning_rate": 2.8086577143180918e-05, "loss": 2.6879, "step": 18520 }, { "epoch": 3.1497535271120176, "grad_norm": 22.80073356628418, "learning_rate": 2.808374412147997e-05, "loss": 2.2911, "step": 18530 }, { "epoch": 3.1514533401325853, "grad_norm": 10.660249710083008, "learning_rate": 2.8080911099779025e-05, "loss": 2.5889, "step": 18540 }, { "epoch": 3.153153153153153, "grad_norm": 12.588072776794434, "learning_rate": 2.807807807807808e-05, "loss": 2.5532, "step": 18550 }, { "epoch": 3.154852966173721, "grad_norm": 11.19045352935791, "learning_rate": 2.8075245056377132e-05, "loss": 2.6771, "step": 18560 }, { "epoch": 3.1565527791942887, "grad_norm": 14.387115478515625, "learning_rate": 2.8072412034676186e-05, "loss": 2.8182, "step": 18570 }, { "epoch": 3.1582525922148563, "grad_norm": 15.958844184875488, "learning_rate": 2.806957901297524e-05, "loss": 2.6984, "step": 18580 }, { "epoch": 3.159952405235424, "grad_norm": 24.677797317504883, "learning_rate": 2.8066745991274293e-05, "loss": 2.5771, "step": 18590 }, { "epoch": 3.161652218255992, "grad_norm": 17.113595962524414, "learning_rate": 2.806391296957335e-05, "loss": 2.671, "step": 18600 }, { "epoch": 3.1633520312765597, "grad_norm": 15.32148265838623, "learning_rate": 2.80610799478724e-05, "loss": 2.491, "step": 18610 }, { "epoch": 3.1650518442971274, "grad_norm": 9.764541625976562, "learning_rate": 2.8058246926171453e-05, "loss": 2.4665, "step": 18620 }, { "epoch": 3.166751657317695, "grad_norm": 13.52016544342041, "learning_rate": 2.805541390447051e-05, "loss": 2.6198, "step": 18630 }, { "epoch": 3.1684514703382627, "grad_norm": 15.795875549316406, "learning_rate": 2.8052580882769564e-05, "loss": 2.6456, "step": 18640 }, { "epoch": 3.1701512833588303, "grad_norm": 19.911733627319336, "learning_rate": 2.8049747861068614e-05, "loss": 2.443, "step": 18650 }, { "epoch": 3.1718510963793984, "grad_norm": 10.95810317993164, "learning_rate": 2.804691483936767e-05, "loss": 2.6258, "step": 18660 }, { "epoch": 3.173550909399966, "grad_norm": 18.55303192138672, "learning_rate": 2.8044081817666724e-05, "loss": 2.5161, "step": 18670 }, { "epoch": 3.1752507224205337, "grad_norm": 16.863601684570312, "learning_rate": 2.8041248795965778e-05, "loss": 2.5758, "step": 18680 }, { "epoch": 3.1769505354411014, "grad_norm": 20.367435455322266, "learning_rate": 2.803841577426483e-05, "loss": 2.8154, "step": 18690 }, { "epoch": 3.178650348461669, "grad_norm": 12.493614196777344, "learning_rate": 2.8035582752563885e-05, "loss": 2.6882, "step": 18700 }, { "epoch": 3.180350161482237, "grad_norm": 14.103374481201172, "learning_rate": 2.803274973086294e-05, "loss": 2.5073, "step": 18710 }, { "epoch": 3.182049974502805, "grad_norm": 12.509221076965332, "learning_rate": 2.8029916709161992e-05, "loss": 2.5316, "step": 18720 }, { "epoch": 3.1837497875233725, "grad_norm": 12.551146507263184, "learning_rate": 2.8027083687461045e-05, "loss": 2.5104, "step": 18730 }, { "epoch": 3.18544960054394, "grad_norm": 14.723952293395996, "learning_rate": 2.80242506657601e-05, "loss": 2.4464, "step": 18740 }, { "epoch": 3.1871494135645078, "grad_norm": 12.925264358520508, "learning_rate": 2.8021417644059156e-05, "loss": 2.6043, "step": 18750 }, { "epoch": 3.1888492265850754, "grad_norm": 16.673927307128906, "learning_rate": 2.8018584622358206e-05, "loss": 2.6365, "step": 18760 }, { "epoch": 3.1905490396056435, "grad_norm": 11.234399795532227, "learning_rate": 2.801575160065726e-05, "loss": 2.4519, "step": 18770 }, { "epoch": 3.192248852626211, "grad_norm": 14.68161392211914, "learning_rate": 2.8012918578956316e-05, "loss": 2.4528, "step": 18780 }, { "epoch": 3.193948665646779, "grad_norm": 12.272843360900879, "learning_rate": 2.801008555725537e-05, "loss": 2.5996, "step": 18790 }, { "epoch": 3.1956484786673465, "grad_norm": 12.760843276977539, "learning_rate": 2.800725253555442e-05, "loss": 2.7039, "step": 18800 }, { "epoch": 3.197348291687914, "grad_norm": 20.540081024169922, "learning_rate": 2.8004419513853477e-05, "loss": 2.5718, "step": 18810 }, { "epoch": 3.199048104708482, "grad_norm": 11.795619010925293, "learning_rate": 2.800158649215253e-05, "loss": 2.5063, "step": 18820 }, { "epoch": 3.20074791772905, "grad_norm": 19.09040641784668, "learning_rate": 2.7998753470451584e-05, "loss": 2.6935, "step": 18830 }, { "epoch": 3.2024477307496175, "grad_norm": 17.713762283325195, "learning_rate": 2.7995920448750638e-05, "loss": 2.7078, "step": 18840 }, { "epoch": 3.204147543770185, "grad_norm": 11.812003135681152, "learning_rate": 2.799308742704969e-05, "loss": 2.6155, "step": 18850 }, { "epoch": 3.205847356790753, "grad_norm": 11.478965759277344, "learning_rate": 2.7990254405348748e-05, "loss": 2.4686, "step": 18860 }, { "epoch": 3.207547169811321, "grad_norm": 18.453641891479492, "learning_rate": 2.79874213836478e-05, "loss": 2.6355, "step": 18870 }, { "epoch": 3.2092469828318886, "grad_norm": 12.565414428710938, "learning_rate": 2.798458836194685e-05, "loss": 2.6917, "step": 18880 }, { "epoch": 3.2109467958524562, "grad_norm": 12.631887435913086, "learning_rate": 2.798175534024591e-05, "loss": 2.5896, "step": 18890 }, { "epoch": 3.212646608873024, "grad_norm": 10.048651695251465, "learning_rate": 2.7978922318544962e-05, "loss": 2.5815, "step": 18900 }, { "epoch": 3.2143464218935915, "grad_norm": 15.138341903686523, "learning_rate": 2.7976089296844012e-05, "loss": 2.4961, "step": 18910 }, { "epoch": 3.2160462349141596, "grad_norm": 25.508840560913086, "learning_rate": 2.797325627514307e-05, "loss": 2.5455, "step": 18920 }, { "epoch": 3.2177460479347273, "grad_norm": 12.161005973815918, "learning_rate": 2.7970423253442123e-05, "loss": 2.5681, "step": 18930 }, { "epoch": 3.219445860955295, "grad_norm": 13.690650939941406, "learning_rate": 2.7967590231741176e-05, "loss": 2.5838, "step": 18940 }, { "epoch": 3.2211456739758626, "grad_norm": 10.929139137268066, "learning_rate": 2.796475721004023e-05, "loss": 2.6062, "step": 18950 }, { "epoch": 3.2228454869964303, "grad_norm": 13.872018814086914, "learning_rate": 2.7961924188339283e-05, "loss": 2.5141, "step": 18960 }, { "epoch": 3.224545300016998, "grad_norm": 30.550765991210938, "learning_rate": 2.7959091166638337e-05, "loss": 2.4764, "step": 18970 }, { "epoch": 3.226245113037566, "grad_norm": 10.187055587768555, "learning_rate": 2.7956258144937394e-05, "loss": 2.6635, "step": 18980 }, { "epoch": 3.2279449260581337, "grad_norm": 10.699567794799805, "learning_rate": 2.7953425123236444e-05, "loss": 2.4871, "step": 18990 }, { "epoch": 3.2296447390787013, "grad_norm": 17.12593650817871, "learning_rate": 2.7950592101535497e-05, "loss": 2.6177, "step": 19000 }, { "epoch": 3.231344552099269, "grad_norm": 11.129720687866211, "learning_rate": 2.7947759079834554e-05, "loss": 2.6356, "step": 19010 }, { "epoch": 3.2330443651198366, "grad_norm": 22.616043090820312, "learning_rate": 2.7944926058133608e-05, "loss": 2.4137, "step": 19020 }, { "epoch": 3.2347441781404047, "grad_norm": 15.049964904785156, "learning_rate": 2.7942093036432658e-05, "loss": 2.4874, "step": 19030 }, { "epoch": 3.2364439911609724, "grad_norm": 18.636150360107422, "learning_rate": 2.7939260014731715e-05, "loss": 2.6331, "step": 19040 }, { "epoch": 3.23814380418154, "grad_norm": 15.5824613571167, "learning_rate": 2.793642699303077e-05, "loss": 2.4507, "step": 19050 }, { "epoch": 3.2398436172021077, "grad_norm": 11.371658325195312, "learning_rate": 2.7933593971329822e-05, "loss": 2.2848, "step": 19060 }, { "epoch": 3.2415434302226753, "grad_norm": 18.33756446838379, "learning_rate": 2.7930760949628875e-05, "loss": 2.7721, "step": 19070 }, { "epoch": 3.2432432432432434, "grad_norm": 12.06399154663086, "learning_rate": 2.792792792792793e-05, "loss": 2.3267, "step": 19080 }, { "epoch": 3.244943056263811, "grad_norm": 7.672133922576904, "learning_rate": 2.7925094906226982e-05, "loss": 2.2847, "step": 19090 }, { "epoch": 3.2466428692843787, "grad_norm": 15.965909957885742, "learning_rate": 2.7922261884526036e-05, "loss": 2.6232, "step": 19100 }, { "epoch": 3.2483426823049464, "grad_norm": 13.25330638885498, "learning_rate": 2.791942886282509e-05, "loss": 2.5878, "step": 19110 }, { "epoch": 3.250042495325514, "grad_norm": 12.931502342224121, "learning_rate": 2.7916595841124143e-05, "loss": 2.6424, "step": 19120 }, { "epoch": 3.251742308346082, "grad_norm": 9.56619930267334, "learning_rate": 2.79137628194232e-05, "loss": 2.2418, "step": 19130 }, { "epoch": 3.25344212136665, "grad_norm": 14.609004974365234, "learning_rate": 2.791092979772225e-05, "loss": 2.4311, "step": 19140 }, { "epoch": 3.2551419343872174, "grad_norm": 11.048659324645996, "learning_rate": 2.7908096776021304e-05, "loss": 2.7094, "step": 19150 }, { "epoch": 3.256841747407785, "grad_norm": 11.507854461669922, "learning_rate": 2.790526375432036e-05, "loss": 2.6357, "step": 19160 }, { "epoch": 3.2585415604283527, "grad_norm": 9.660496711730957, "learning_rate": 2.7902430732619414e-05, "loss": 2.5323, "step": 19170 }, { "epoch": 3.2602413734489204, "grad_norm": 16.729969024658203, "learning_rate": 2.7899597710918464e-05, "loss": 2.5051, "step": 19180 }, { "epoch": 3.2619411864694885, "grad_norm": 17.025211334228516, "learning_rate": 2.789676468921752e-05, "loss": 2.5671, "step": 19190 }, { "epoch": 3.263640999490056, "grad_norm": 13.490726470947266, "learning_rate": 2.7893931667516575e-05, "loss": 2.7224, "step": 19200 }, { "epoch": 3.265340812510624, "grad_norm": 16.71541404724121, "learning_rate": 2.7891098645815628e-05, "loss": 2.5334, "step": 19210 }, { "epoch": 3.2670406255311915, "grad_norm": 15.258418083190918, "learning_rate": 2.788826562411468e-05, "loss": 2.6279, "step": 19220 }, { "epoch": 3.2687404385517596, "grad_norm": 10.615541458129883, "learning_rate": 2.7885432602413735e-05, "loss": 2.5492, "step": 19230 }, { "epoch": 3.270440251572327, "grad_norm": 12.459065437316895, "learning_rate": 2.788259958071279e-05, "loss": 2.4437, "step": 19240 }, { "epoch": 3.272140064592895, "grad_norm": 12.432964324951172, "learning_rate": 2.7879766559011842e-05, "loss": 2.4468, "step": 19250 }, { "epoch": 3.2738398776134625, "grad_norm": 12.613449096679688, "learning_rate": 2.7876933537310896e-05, "loss": 2.657, "step": 19260 }, { "epoch": 3.27553969063403, "grad_norm": 15.531309127807617, "learning_rate": 2.787410051560995e-05, "loss": 2.3669, "step": 19270 }, { "epoch": 3.277239503654598, "grad_norm": 14.034357070922852, "learning_rate": 2.7871267493909006e-05, "loss": 2.6309, "step": 19280 }, { "epoch": 3.278939316675166, "grad_norm": 16.415084838867188, "learning_rate": 2.7868434472208056e-05, "loss": 2.4397, "step": 19290 }, { "epoch": 3.2806391296957336, "grad_norm": 14.385998725891113, "learning_rate": 2.786560145050711e-05, "loss": 2.7617, "step": 19300 }, { "epoch": 3.2823389427163012, "grad_norm": 12.692252159118652, "learning_rate": 2.7862768428806167e-05, "loss": 2.639, "step": 19310 }, { "epoch": 3.284038755736869, "grad_norm": 11.172627449035645, "learning_rate": 2.785993540710522e-05, "loss": 2.573, "step": 19320 }, { "epoch": 3.2857385687574365, "grad_norm": 7.181903839111328, "learning_rate": 2.785710238540427e-05, "loss": 2.7362, "step": 19330 }, { "epoch": 3.2874383817780046, "grad_norm": 8.897302627563477, "learning_rate": 2.7854269363703327e-05, "loss": 2.8282, "step": 19340 }, { "epoch": 3.2891381947985723, "grad_norm": 17.000045776367188, "learning_rate": 2.785143634200238e-05, "loss": 2.5526, "step": 19350 }, { "epoch": 3.29083800781914, "grad_norm": 16.297666549682617, "learning_rate": 2.7848603320301434e-05, "loss": 2.4757, "step": 19360 }, { "epoch": 3.2925378208397076, "grad_norm": 20.286710739135742, "learning_rate": 2.7845770298600488e-05, "loss": 2.4087, "step": 19370 }, { "epoch": 3.2942376338602752, "grad_norm": 15.94476318359375, "learning_rate": 2.784293727689954e-05, "loss": 2.5372, "step": 19380 }, { "epoch": 3.295937446880843, "grad_norm": 18.52703285217285, "learning_rate": 2.7840104255198595e-05, "loss": 2.4757, "step": 19390 }, { "epoch": 3.297637259901411, "grad_norm": 19.359468460083008, "learning_rate": 2.7837271233497652e-05, "loss": 2.5852, "step": 19400 }, { "epoch": 3.2993370729219786, "grad_norm": 19.695663452148438, "learning_rate": 2.7834438211796702e-05, "loss": 2.6003, "step": 19410 }, { "epoch": 3.3010368859425463, "grad_norm": 11.11328411102295, "learning_rate": 2.7831605190095755e-05, "loss": 2.6069, "step": 19420 }, { "epoch": 3.302736698963114, "grad_norm": 13.140619277954102, "learning_rate": 2.7828772168394812e-05, "loss": 2.4934, "step": 19430 }, { "epoch": 3.3044365119836816, "grad_norm": 10.961370468139648, "learning_rate": 2.7825939146693862e-05, "loss": 2.6941, "step": 19440 }, { "epoch": 3.3061363250042497, "grad_norm": 9.716208457946777, "learning_rate": 2.7823106124992916e-05, "loss": 2.4918, "step": 19450 }, { "epoch": 3.3078361380248174, "grad_norm": 24.728504180908203, "learning_rate": 2.7820273103291973e-05, "loss": 2.6273, "step": 19460 }, { "epoch": 3.309535951045385, "grad_norm": 16.8728084564209, "learning_rate": 2.7817440081591026e-05, "loss": 2.6754, "step": 19470 }, { "epoch": 3.3112357640659527, "grad_norm": 14.418697357177734, "learning_rate": 2.7814607059890077e-05, "loss": 2.4297, "step": 19480 }, { "epoch": 3.3129355770865203, "grad_norm": 10.58024787902832, "learning_rate": 2.7811774038189133e-05, "loss": 2.7088, "step": 19490 }, { "epoch": 3.3146353901070884, "grad_norm": 12.537999153137207, "learning_rate": 2.7808941016488187e-05, "loss": 2.5751, "step": 19500 }, { "epoch": 3.316335203127656, "grad_norm": 15.248635292053223, "learning_rate": 2.780610799478724e-05, "loss": 2.3246, "step": 19510 }, { "epoch": 3.3180350161482237, "grad_norm": 16.769563674926758, "learning_rate": 2.7803274973086294e-05, "loss": 2.4725, "step": 19520 }, { "epoch": 3.3197348291687914, "grad_norm": 12.67318058013916, "learning_rate": 2.7800441951385348e-05, "loss": 2.557, "step": 19530 }, { "epoch": 3.321434642189359, "grad_norm": 14.305805206298828, "learning_rate": 2.77976089296844e-05, "loss": 2.5568, "step": 19540 }, { "epoch": 3.323134455209927, "grad_norm": 9.11830997467041, "learning_rate": 2.7794775907983458e-05, "loss": 2.5653, "step": 19550 }, { "epoch": 3.3248342682304948, "grad_norm": 11.775794982910156, "learning_rate": 2.7791942886282508e-05, "loss": 2.7042, "step": 19560 }, { "epoch": 3.3265340812510624, "grad_norm": 15.201080322265625, "learning_rate": 2.7789109864581565e-05, "loss": 2.5109, "step": 19570 }, { "epoch": 3.32823389427163, "grad_norm": 19.558645248413086, "learning_rate": 2.778627684288062e-05, "loss": 2.3484, "step": 19580 }, { "epoch": 3.3299337072921977, "grad_norm": 12.89198112487793, "learning_rate": 2.778344382117967e-05, "loss": 2.6117, "step": 19590 }, { "epoch": 3.3316335203127654, "grad_norm": 15.35496997833252, "learning_rate": 2.7780610799478726e-05, "loss": 2.5809, "step": 19600 }, { "epoch": 3.3333333333333335, "grad_norm": 15.325492858886719, "learning_rate": 2.777777777777778e-05, "loss": 2.4752, "step": 19610 }, { "epoch": 3.335033146353901, "grad_norm": 18.783750534057617, "learning_rate": 2.7774944756076833e-05, "loss": 2.5242, "step": 19620 }, { "epoch": 3.336732959374469, "grad_norm": 16.74245262145996, "learning_rate": 2.7772111734375886e-05, "loss": 2.6719, "step": 19630 }, { "epoch": 3.3384327723950364, "grad_norm": 10.859732627868652, "learning_rate": 2.776927871267494e-05, "loss": 2.4813, "step": 19640 }, { "epoch": 3.340132585415604, "grad_norm": 14.511825561523438, "learning_rate": 2.7766445690973993e-05, "loss": 2.3343, "step": 19650 }, { "epoch": 3.341832398436172, "grad_norm": 14.423493385314941, "learning_rate": 2.776361266927305e-05, "loss": 2.7461, "step": 19660 }, { "epoch": 3.34353221145674, "grad_norm": 8.046149253845215, "learning_rate": 2.77607796475721e-05, "loss": 2.7522, "step": 19670 }, { "epoch": 3.3452320244773075, "grad_norm": 17.32529640197754, "learning_rate": 2.7757946625871154e-05, "loss": 2.539, "step": 19680 }, { "epoch": 3.346931837497875, "grad_norm": 14.009817123413086, "learning_rate": 2.775511360417021e-05, "loss": 2.6555, "step": 19690 }, { "epoch": 3.348631650518443, "grad_norm": 21.712055206298828, "learning_rate": 2.7752280582469264e-05, "loss": 2.5081, "step": 19700 }, { "epoch": 3.350331463539011, "grad_norm": 27.656951904296875, "learning_rate": 2.7749447560768314e-05, "loss": 2.4066, "step": 19710 }, { "epoch": 3.3520312765595786, "grad_norm": 16.01283836364746, "learning_rate": 2.774661453906737e-05, "loss": 2.5171, "step": 19720 }, { "epoch": 3.353731089580146, "grad_norm": 16.046611785888672, "learning_rate": 2.7743781517366425e-05, "loss": 2.4804, "step": 19730 }, { "epoch": 3.355430902600714, "grad_norm": 14.854998588562012, "learning_rate": 2.7740948495665478e-05, "loss": 2.5575, "step": 19740 }, { "epoch": 3.3571307156212815, "grad_norm": 12.338322639465332, "learning_rate": 2.7738115473964532e-05, "loss": 2.3878, "step": 19750 }, { "epoch": 3.3588305286418496, "grad_norm": 10.492987632751465, "learning_rate": 2.7735282452263585e-05, "loss": 2.1415, "step": 19760 }, { "epoch": 3.3605303416624173, "grad_norm": 12.090642929077148, "learning_rate": 2.773244943056264e-05, "loss": 2.5066, "step": 19770 }, { "epoch": 3.362230154682985, "grad_norm": 15.146027565002441, "learning_rate": 2.7729616408861692e-05, "loss": 2.3205, "step": 19780 }, { "epoch": 3.3639299677035526, "grad_norm": 13.457978248596191, "learning_rate": 2.7726783387160746e-05, "loss": 2.3825, "step": 19790 }, { "epoch": 3.3656297807241202, "grad_norm": 16.169641494750977, "learning_rate": 2.77239503654598e-05, "loss": 2.6055, "step": 19800 }, { "epoch": 3.367329593744688, "grad_norm": 11.565932273864746, "learning_rate": 2.7721117343758856e-05, "loss": 2.6128, "step": 19810 }, { "epoch": 3.369029406765256, "grad_norm": 15.028680801391602, "learning_rate": 2.7718284322057906e-05, "loss": 2.5572, "step": 19820 }, { "epoch": 3.3707292197858236, "grad_norm": 13.394840240478516, "learning_rate": 2.771545130035696e-05, "loss": 2.4381, "step": 19830 }, { "epoch": 3.3724290328063913, "grad_norm": 16.570140838623047, "learning_rate": 2.7712618278656017e-05, "loss": 2.4484, "step": 19840 }, { "epoch": 3.374128845826959, "grad_norm": 11.577371597290039, "learning_rate": 2.770978525695507e-05, "loss": 2.6549, "step": 19850 }, { "epoch": 3.3758286588475266, "grad_norm": 13.198887825012207, "learning_rate": 2.770695223525412e-05, "loss": 2.4221, "step": 19860 }, { "epoch": 3.3775284718680947, "grad_norm": 10.290022850036621, "learning_rate": 2.7704119213553177e-05, "loss": 2.3207, "step": 19870 }, { "epoch": 3.3792282848886623, "grad_norm": 14.533008575439453, "learning_rate": 2.770128619185223e-05, "loss": 2.342, "step": 19880 }, { "epoch": 3.38092809790923, "grad_norm": 14.469834327697754, "learning_rate": 2.7698453170151284e-05, "loss": 2.5982, "step": 19890 }, { "epoch": 3.3826279109297976, "grad_norm": 11.281497955322266, "learning_rate": 2.7695620148450338e-05, "loss": 2.4465, "step": 19900 }, { "epoch": 3.3843277239503653, "grad_norm": 12.113445281982422, "learning_rate": 2.769278712674939e-05, "loss": 2.7005, "step": 19910 }, { "epoch": 3.386027536970933, "grad_norm": 24.76362419128418, "learning_rate": 2.7689954105048445e-05, "loss": 2.4933, "step": 19920 }, { "epoch": 3.387727349991501, "grad_norm": 17.40401840209961, "learning_rate": 2.7687121083347502e-05, "loss": 2.7201, "step": 19930 }, { "epoch": 3.3894271630120687, "grad_norm": 8.218649864196777, "learning_rate": 2.7684288061646552e-05, "loss": 2.5367, "step": 19940 }, { "epoch": 3.3911269760326364, "grad_norm": 9.822829246520996, "learning_rate": 2.7681455039945606e-05, "loss": 2.6434, "step": 19950 }, { "epoch": 3.392826789053204, "grad_norm": 17.643964767456055, "learning_rate": 2.7678622018244663e-05, "loss": 2.5636, "step": 19960 }, { "epoch": 3.394526602073772, "grad_norm": 32.88461685180664, "learning_rate": 2.7675788996543713e-05, "loss": 2.4682, "step": 19970 }, { "epoch": 3.3962264150943398, "grad_norm": 10.80395793914795, "learning_rate": 2.7672955974842766e-05, "loss": 2.4718, "step": 19980 }, { "epoch": 3.3979262281149074, "grad_norm": 12.297900199890137, "learning_rate": 2.7670122953141823e-05, "loss": 2.6027, "step": 19990 }, { "epoch": 3.399626041135475, "grad_norm": 12.185075759887695, "learning_rate": 2.7667289931440877e-05, "loss": 2.4984, "step": 20000 }, { "epoch": 3.4013258541560427, "grad_norm": 16.522762298583984, "learning_rate": 2.7664456909739927e-05, "loss": 2.296, "step": 20010 }, { "epoch": 3.4030256671766104, "grad_norm": 10.348998069763184, "learning_rate": 2.7661623888038984e-05, "loss": 2.7701, "step": 20020 }, { "epoch": 3.4047254801971785, "grad_norm": 11.362290382385254, "learning_rate": 2.7658790866338037e-05, "loss": 2.6897, "step": 20030 }, { "epoch": 3.406425293217746, "grad_norm": 11.027960777282715, "learning_rate": 2.765595784463709e-05, "loss": 2.4888, "step": 20040 }, { "epoch": 3.4081251062383138, "grad_norm": 22.09165382385254, "learning_rate": 2.7653124822936144e-05, "loss": 2.6028, "step": 20050 }, { "epoch": 3.4098249192588814, "grad_norm": 19.45311164855957, "learning_rate": 2.7650291801235198e-05, "loss": 2.3104, "step": 20060 }, { "epoch": 3.411524732279449, "grad_norm": 14.819692611694336, "learning_rate": 2.764745877953425e-05, "loss": 2.3226, "step": 20070 }, { "epoch": 3.413224545300017, "grad_norm": 8.75156021118164, "learning_rate": 2.7644625757833308e-05, "loss": 2.5489, "step": 20080 }, { "epoch": 3.414924358320585, "grad_norm": 8.019845008850098, "learning_rate": 2.764179273613236e-05, "loss": 2.3851, "step": 20090 }, { "epoch": 3.4166241713411525, "grad_norm": 24.087648391723633, "learning_rate": 2.7638959714431412e-05, "loss": 2.6142, "step": 20100 }, { "epoch": 3.41832398436172, "grad_norm": 11.293360710144043, "learning_rate": 2.763612669273047e-05, "loss": 2.3172, "step": 20110 }, { "epoch": 3.420023797382288, "grad_norm": 10.950393676757812, "learning_rate": 2.763329367102952e-05, "loss": 2.3562, "step": 20120 }, { "epoch": 3.4217236104028554, "grad_norm": 19.24530792236328, "learning_rate": 2.7630460649328572e-05, "loss": 2.5683, "step": 20130 }, { "epoch": 3.4234234234234235, "grad_norm": 13.07168960571289, "learning_rate": 2.762762762762763e-05, "loss": 2.7615, "step": 20140 }, { "epoch": 3.425123236443991, "grad_norm": 13.445796966552734, "learning_rate": 2.7624794605926683e-05, "loss": 2.561, "step": 20150 }, { "epoch": 3.426823049464559, "grad_norm": 20.462247848510742, "learning_rate": 2.7621961584225733e-05, "loss": 2.486, "step": 20160 }, { "epoch": 3.4285228624851265, "grad_norm": 15.053642272949219, "learning_rate": 2.761912856252479e-05, "loss": 2.7156, "step": 20170 }, { "epoch": 3.4302226755056946, "grad_norm": 13.998739242553711, "learning_rate": 2.7616295540823843e-05, "loss": 2.7156, "step": 20180 }, { "epoch": 3.4319224885262622, "grad_norm": 10.825549125671387, "learning_rate": 2.7613462519122897e-05, "loss": 2.4992, "step": 20190 }, { "epoch": 3.43362230154683, "grad_norm": 9.147700309753418, "learning_rate": 2.761062949742195e-05, "loss": 2.3764, "step": 20200 }, { "epoch": 3.4353221145673976, "grad_norm": 17.212177276611328, "learning_rate": 2.7607796475721004e-05, "loss": 2.5099, "step": 20210 }, { "epoch": 3.437021927587965, "grad_norm": 17.55217933654785, "learning_rate": 2.7604963454020057e-05, "loss": 2.5186, "step": 20220 }, { "epoch": 3.438721740608533, "grad_norm": 11.938994407653809, "learning_rate": 2.7602130432319114e-05, "loss": 2.689, "step": 20230 }, { "epoch": 3.440421553629101, "grad_norm": 9.559510231018066, "learning_rate": 2.7599297410618165e-05, "loss": 2.7536, "step": 20240 }, { "epoch": 3.4421213666496686, "grad_norm": 15.30740737915039, "learning_rate": 2.7596464388917218e-05, "loss": 2.5573, "step": 20250 }, { "epoch": 3.4438211796702363, "grad_norm": 10.543770790100098, "learning_rate": 2.7593631367216275e-05, "loss": 2.4693, "step": 20260 }, { "epoch": 3.445520992690804, "grad_norm": 17.55699348449707, "learning_rate": 2.759079834551533e-05, "loss": 2.4038, "step": 20270 }, { "epoch": 3.4472208057113716, "grad_norm": 19.99183464050293, "learning_rate": 2.758796532381438e-05, "loss": 2.4889, "step": 20280 }, { "epoch": 3.4489206187319397, "grad_norm": 11.368446350097656, "learning_rate": 2.7585132302113436e-05, "loss": 2.5276, "step": 20290 }, { "epoch": 3.4506204317525073, "grad_norm": 19.87731170654297, "learning_rate": 2.758229928041249e-05, "loss": 2.3995, "step": 20300 }, { "epoch": 3.452320244773075, "grad_norm": 24.401737213134766, "learning_rate": 2.7579466258711543e-05, "loss": 2.4701, "step": 20310 }, { "epoch": 3.4540200577936426, "grad_norm": 12.447510719299316, "learning_rate": 2.7576633237010596e-05, "loss": 2.6309, "step": 20320 }, { "epoch": 3.4557198708142103, "grad_norm": 13.32746410369873, "learning_rate": 2.757380021530965e-05, "loss": 2.7287, "step": 20330 }, { "epoch": 3.457419683834778, "grad_norm": 10.434097290039062, "learning_rate": 2.7570967193608707e-05, "loss": 2.6268, "step": 20340 }, { "epoch": 3.459119496855346, "grad_norm": 8.925886154174805, "learning_rate": 2.7568134171907757e-05, "loss": 2.4776, "step": 20350 }, { "epoch": 3.4608193098759137, "grad_norm": 14.31127643585205, "learning_rate": 2.756530115020681e-05, "loss": 2.584, "step": 20360 }, { "epoch": 3.4625191228964813, "grad_norm": 13.911983489990234, "learning_rate": 2.7562468128505867e-05, "loss": 2.482, "step": 20370 }, { "epoch": 3.464218935917049, "grad_norm": 14.081063270568848, "learning_rate": 2.755963510680492e-05, "loss": 2.5276, "step": 20380 }, { "epoch": 3.465918748937617, "grad_norm": 12.56602668762207, "learning_rate": 2.755680208510397e-05, "loss": 2.414, "step": 20390 }, { "epoch": 3.4676185619581847, "grad_norm": 26.15435028076172, "learning_rate": 2.7553969063403028e-05, "loss": 2.4897, "step": 20400 }, { "epoch": 3.4693183749787524, "grad_norm": 15.941680908203125, "learning_rate": 2.755113604170208e-05, "loss": 2.5455, "step": 20410 }, { "epoch": 3.47101818799932, "grad_norm": 12.02281665802002, "learning_rate": 2.7548303020001135e-05, "loss": 2.572, "step": 20420 }, { "epoch": 3.4727180010198877, "grad_norm": 16.0913143157959, "learning_rate": 2.7545469998300188e-05, "loss": 2.4507, "step": 20430 }, { "epoch": 3.4744178140404554, "grad_norm": 14.180696487426758, "learning_rate": 2.7542636976599242e-05, "loss": 2.6989, "step": 20440 }, { "epoch": 3.4761176270610235, "grad_norm": 17.585739135742188, "learning_rate": 2.7539803954898295e-05, "loss": 2.5443, "step": 20450 }, { "epoch": 3.477817440081591, "grad_norm": 12.313392639160156, "learning_rate": 2.753697093319735e-05, "loss": 2.491, "step": 20460 }, { "epoch": 3.4795172531021588, "grad_norm": 9.478325843811035, "learning_rate": 2.7534137911496402e-05, "loss": 2.4503, "step": 20470 }, { "epoch": 3.4812170661227264, "grad_norm": 15.125774383544922, "learning_rate": 2.7531304889795456e-05, "loss": 2.4418, "step": 20480 }, { "epoch": 3.482916879143294, "grad_norm": 29.94918441772461, "learning_rate": 2.7528471868094513e-05, "loss": 2.3492, "step": 20490 }, { "epoch": 3.484616692163862, "grad_norm": 19.353954315185547, "learning_rate": 2.7525638846393563e-05, "loss": 2.3142, "step": 20500 }, { "epoch": 3.48631650518443, "grad_norm": 19.12765884399414, "learning_rate": 2.7522805824692616e-05, "loss": 2.647, "step": 20510 }, { "epoch": 3.4880163182049975, "grad_norm": 11.060041427612305, "learning_rate": 2.7519972802991673e-05, "loss": 2.5796, "step": 20520 }, { "epoch": 3.489716131225565, "grad_norm": 12.340753555297852, "learning_rate": 2.7517139781290727e-05, "loss": 2.4469, "step": 20530 }, { "epoch": 3.4914159442461328, "grad_norm": 17.970165252685547, "learning_rate": 2.7514306759589777e-05, "loss": 2.3723, "step": 20540 }, { "epoch": 3.4931157572667004, "grad_norm": 17.864534378051758, "learning_rate": 2.7511473737888834e-05, "loss": 2.4956, "step": 20550 }, { "epoch": 3.4948155702872685, "grad_norm": 11.554681777954102, "learning_rate": 2.7508640716187887e-05, "loss": 2.598, "step": 20560 }, { "epoch": 3.496515383307836, "grad_norm": 10.519311904907227, "learning_rate": 2.750580769448694e-05, "loss": 2.3557, "step": 20570 }, { "epoch": 3.498215196328404, "grad_norm": 10.983333587646484, "learning_rate": 2.7502974672785994e-05, "loss": 2.6274, "step": 20580 }, { "epoch": 3.4999150093489715, "grad_norm": 15.879817962646484, "learning_rate": 2.7500141651085048e-05, "loss": 2.2354, "step": 20590 }, { "epoch": 3.5016148223695396, "grad_norm": 16.86945343017578, "learning_rate": 2.74973086293841e-05, "loss": 2.4271, "step": 20600 }, { "epoch": 3.5033146353901072, "grad_norm": 15.617610931396484, "learning_rate": 2.749447560768316e-05, "loss": 2.4625, "step": 20610 }, { "epoch": 3.505014448410675, "grad_norm": 20.582738876342773, "learning_rate": 2.749164258598221e-05, "loss": 2.4452, "step": 20620 }, { "epoch": 3.5067142614312425, "grad_norm": 13.294759750366211, "learning_rate": 2.7488809564281262e-05, "loss": 2.5187, "step": 20630 }, { "epoch": 3.50841407445181, "grad_norm": 14.493097305297852, "learning_rate": 2.748597654258032e-05, "loss": 2.7029, "step": 20640 }, { "epoch": 3.510113887472378, "grad_norm": 15.922017097473145, "learning_rate": 2.748314352087937e-05, "loss": 2.581, "step": 20650 }, { "epoch": 3.5118137004929455, "grad_norm": 13.977204322814941, "learning_rate": 2.7480310499178423e-05, "loss": 2.4972, "step": 20660 }, { "epoch": 3.5135135135135136, "grad_norm": 13.064535140991211, "learning_rate": 2.747747747747748e-05, "loss": 2.5085, "step": 20670 }, { "epoch": 3.5152133265340813, "grad_norm": 14.405777931213379, "learning_rate": 2.7474644455776533e-05, "loss": 2.5882, "step": 20680 }, { "epoch": 3.516913139554649, "grad_norm": 12.826098442077637, "learning_rate": 2.7471811434075583e-05, "loss": 2.5732, "step": 20690 }, { "epoch": 3.518612952575217, "grad_norm": 10.987842559814453, "learning_rate": 2.746897841237464e-05, "loss": 2.7397, "step": 20700 }, { "epoch": 3.5203127655957847, "grad_norm": 18.222000122070312, "learning_rate": 2.7466145390673694e-05, "loss": 2.5925, "step": 20710 }, { "epoch": 3.5220125786163523, "grad_norm": 10.681024551391602, "learning_rate": 2.7463312368972747e-05, "loss": 2.5982, "step": 20720 }, { "epoch": 3.52371239163692, "grad_norm": 19.53609848022461, "learning_rate": 2.74604793472718e-05, "loss": 2.4177, "step": 20730 }, { "epoch": 3.5254122046574876, "grad_norm": 13.539602279663086, "learning_rate": 2.7457646325570854e-05, "loss": 2.5429, "step": 20740 }, { "epoch": 3.5271120176780553, "grad_norm": 13.273680686950684, "learning_rate": 2.7454813303869908e-05, "loss": 2.499, "step": 20750 }, { "epoch": 3.528811830698623, "grad_norm": 17.715267181396484, "learning_rate": 2.7451980282168965e-05, "loss": 2.4914, "step": 20760 }, { "epoch": 3.530511643719191, "grad_norm": 15.195987701416016, "learning_rate": 2.7449147260468015e-05, "loss": 2.5208, "step": 20770 }, { "epoch": 3.5322114567397587, "grad_norm": 16.28022003173828, "learning_rate": 2.7446314238767068e-05, "loss": 2.2199, "step": 20780 }, { "epoch": 3.5339112697603263, "grad_norm": 13.978682518005371, "learning_rate": 2.7443481217066125e-05, "loss": 2.5243, "step": 20790 }, { "epoch": 3.535611082780894, "grad_norm": 13.690159797668457, "learning_rate": 2.7440648195365175e-05, "loss": 2.5985, "step": 20800 }, { "epoch": 3.537310895801462, "grad_norm": 15.770135879516602, "learning_rate": 2.743781517366423e-05, "loss": 2.5779, "step": 20810 }, { "epoch": 3.5390107088220297, "grad_norm": 17.9141845703125, "learning_rate": 2.7434982151963286e-05, "loss": 2.4148, "step": 20820 }, { "epoch": 3.5407105218425974, "grad_norm": 13.56747817993164, "learning_rate": 2.743214913026234e-05, "loss": 2.4877, "step": 20830 }, { "epoch": 3.542410334863165, "grad_norm": 11.91443920135498, "learning_rate": 2.742931610856139e-05, "loss": 2.8032, "step": 20840 }, { "epoch": 3.5441101478837327, "grad_norm": 21.869159698486328, "learning_rate": 2.7426483086860446e-05, "loss": 2.4712, "step": 20850 }, { "epoch": 3.5458099609043003, "grad_norm": 13.963844299316406, "learning_rate": 2.74236500651595e-05, "loss": 2.5815, "step": 20860 }, { "epoch": 3.547509773924868, "grad_norm": 7.978496551513672, "learning_rate": 2.7420817043458553e-05, "loss": 2.1746, "step": 20870 }, { "epoch": 3.549209586945436, "grad_norm": 11.176692008972168, "learning_rate": 2.7417984021757607e-05, "loss": 2.4192, "step": 20880 }, { "epoch": 3.5509093999660037, "grad_norm": 9.793103218078613, "learning_rate": 2.741515100005666e-05, "loss": 2.6036, "step": 20890 }, { "epoch": 3.5526092129865714, "grad_norm": 11.12476634979248, "learning_rate": 2.7412317978355714e-05, "loss": 2.356, "step": 20900 }, { "epoch": 3.554309026007139, "grad_norm": 12.391279220581055, "learning_rate": 2.740948495665477e-05, "loss": 2.5693, "step": 20910 }, { "epoch": 3.556008839027707, "grad_norm": 15.632126808166504, "learning_rate": 2.740665193495382e-05, "loss": 2.5437, "step": 20920 }, { "epoch": 3.557708652048275, "grad_norm": 11.563889503479004, "learning_rate": 2.7403818913252874e-05, "loss": 2.6849, "step": 20930 }, { "epoch": 3.5594084650688425, "grad_norm": 12.856281280517578, "learning_rate": 2.740098589155193e-05, "loss": 2.6181, "step": 20940 }, { "epoch": 3.56110827808941, "grad_norm": 9.11397647857666, "learning_rate": 2.7398152869850985e-05, "loss": 2.4847, "step": 20950 }, { "epoch": 3.5628080911099778, "grad_norm": 58.72993850708008, "learning_rate": 2.7395319848150035e-05, "loss": 2.7938, "step": 20960 }, { "epoch": 3.5645079041305454, "grad_norm": 11.948539733886719, "learning_rate": 2.7392486826449092e-05, "loss": 2.6338, "step": 20970 }, { "epoch": 3.5662077171511135, "grad_norm": 10.356751441955566, "learning_rate": 2.7389653804748145e-05, "loss": 2.5328, "step": 20980 }, { "epoch": 3.567907530171681, "grad_norm": 31.45779037475586, "learning_rate": 2.7386820783047196e-05, "loss": 2.4459, "step": 20990 }, { "epoch": 3.569607343192249, "grad_norm": 19.494022369384766, "learning_rate": 2.7383987761346253e-05, "loss": 2.3207, "step": 21000 }, { "epoch": 3.5713071562128165, "grad_norm": 12.74714183807373, "learning_rate": 2.7381154739645306e-05, "loss": 2.4661, "step": 21010 }, { "epoch": 3.5730069692333846, "grad_norm": 15.437471389770508, "learning_rate": 2.737832171794436e-05, "loss": 2.5546, "step": 21020 }, { "epoch": 3.574706782253952, "grad_norm": 15.330528259277344, "learning_rate": 2.7375488696243413e-05, "loss": 2.4261, "step": 21030 }, { "epoch": 3.57640659527452, "grad_norm": 12.6378173828125, "learning_rate": 2.7372655674542467e-05, "loss": 2.4503, "step": 21040 }, { "epoch": 3.5781064082950875, "grad_norm": 12.43940544128418, "learning_rate": 2.7369822652841524e-05, "loss": 2.5255, "step": 21050 }, { "epoch": 3.579806221315655, "grad_norm": 18.9044246673584, "learning_rate": 2.7366989631140577e-05, "loss": 2.4916, "step": 21060 }, { "epoch": 3.581506034336223, "grad_norm": 15.177953720092773, "learning_rate": 2.7364156609439627e-05, "loss": 2.4525, "step": 21070 }, { "epoch": 3.5832058473567905, "grad_norm": 10.710208892822266, "learning_rate": 2.7361323587738684e-05, "loss": 2.5116, "step": 21080 }, { "epoch": 3.5849056603773586, "grad_norm": 17.64878273010254, "learning_rate": 2.7358490566037738e-05, "loss": 2.574, "step": 21090 }, { "epoch": 3.5866054733979262, "grad_norm": 10.2370023727417, "learning_rate": 2.735565754433679e-05, "loss": 2.6453, "step": 21100 }, { "epoch": 3.588305286418494, "grad_norm": 18.630300521850586, "learning_rate": 2.7352824522635845e-05, "loss": 2.6559, "step": 21110 }, { "epoch": 3.5900050994390615, "grad_norm": 14.302226066589355, "learning_rate": 2.7349991500934898e-05, "loss": 2.4719, "step": 21120 }, { "epoch": 3.5917049124596296, "grad_norm": 10.395439147949219, "learning_rate": 2.7347158479233952e-05, "loss": 2.6502, "step": 21130 }, { "epoch": 3.5934047254801973, "grad_norm": 15.22091007232666, "learning_rate": 2.734432545753301e-05, "loss": 2.3345, "step": 21140 }, { "epoch": 3.595104538500765, "grad_norm": 11.322243690490723, "learning_rate": 2.734149243583206e-05, "loss": 2.4955, "step": 21150 }, { "epoch": 3.5968043515213326, "grad_norm": 11.56309700012207, "learning_rate": 2.7338659414131112e-05, "loss": 2.4455, "step": 21160 }, { "epoch": 3.5985041645419003, "grad_norm": 13.797243118286133, "learning_rate": 2.733582639243017e-05, "loss": 2.6067, "step": 21170 }, { "epoch": 3.600203977562468, "grad_norm": 13.959481239318848, "learning_rate": 2.733299337072922e-05, "loss": 2.3427, "step": 21180 }, { "epoch": 3.601903790583036, "grad_norm": 9.871373176574707, "learning_rate": 2.7330160349028273e-05, "loss": 2.4811, "step": 21190 }, { "epoch": 3.6036036036036037, "grad_norm": 14.184432983398438, "learning_rate": 2.732732732732733e-05, "loss": 2.2272, "step": 21200 }, { "epoch": 3.6053034166241713, "grad_norm": 11.851542472839355, "learning_rate": 2.7324494305626383e-05, "loss": 2.6312, "step": 21210 }, { "epoch": 3.607003229644739, "grad_norm": 10.387330055236816, "learning_rate": 2.7321661283925433e-05, "loss": 2.4045, "step": 21220 }, { "epoch": 3.608703042665307, "grad_norm": 16.388423919677734, "learning_rate": 2.731882826222449e-05, "loss": 2.5788, "step": 21230 }, { "epoch": 3.6104028556858747, "grad_norm": 12.962925910949707, "learning_rate": 2.7315995240523544e-05, "loss": 2.4236, "step": 21240 }, { "epoch": 3.6121026687064424, "grad_norm": 11.404024124145508, "learning_rate": 2.7313162218822597e-05, "loss": 2.559, "step": 21250 }, { "epoch": 3.61380248172701, "grad_norm": 20.738664627075195, "learning_rate": 2.731032919712165e-05, "loss": 1.903, "step": 21260 }, { "epoch": 3.6155022947475777, "grad_norm": 17.740371704101562, "learning_rate": 2.7307496175420704e-05, "loss": 2.4018, "step": 21270 }, { "epoch": 3.6172021077681453, "grad_norm": 17.91849708557129, "learning_rate": 2.7304663153719758e-05, "loss": 2.6501, "step": 21280 }, { "epoch": 3.618901920788713, "grad_norm": 13.879193305969238, "learning_rate": 2.7301830132018815e-05, "loss": 2.5202, "step": 21290 }, { "epoch": 3.620601733809281, "grad_norm": 15.09332275390625, "learning_rate": 2.7298997110317865e-05, "loss": 2.6049, "step": 21300 }, { "epoch": 3.6223015468298487, "grad_norm": 10.53779411315918, "learning_rate": 2.729616408861692e-05, "loss": 2.6978, "step": 21310 }, { "epoch": 3.6240013598504164, "grad_norm": 13.930895805358887, "learning_rate": 2.7293331066915975e-05, "loss": 2.3371, "step": 21320 }, { "epoch": 3.625701172870984, "grad_norm": 14.649184226989746, "learning_rate": 2.7290498045215026e-05, "loss": 2.5303, "step": 21330 }, { "epoch": 3.627400985891552, "grad_norm": 14.418166160583496, "learning_rate": 2.728766502351408e-05, "loss": 2.5276, "step": 21340 }, { "epoch": 3.62910079891212, "grad_norm": 14.255292892456055, "learning_rate": 2.7284832001813136e-05, "loss": 2.5416, "step": 21350 }, { "epoch": 3.6308006119326874, "grad_norm": 13.542830467224121, "learning_rate": 2.728199898011219e-05, "loss": 2.4296, "step": 21360 }, { "epoch": 3.632500424953255, "grad_norm": 17.823244094848633, "learning_rate": 2.727916595841124e-05, "loss": 2.5514, "step": 21370 }, { "epoch": 3.6342002379738227, "grad_norm": 11.288703918457031, "learning_rate": 2.7276332936710297e-05, "loss": 2.4107, "step": 21380 }, { "epoch": 3.6359000509943904, "grad_norm": 19.72150230407715, "learning_rate": 2.727349991500935e-05, "loss": 2.3883, "step": 21390 }, { "epoch": 3.6375998640149585, "grad_norm": 21.401775360107422, "learning_rate": 2.7270666893308404e-05, "loss": 2.4078, "step": 21400 }, { "epoch": 3.639299677035526, "grad_norm": 13.06580638885498, "learning_rate": 2.7267833871607457e-05, "loss": 2.6714, "step": 21410 }, { "epoch": 3.640999490056094, "grad_norm": 19.108867645263672, "learning_rate": 2.726500084990651e-05, "loss": 2.5341, "step": 21420 }, { "epoch": 3.6426993030766615, "grad_norm": 16.567996978759766, "learning_rate": 2.7262167828205564e-05, "loss": 2.5147, "step": 21430 }, { "epoch": 3.6443991160972296, "grad_norm": 17.890066146850586, "learning_rate": 2.725933480650462e-05, "loss": 2.5986, "step": 21440 }, { "epoch": 3.646098929117797, "grad_norm": 17.854211807250977, "learning_rate": 2.725650178480367e-05, "loss": 2.5535, "step": 21450 }, { "epoch": 3.647798742138365, "grad_norm": 13.906492233276367, "learning_rate": 2.7253668763102725e-05, "loss": 2.5204, "step": 21460 }, { "epoch": 3.6494985551589325, "grad_norm": 18.600360870361328, "learning_rate": 2.725083574140178e-05, "loss": 2.5168, "step": 21470 }, { "epoch": 3.6511983681795, "grad_norm": 11.256000518798828, "learning_rate": 2.7248002719700835e-05, "loss": 2.5014, "step": 21480 }, { "epoch": 3.652898181200068, "grad_norm": 17.22154998779297, "learning_rate": 2.7245169697999885e-05, "loss": 2.2846, "step": 21490 }, { "epoch": 3.6545979942206355, "grad_norm": 10.356785774230957, "learning_rate": 2.7242336676298942e-05, "loss": 2.6649, "step": 21500 }, { "epoch": 3.6562978072412036, "grad_norm": 19.649486541748047, "learning_rate": 2.7239503654597996e-05, "loss": 2.3277, "step": 21510 }, { "epoch": 3.657997620261771, "grad_norm": 28.00931739807129, "learning_rate": 2.7236670632897046e-05, "loss": 2.5427, "step": 21520 }, { "epoch": 3.659697433282339, "grad_norm": 13.911473274230957, "learning_rate": 2.7233837611196103e-05, "loss": 2.5328, "step": 21530 }, { "epoch": 3.6613972463029065, "grad_norm": 15.997553825378418, "learning_rate": 2.7231004589495156e-05, "loss": 2.3561, "step": 21540 }, { "epoch": 3.6630970593234746, "grad_norm": 13.952118873596191, "learning_rate": 2.722817156779421e-05, "loss": 2.3553, "step": 21550 }, { "epoch": 3.6647968723440423, "grad_norm": 16.36371612548828, "learning_rate": 2.7225338546093263e-05, "loss": 2.4298, "step": 21560 }, { "epoch": 3.66649668536461, "grad_norm": 8.449952125549316, "learning_rate": 2.7222505524392317e-05, "loss": 2.5359, "step": 21570 }, { "epoch": 3.6681964983851776, "grad_norm": 13.514107704162598, "learning_rate": 2.721967250269137e-05, "loss": 2.3787, "step": 21580 }, { "epoch": 3.6698963114057452, "grad_norm": 11.37895679473877, "learning_rate": 2.7216839480990427e-05, "loss": 2.5457, "step": 21590 }, { "epoch": 3.671596124426313, "grad_norm": 14.041535377502441, "learning_rate": 2.7214006459289477e-05, "loss": 2.4543, "step": 21600 }, { "epoch": 3.673295937446881, "grad_norm": 11.831180572509766, "learning_rate": 2.721117343758853e-05, "loss": 2.4834, "step": 21610 }, { "epoch": 3.6749957504674486, "grad_norm": 9.327508926391602, "learning_rate": 2.7208340415887588e-05, "loss": 2.6624, "step": 21620 }, { "epoch": 3.6766955634880163, "grad_norm": 22.106449127197266, "learning_rate": 2.720550739418664e-05, "loss": 2.3476, "step": 21630 }, { "epoch": 3.678395376508584, "grad_norm": 14.583582878112793, "learning_rate": 2.720267437248569e-05, "loss": 2.497, "step": 21640 }, { "epoch": 3.680095189529152, "grad_norm": 9.870152473449707, "learning_rate": 2.719984135078475e-05, "loss": 2.5693, "step": 21650 }, { "epoch": 3.6817950025497197, "grad_norm": 18.24418067932129, "learning_rate": 2.7197008329083802e-05, "loss": 2.5893, "step": 21660 }, { "epoch": 3.6834948155702874, "grad_norm": 10.3903226852417, "learning_rate": 2.7194175307382852e-05, "loss": 2.2778, "step": 21670 }, { "epoch": 3.685194628590855, "grad_norm": 29.047607421875, "learning_rate": 2.719134228568191e-05, "loss": 2.4473, "step": 21680 }, { "epoch": 3.6868944416114227, "grad_norm": 14.837457656860352, "learning_rate": 2.7188509263980962e-05, "loss": 2.4845, "step": 21690 }, { "epoch": 3.6885942546319903, "grad_norm": 9.199410438537598, "learning_rate": 2.7185676242280016e-05, "loss": 2.4407, "step": 21700 }, { "epoch": 3.690294067652558, "grad_norm": 12.298774719238281, "learning_rate": 2.718284322057907e-05, "loss": 2.5033, "step": 21710 }, { "epoch": 3.691993880673126, "grad_norm": 9.648804664611816, "learning_rate": 2.7180010198878123e-05, "loss": 2.5605, "step": 21720 }, { "epoch": 3.6936936936936937, "grad_norm": 15.608654022216797, "learning_rate": 2.7177177177177177e-05, "loss": 2.5252, "step": 21730 }, { "epoch": 3.6953935067142614, "grad_norm": 15.479043960571289, "learning_rate": 2.7174344155476233e-05, "loss": 2.3163, "step": 21740 }, { "epoch": 3.697093319734829, "grad_norm": 17.16513442993164, "learning_rate": 2.7171511133775284e-05, "loss": 2.6183, "step": 21750 }, { "epoch": 3.698793132755397, "grad_norm": 10.058859825134277, "learning_rate": 2.716867811207434e-05, "loss": 2.4248, "step": 21760 }, { "epoch": 3.7004929457759648, "grad_norm": 12.32784366607666, "learning_rate": 2.7165845090373394e-05, "loss": 2.6206, "step": 21770 }, { "epoch": 3.7021927587965324, "grad_norm": 14.236892700195312, "learning_rate": 2.7163012068672448e-05, "loss": 2.5295, "step": 21780 }, { "epoch": 3.7038925718171, "grad_norm": 13.14928150177002, "learning_rate": 2.71601790469715e-05, "loss": 2.6068, "step": 21790 }, { "epoch": 3.7055923848376677, "grad_norm": 13.840873718261719, "learning_rate": 2.7157346025270555e-05, "loss": 2.8492, "step": 21800 }, { "epoch": 3.7072921978582354, "grad_norm": 31.755569458007812, "learning_rate": 2.7154513003569608e-05, "loss": 2.4806, "step": 21810 }, { "epoch": 3.7089920108788035, "grad_norm": 10.605755805969238, "learning_rate": 2.7151679981868665e-05, "loss": 2.4996, "step": 21820 }, { "epoch": 3.710691823899371, "grad_norm": 16.378156661987305, "learning_rate": 2.7148846960167715e-05, "loss": 2.362, "step": 21830 }, { "epoch": 3.712391636919939, "grad_norm": 10.959246635437012, "learning_rate": 2.714601393846677e-05, "loss": 2.6201, "step": 21840 }, { "epoch": 3.7140914499405064, "grad_norm": 10.061384201049805, "learning_rate": 2.7143180916765826e-05, "loss": 2.6446, "step": 21850 }, { "epoch": 3.7157912629610745, "grad_norm": 14.902463912963867, "learning_rate": 2.7140347895064876e-05, "loss": 2.497, "step": 21860 }, { "epoch": 3.717491075981642, "grad_norm": 12.247943878173828, "learning_rate": 2.713751487336393e-05, "loss": 2.5303, "step": 21870 }, { "epoch": 3.71919088900221, "grad_norm": 16.443593978881836, "learning_rate": 2.7134681851662986e-05, "loss": 2.1537, "step": 21880 }, { "epoch": 3.7208907020227775, "grad_norm": 12.882294654846191, "learning_rate": 2.713184882996204e-05, "loss": 2.5021, "step": 21890 }, { "epoch": 3.722590515043345, "grad_norm": 16.884498596191406, "learning_rate": 2.712901580826109e-05, "loss": 2.3847, "step": 21900 }, { "epoch": 3.724290328063913, "grad_norm": 22.006202697753906, "learning_rate": 2.7126182786560147e-05, "loss": 2.3843, "step": 21910 }, { "epoch": 3.7259901410844805, "grad_norm": 16.074668884277344, "learning_rate": 2.71233497648592e-05, "loss": 2.3688, "step": 21920 }, { "epoch": 3.7276899541050486, "grad_norm": 15.010777473449707, "learning_rate": 2.7120516743158254e-05, "loss": 2.474, "step": 21930 }, { "epoch": 3.729389767125616, "grad_norm": 20.818416595458984, "learning_rate": 2.7117683721457307e-05, "loss": 2.3069, "step": 21940 }, { "epoch": 3.731089580146184, "grad_norm": 12.878812789916992, "learning_rate": 2.711485069975636e-05, "loss": 2.6421, "step": 21950 }, { "epoch": 3.7327893931667515, "grad_norm": 11.193002700805664, "learning_rate": 2.7112017678055414e-05, "loss": 2.7469, "step": 21960 }, { "epoch": 3.7344892061873196, "grad_norm": 18.445573806762695, "learning_rate": 2.710918465635447e-05, "loss": 2.3855, "step": 21970 }, { "epoch": 3.7361890192078873, "grad_norm": 14.309561729431152, "learning_rate": 2.710635163465352e-05, "loss": 2.3533, "step": 21980 }, { "epoch": 3.737888832228455, "grad_norm": 7.492197513580322, "learning_rate": 2.7103518612952575e-05, "loss": 2.3247, "step": 21990 }, { "epoch": 3.7395886452490226, "grad_norm": 36.195213317871094, "learning_rate": 2.7100685591251632e-05, "loss": 2.471, "step": 22000 }, { "epoch": 3.7412884582695902, "grad_norm": 16.52233123779297, "learning_rate": 2.7097852569550682e-05, "loss": 2.4417, "step": 22010 }, { "epoch": 3.742988271290158, "grad_norm": 14.107756614685059, "learning_rate": 2.7095019547849735e-05, "loss": 2.5076, "step": 22020 }, { "epoch": 3.744688084310726, "grad_norm": 11.166213035583496, "learning_rate": 2.7092186526148792e-05, "loss": 2.6561, "step": 22030 }, { "epoch": 3.7463878973312936, "grad_norm": 11.849102020263672, "learning_rate": 2.7089353504447846e-05, "loss": 2.2839, "step": 22040 }, { "epoch": 3.7480877103518613, "grad_norm": 15.069354057312012, "learning_rate": 2.7086520482746896e-05, "loss": 2.348, "step": 22050 }, { "epoch": 3.749787523372429, "grad_norm": 14.576180458068848, "learning_rate": 2.7083687461045953e-05, "loss": 2.4261, "step": 22060 }, { "epoch": 3.751487336392997, "grad_norm": 21.065593719482422, "learning_rate": 2.7080854439345006e-05, "loss": 2.5536, "step": 22070 }, { "epoch": 3.7531871494135647, "grad_norm": 11.853171348571777, "learning_rate": 2.707802141764406e-05, "loss": 2.5792, "step": 22080 }, { "epoch": 3.7548869624341323, "grad_norm": 17.236148834228516, "learning_rate": 2.7075188395943114e-05, "loss": 2.6583, "step": 22090 }, { "epoch": 3.7565867754547, "grad_norm": 14.985791206359863, "learning_rate": 2.7072355374242167e-05, "loss": 2.4536, "step": 22100 }, { "epoch": 3.7582865884752676, "grad_norm": 15.459815979003906, "learning_rate": 2.706952235254122e-05, "loss": 2.3911, "step": 22110 }, { "epoch": 3.7599864014958353, "grad_norm": 14.244633674621582, "learning_rate": 2.7066689330840277e-05, "loss": 2.4295, "step": 22120 }, { "epoch": 3.761686214516403, "grad_norm": 12.302262306213379, "learning_rate": 2.7063856309139328e-05, "loss": 2.5764, "step": 22130 }, { "epoch": 3.763386027536971, "grad_norm": 15.870737075805664, "learning_rate": 2.706102328743838e-05, "loss": 2.4384, "step": 22140 }, { "epoch": 3.7650858405575387, "grad_norm": 15.103078842163086, "learning_rate": 2.7058190265737438e-05, "loss": 2.7051, "step": 22150 }, { "epoch": 3.7667856535781064, "grad_norm": 12.437488555908203, "learning_rate": 2.705535724403649e-05, "loss": 2.5859, "step": 22160 }, { "epoch": 3.768485466598674, "grad_norm": 17.593114852905273, "learning_rate": 2.705252422233554e-05, "loss": 2.5366, "step": 22170 }, { "epoch": 3.770185279619242, "grad_norm": 11.601632118225098, "learning_rate": 2.70496912006346e-05, "loss": 2.4507, "step": 22180 }, { "epoch": 3.7718850926398098, "grad_norm": 13.219247817993164, "learning_rate": 2.7046858178933652e-05, "loss": 2.5247, "step": 22190 }, { "epoch": 3.7735849056603774, "grad_norm": 13.65164566040039, "learning_rate": 2.7044025157232702e-05, "loss": 2.3993, "step": 22200 }, { "epoch": 3.775284718680945, "grad_norm": 18.06324005126953, "learning_rate": 2.704119213553176e-05, "loss": 2.5286, "step": 22210 }, { "epoch": 3.7769845317015127, "grad_norm": 8.08560848236084, "learning_rate": 2.7038359113830813e-05, "loss": 2.3814, "step": 22220 }, { "epoch": 3.7786843447220804, "grad_norm": 12.555126190185547, "learning_rate": 2.7035526092129866e-05, "loss": 2.6446, "step": 22230 }, { "epoch": 3.7803841577426485, "grad_norm": 11.995453834533691, "learning_rate": 2.703269307042892e-05, "loss": 2.4671, "step": 22240 }, { "epoch": 3.782083970763216, "grad_norm": 20.163238525390625, "learning_rate": 2.7029860048727973e-05, "loss": 2.5823, "step": 22250 }, { "epoch": 3.7837837837837838, "grad_norm": 16.358272552490234, "learning_rate": 2.7027027027027027e-05, "loss": 2.5726, "step": 22260 }, { "epoch": 3.7854835968043514, "grad_norm": 20.248151779174805, "learning_rate": 2.7024194005326084e-05, "loss": 2.5433, "step": 22270 }, { "epoch": 3.7871834098249195, "grad_norm": 12.805267333984375, "learning_rate": 2.7021360983625134e-05, "loss": 2.5518, "step": 22280 }, { "epoch": 3.788883222845487, "grad_norm": 8.547613143920898, "learning_rate": 2.7018527961924187e-05, "loss": 2.2107, "step": 22290 }, { "epoch": 3.790583035866055, "grad_norm": 13.398189544677734, "learning_rate": 2.7015694940223244e-05, "loss": 2.6255, "step": 22300 }, { "epoch": 3.7922828488866225, "grad_norm": 18.927350997924805, "learning_rate": 2.7012861918522298e-05, "loss": 2.2277, "step": 22310 }, { "epoch": 3.79398266190719, "grad_norm": 11.91021728515625, "learning_rate": 2.7010028896821348e-05, "loss": 2.3208, "step": 22320 }, { "epoch": 3.795682474927758, "grad_norm": 13.246588706970215, "learning_rate": 2.7007195875120405e-05, "loss": 2.6082, "step": 22330 }, { "epoch": 3.7973822879483254, "grad_norm": 14.768160820007324, "learning_rate": 2.700436285341946e-05, "loss": 2.6036, "step": 22340 }, { "epoch": 3.7990821009688935, "grad_norm": 14.267288208007812, "learning_rate": 2.7001529831718512e-05, "loss": 2.6867, "step": 22350 }, { "epoch": 3.800781913989461, "grad_norm": 10.79065990447998, "learning_rate": 2.6998696810017565e-05, "loss": 2.5325, "step": 22360 }, { "epoch": 3.802481727010029, "grad_norm": 13.401219367980957, "learning_rate": 2.699586378831662e-05, "loss": 2.419, "step": 22370 }, { "epoch": 3.8041815400305965, "grad_norm": 13.993821144104004, "learning_rate": 2.6993030766615672e-05, "loss": 2.5997, "step": 22380 }, { "epoch": 3.8058813530511646, "grad_norm": 11.421035766601562, "learning_rate": 2.6990197744914726e-05, "loss": 2.4707, "step": 22390 }, { "epoch": 3.8075811660717322, "grad_norm": 14.808457374572754, "learning_rate": 2.698736472321378e-05, "loss": 2.518, "step": 22400 }, { "epoch": 3.8092809790923, "grad_norm": 9.914158821105957, "learning_rate": 2.6984531701512833e-05, "loss": 2.5167, "step": 22410 }, { "epoch": 3.8109807921128676, "grad_norm": 14.031305313110352, "learning_rate": 2.698169867981189e-05, "loss": 2.3368, "step": 22420 }, { "epoch": 3.812680605133435, "grad_norm": 11.61070728302002, "learning_rate": 2.697886565811094e-05, "loss": 2.2921, "step": 22430 }, { "epoch": 3.814380418154003, "grad_norm": 13.311564445495605, "learning_rate": 2.6976032636409994e-05, "loss": 2.53, "step": 22440 }, { "epoch": 3.816080231174571, "grad_norm": 14.118348121643066, "learning_rate": 2.697319961470905e-05, "loss": 2.404, "step": 22450 }, { "epoch": 3.8177800441951386, "grad_norm": 11.428529739379883, "learning_rate": 2.6970366593008104e-05, "loss": 2.5871, "step": 22460 }, { "epoch": 3.8194798572157063, "grad_norm": 20.663066864013672, "learning_rate": 2.6967533571307154e-05, "loss": 2.2009, "step": 22470 }, { "epoch": 3.821179670236274, "grad_norm": 17.66748046875, "learning_rate": 2.696470054960621e-05, "loss": 2.4142, "step": 22480 }, { "epoch": 3.822879483256842, "grad_norm": 14.5162353515625, "learning_rate": 2.6961867527905265e-05, "loss": 2.3156, "step": 22490 }, { "epoch": 3.8245792962774097, "grad_norm": 19.621517181396484, "learning_rate": 2.6959034506204318e-05, "loss": 2.5626, "step": 22500 }, { "epoch": 3.8262791092979773, "grad_norm": 14.334818840026855, "learning_rate": 2.695620148450337e-05, "loss": 2.4912, "step": 22510 }, { "epoch": 3.827978922318545, "grad_norm": 10.160931587219238, "learning_rate": 2.6953368462802425e-05, "loss": 2.4818, "step": 22520 }, { "epoch": 3.8296787353391126, "grad_norm": 24.4870662689209, "learning_rate": 2.6950535441101482e-05, "loss": 2.4453, "step": 22530 }, { "epoch": 3.8313785483596803, "grad_norm": 18.143213272094727, "learning_rate": 2.6947702419400532e-05, "loss": 2.7645, "step": 22540 }, { "epoch": 3.833078361380248, "grad_norm": 12.391805648803711, "learning_rate": 2.6944869397699586e-05, "loss": 2.385, "step": 22550 }, { "epoch": 3.834778174400816, "grad_norm": 15.494474411010742, "learning_rate": 2.6942036375998643e-05, "loss": 2.5869, "step": 22560 }, { "epoch": 3.8364779874213837, "grad_norm": 26.39971351623535, "learning_rate": 2.6939203354297696e-05, "loss": 2.2868, "step": 22570 }, { "epoch": 3.8381778004419513, "grad_norm": 21.26929473876953, "learning_rate": 2.6936370332596746e-05, "loss": 2.3616, "step": 22580 }, { "epoch": 3.839877613462519, "grad_norm": 14.719461441040039, "learning_rate": 2.6933537310895803e-05, "loss": 2.4293, "step": 22590 }, { "epoch": 3.841577426483087, "grad_norm": 13.398351669311523, "learning_rate": 2.6930704289194857e-05, "loss": 2.4448, "step": 22600 }, { "epoch": 3.8432772395036547, "grad_norm": 14.614312171936035, "learning_rate": 2.692787126749391e-05, "loss": 2.3545, "step": 22610 }, { "epoch": 3.8449770525242224, "grad_norm": 15.695272445678711, "learning_rate": 2.6925038245792964e-05, "loss": 2.5892, "step": 22620 }, { "epoch": 3.84667686554479, "grad_norm": 10.62768268585205, "learning_rate": 2.6922205224092017e-05, "loss": 2.2953, "step": 22630 }, { "epoch": 3.8483766785653577, "grad_norm": 10.192591667175293, "learning_rate": 2.691937220239107e-05, "loss": 2.3664, "step": 22640 }, { "epoch": 3.8500764915859254, "grad_norm": 15.186498641967773, "learning_rate": 2.6916539180690128e-05, "loss": 2.6613, "step": 22650 }, { "epoch": 3.8517763046064935, "grad_norm": 11.79910659790039, "learning_rate": 2.6913706158989178e-05, "loss": 2.4398, "step": 22660 }, { "epoch": 3.853476117627061, "grad_norm": 12.85796070098877, "learning_rate": 2.691087313728823e-05, "loss": 2.4058, "step": 22670 }, { "epoch": 3.8551759306476288, "grad_norm": 17.515186309814453, "learning_rate": 2.6908040115587288e-05, "loss": 2.2366, "step": 22680 }, { "epoch": 3.8568757436681964, "grad_norm": 9.391556739807129, "learning_rate": 2.6905207093886342e-05, "loss": 2.0685, "step": 22690 }, { "epoch": 3.8585755566887645, "grad_norm": 13.039608001708984, "learning_rate": 2.6902374072185392e-05, "loss": 2.4705, "step": 22700 }, { "epoch": 3.860275369709332, "grad_norm": 14.31422233581543, "learning_rate": 2.689954105048445e-05, "loss": 2.3372, "step": 22710 }, { "epoch": 3.8619751827299, "grad_norm": 13.449305534362793, "learning_rate": 2.6896708028783502e-05, "loss": 2.602, "step": 22720 }, { "epoch": 3.8636749957504675, "grad_norm": 12.727066993713379, "learning_rate": 2.6893875007082552e-05, "loss": 2.6123, "step": 22730 }, { "epoch": 3.865374808771035, "grad_norm": 13.720344543457031, "learning_rate": 2.689104198538161e-05, "loss": 2.3559, "step": 22740 }, { "epoch": 3.8670746217916028, "grad_norm": 9.893750190734863, "learning_rate": 2.6888208963680663e-05, "loss": 2.4706, "step": 22750 }, { "epoch": 3.8687744348121704, "grad_norm": 11.711002349853516, "learning_rate": 2.6885375941979716e-05, "loss": 2.5968, "step": 22760 }, { "epoch": 3.8704742478327385, "grad_norm": 8.904523849487305, "learning_rate": 2.688254292027877e-05, "loss": 2.6385, "step": 22770 }, { "epoch": 3.872174060853306, "grad_norm": 12.260072708129883, "learning_rate": 2.6879709898577823e-05, "loss": 2.1201, "step": 22780 }, { "epoch": 3.873873873873874, "grad_norm": 24.937278747558594, "learning_rate": 2.6876876876876877e-05, "loss": 2.5946, "step": 22790 }, { "epoch": 3.8755736868944415, "grad_norm": 10.963619232177734, "learning_rate": 2.6874043855175934e-05, "loss": 2.4171, "step": 22800 }, { "epoch": 3.8772734999150096, "grad_norm": 13.850129127502441, "learning_rate": 2.6871210833474984e-05, "loss": 2.6222, "step": 22810 }, { "epoch": 3.8789733129355772, "grad_norm": 13.510077476501465, "learning_rate": 2.6868377811774038e-05, "loss": 2.5997, "step": 22820 }, { "epoch": 3.880673125956145, "grad_norm": 12.620234489440918, "learning_rate": 2.6865544790073094e-05, "loss": 2.3964, "step": 22830 }, { "epoch": 3.8823729389767125, "grad_norm": 12.818809509277344, "learning_rate": 2.6862711768372148e-05, "loss": 2.6237, "step": 22840 }, { "epoch": 3.88407275199728, "grad_norm": 16.252368927001953, "learning_rate": 2.6859878746671198e-05, "loss": 2.5185, "step": 22850 }, { "epoch": 3.885772565017848, "grad_norm": 15.449548721313477, "learning_rate": 2.6857045724970255e-05, "loss": 2.5194, "step": 22860 }, { "epoch": 3.8874723780384155, "grad_norm": 8.885400772094727, "learning_rate": 2.685421270326931e-05, "loss": 2.4501, "step": 22870 }, { "epoch": 3.8891721910589836, "grad_norm": 12.717391967773438, "learning_rate": 2.685137968156836e-05, "loss": 2.5582, "step": 22880 }, { "epoch": 3.8908720040795512, "grad_norm": 14.641448974609375, "learning_rate": 2.6848546659867416e-05, "loss": 2.4536, "step": 22890 }, { "epoch": 3.892571817100119, "grad_norm": 18.964000701904297, "learning_rate": 2.684571363816647e-05, "loss": 2.3657, "step": 22900 }, { "epoch": 3.894271630120687, "grad_norm": 12.358839988708496, "learning_rate": 2.6842880616465523e-05, "loss": 2.6301, "step": 22910 }, { "epoch": 3.8959714431412547, "grad_norm": 13.964631080627441, "learning_rate": 2.6840047594764576e-05, "loss": 2.4811, "step": 22920 }, { "epoch": 3.8976712561618223, "grad_norm": 9.471652030944824, "learning_rate": 2.683721457306363e-05, "loss": 2.402, "step": 22930 }, { "epoch": 3.89937106918239, "grad_norm": 17.102338790893555, "learning_rate": 2.6834381551362683e-05, "loss": 2.4191, "step": 22940 }, { "epoch": 3.9010708822029576, "grad_norm": 8.733023643493652, "learning_rate": 2.683154852966174e-05, "loss": 2.5388, "step": 22950 }, { "epoch": 3.9027706952235253, "grad_norm": 16.100685119628906, "learning_rate": 2.682871550796079e-05, "loss": 2.5673, "step": 22960 }, { "epoch": 3.904470508244093, "grad_norm": 11.594075202941895, "learning_rate": 2.6825882486259844e-05, "loss": 2.3666, "step": 22970 }, { "epoch": 3.906170321264661, "grad_norm": 10.99598217010498, "learning_rate": 2.68230494645589e-05, "loss": 2.3445, "step": 22980 }, { "epoch": 3.9078701342852287, "grad_norm": 11.11329174041748, "learning_rate": 2.6820216442857954e-05, "loss": 2.5305, "step": 22990 }, { "epoch": 3.9095699473057963, "grad_norm": 13.784523963928223, "learning_rate": 2.6817383421157004e-05, "loss": 2.4235, "step": 23000 }, { "epoch": 3.911269760326364, "grad_norm": 15.272619247436523, "learning_rate": 2.681455039945606e-05, "loss": 2.5276, "step": 23010 }, { "epoch": 3.912969573346932, "grad_norm": 11.522567749023438, "learning_rate": 2.6811717377755115e-05, "loss": 2.6622, "step": 23020 }, { "epoch": 3.9146693863674997, "grad_norm": 8.863479614257812, "learning_rate": 2.6808884356054168e-05, "loss": 2.3904, "step": 23030 }, { "epoch": 3.9163691993880674, "grad_norm": 14.646090507507324, "learning_rate": 2.6806051334353222e-05, "loss": 2.4074, "step": 23040 }, { "epoch": 3.918069012408635, "grad_norm": 17.113985061645508, "learning_rate": 2.6803218312652275e-05, "loss": 2.3678, "step": 23050 }, { "epoch": 3.9197688254292027, "grad_norm": 10.78476619720459, "learning_rate": 2.680038529095133e-05, "loss": 2.5608, "step": 23060 }, { "epoch": 3.9214686384497703, "grad_norm": 11.270685195922852, "learning_rate": 2.6797552269250382e-05, "loss": 2.3701, "step": 23070 }, { "epoch": 3.923168451470338, "grad_norm": 14.523655891418457, "learning_rate": 2.6794719247549436e-05, "loss": 2.4257, "step": 23080 }, { "epoch": 3.924868264490906, "grad_norm": 16.591203689575195, "learning_rate": 2.679188622584849e-05, "loss": 2.4143, "step": 23090 }, { "epoch": 3.9265680775114737, "grad_norm": 14.211938858032227, "learning_rate": 2.6789053204147546e-05, "loss": 2.4695, "step": 23100 }, { "epoch": 3.9282678905320414, "grad_norm": 13.15544319152832, "learning_rate": 2.6786220182446596e-05, "loss": 2.4125, "step": 23110 }, { "epoch": 3.929967703552609, "grad_norm": 14.035799980163574, "learning_rate": 2.678338716074565e-05, "loss": 2.6683, "step": 23120 }, { "epoch": 3.931667516573177, "grad_norm": 18.08034896850586, "learning_rate": 2.6780554139044707e-05, "loss": 2.1242, "step": 23130 }, { "epoch": 3.933367329593745, "grad_norm": 12.577981948852539, "learning_rate": 2.677772111734376e-05, "loss": 2.4012, "step": 23140 }, { "epoch": 3.9350671426143125, "grad_norm": 14.712690353393555, "learning_rate": 2.677488809564281e-05, "loss": 2.5176, "step": 23150 }, { "epoch": 3.93676695563488, "grad_norm": 11.550294876098633, "learning_rate": 2.6772055073941867e-05, "loss": 2.2671, "step": 23160 }, { "epoch": 3.9384667686554478, "grad_norm": 17.60422134399414, "learning_rate": 2.676922205224092e-05, "loss": 2.5703, "step": 23170 }, { "epoch": 3.9401665816760154, "grad_norm": 11.183786392211914, "learning_rate": 2.6766389030539974e-05, "loss": 2.5781, "step": 23180 }, { "epoch": 3.9418663946965835, "grad_norm": 10.586939811706543, "learning_rate": 2.6763556008839028e-05, "loss": 2.4375, "step": 23190 }, { "epoch": 3.943566207717151, "grad_norm": 12.810685157775879, "learning_rate": 2.676072298713808e-05, "loss": 2.5497, "step": 23200 }, { "epoch": 3.945266020737719, "grad_norm": 16.088232040405273, "learning_rate": 2.6757889965437135e-05, "loss": 2.3625, "step": 23210 }, { "epoch": 3.9469658337582865, "grad_norm": 10.286520957946777, "learning_rate": 2.675505694373619e-05, "loss": 2.5202, "step": 23220 }, { "epoch": 3.9486656467788546, "grad_norm": 13.067448616027832, "learning_rate": 2.6752223922035242e-05, "loss": 2.3088, "step": 23230 }, { "epoch": 3.950365459799422, "grad_norm": 18.10711669921875, "learning_rate": 2.67493909003343e-05, "loss": 2.518, "step": 23240 }, { "epoch": 3.95206527281999, "grad_norm": 18.450674057006836, "learning_rate": 2.6746557878633353e-05, "loss": 2.533, "step": 23250 }, { "epoch": 3.9537650858405575, "grad_norm": 19.39794158935547, "learning_rate": 2.6743724856932403e-05, "loss": 2.3998, "step": 23260 }, { "epoch": 3.955464898861125, "grad_norm": 17.024118423461914, "learning_rate": 2.674089183523146e-05, "loss": 2.4264, "step": 23270 }, { "epoch": 3.957164711881693, "grad_norm": 15.598990440368652, "learning_rate": 2.6738058813530513e-05, "loss": 2.4424, "step": 23280 }, { "epoch": 3.9588645249022605, "grad_norm": 12.922159194946289, "learning_rate": 2.6735225791829567e-05, "loss": 2.3225, "step": 23290 }, { "epoch": 3.9605643379228286, "grad_norm": 13.903735160827637, "learning_rate": 2.673239277012862e-05, "loss": 2.5196, "step": 23300 }, { "epoch": 3.9622641509433962, "grad_norm": 17.330202102661133, "learning_rate": 2.6729559748427674e-05, "loss": 2.3327, "step": 23310 }, { "epoch": 3.963963963963964, "grad_norm": 12.528067588806152, "learning_rate": 2.6726726726726727e-05, "loss": 2.4617, "step": 23320 }, { "epoch": 3.9656637769845315, "grad_norm": 15.155486106872559, "learning_rate": 2.6723893705025784e-05, "loss": 2.4852, "step": 23330 }, { "epoch": 3.9673635900050996, "grad_norm": 11.8526611328125, "learning_rate": 2.6721060683324834e-05, "loss": 2.5512, "step": 23340 }, { "epoch": 3.9690634030256673, "grad_norm": 10.331101417541504, "learning_rate": 2.6718227661623888e-05, "loss": 2.3312, "step": 23350 }, { "epoch": 3.970763216046235, "grad_norm": 18.746896743774414, "learning_rate": 2.6715394639922945e-05, "loss": 2.6391, "step": 23360 }, { "epoch": 3.9724630290668026, "grad_norm": 11.967997550964355, "learning_rate": 2.6712561618221998e-05, "loss": 2.4336, "step": 23370 }, { "epoch": 3.9741628420873703, "grad_norm": 13.892992973327637, "learning_rate": 2.670972859652105e-05, "loss": 2.272, "step": 23380 }, { "epoch": 3.975862655107938, "grad_norm": 21.859458923339844, "learning_rate": 2.6706895574820105e-05, "loss": 2.2319, "step": 23390 }, { "epoch": 3.977562468128506, "grad_norm": 17.529172897338867, "learning_rate": 2.670406255311916e-05, "loss": 2.7763, "step": 23400 }, { "epoch": 3.9792622811490737, "grad_norm": 15.044740676879883, "learning_rate": 2.670122953141821e-05, "loss": 2.1384, "step": 23410 }, { "epoch": 3.9809620941696413, "grad_norm": 16.600955963134766, "learning_rate": 2.6698396509717266e-05, "loss": 2.5825, "step": 23420 }, { "epoch": 3.982661907190209, "grad_norm": 11.853261947631836, "learning_rate": 2.669556348801632e-05, "loss": 2.1872, "step": 23430 }, { "epoch": 3.984361720210777, "grad_norm": 12.143808364868164, "learning_rate": 2.6692730466315373e-05, "loss": 2.4528, "step": 23440 }, { "epoch": 3.9860615332313447, "grad_norm": 20.8082332611084, "learning_rate": 2.6689897444614426e-05, "loss": 2.1864, "step": 23450 }, { "epoch": 3.9877613462519124, "grad_norm": 14.384654998779297, "learning_rate": 2.668706442291348e-05, "loss": 2.4629, "step": 23460 }, { "epoch": 3.98946115927248, "grad_norm": 16.406024932861328, "learning_rate": 2.6684231401212533e-05, "loss": 2.4214, "step": 23470 }, { "epoch": 3.9911609722930477, "grad_norm": 14.410937309265137, "learning_rate": 2.668139837951159e-05, "loss": 2.6099, "step": 23480 }, { "epoch": 3.9928607853136153, "grad_norm": 23.78093719482422, "learning_rate": 2.667856535781064e-05, "loss": 2.4519, "step": 23490 }, { "epoch": 3.994560598334183, "grad_norm": 17.454593658447266, "learning_rate": 2.6675732336109694e-05, "loss": 2.2934, "step": 23500 }, { "epoch": 3.996260411354751, "grad_norm": 12.080737113952637, "learning_rate": 2.667289931440875e-05, "loss": 2.4204, "step": 23510 }, { "epoch": 3.9979602243753187, "grad_norm": 10.993525505065918, "learning_rate": 2.6670066292707804e-05, "loss": 2.4906, "step": 23520 }, { "epoch": 3.9996600373958864, "grad_norm": 21.873205184936523, "learning_rate": 2.6667233271006855e-05, "loss": 2.4732, "step": 23530 }, { "epoch": 4.0, "eval_cer": 1.0107717803030303, "eval_loss": 3.027893304824829, "eval_runtime": 1966.3376, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.24, "step": 23532 }, { "epoch": 4.0013598504164545, "grad_norm": 15.647177696228027, "learning_rate": 2.666440024930591e-05, "loss": 2.0097, "step": 23540 }, { "epoch": 4.003059663437022, "grad_norm": 11.491499900817871, "learning_rate": 2.6661567227604965e-05, "loss": 2.0861, "step": 23550 }, { "epoch": 4.00475947645759, "grad_norm": 14.63626480102539, "learning_rate": 2.665873420590402e-05, "loss": 2.288, "step": 23560 }, { "epoch": 4.006459289478157, "grad_norm": 14.365877151489258, "learning_rate": 2.6655901184203072e-05, "loss": 2.1793, "step": 23570 }, { "epoch": 4.008159102498725, "grad_norm": 16.16574478149414, "learning_rate": 2.6653068162502126e-05, "loss": 2.3168, "step": 23580 }, { "epoch": 4.009858915519293, "grad_norm": 13.29616928100586, "learning_rate": 2.665023514080118e-05, "loss": 2.1678, "step": 23590 }, { "epoch": 4.01155872853986, "grad_norm": 14.473156929016113, "learning_rate": 2.6647402119100233e-05, "loss": 2.2267, "step": 23600 }, { "epoch": 4.013258541560428, "grad_norm": 15.336457252502441, "learning_rate": 2.6644569097399286e-05, "loss": 2.5671, "step": 23610 }, { "epoch": 4.014958354580996, "grad_norm": 9.564033508300781, "learning_rate": 2.664173607569834e-05, "loss": 2.3845, "step": 23620 }, { "epoch": 4.016658167601564, "grad_norm": 13.754056930541992, "learning_rate": 2.6638903053997397e-05, "loss": 2.1431, "step": 23630 }, { "epoch": 4.018357980622132, "grad_norm": 15.818633079528809, "learning_rate": 2.6636070032296447e-05, "loss": 2.2894, "step": 23640 }, { "epoch": 4.0200577936426996, "grad_norm": 14.710579872131348, "learning_rate": 2.66332370105955e-05, "loss": 2.1381, "step": 23650 }, { "epoch": 4.021757606663267, "grad_norm": 9.809592247009277, "learning_rate": 2.6630403988894557e-05, "loss": 2.3663, "step": 23660 }, { "epoch": 4.023457419683835, "grad_norm": 10.966891288757324, "learning_rate": 2.662757096719361e-05, "loss": 2.3287, "step": 23670 }, { "epoch": 4.0251572327044025, "grad_norm": 13.7824068069458, "learning_rate": 2.662473794549266e-05, "loss": 2.432, "step": 23680 }, { "epoch": 4.02685704572497, "grad_norm": 12.172625541687012, "learning_rate": 2.6621904923791718e-05, "loss": 2.2764, "step": 23690 }, { "epoch": 4.028556858745538, "grad_norm": 24.785621643066406, "learning_rate": 2.661907190209077e-05, "loss": 2.2451, "step": 23700 }, { "epoch": 4.0302566717661055, "grad_norm": 14.728874206542969, "learning_rate": 2.6616238880389825e-05, "loss": 2.2713, "step": 23710 }, { "epoch": 4.031956484786673, "grad_norm": 14.425718307495117, "learning_rate": 2.6613405858688878e-05, "loss": 2.2489, "step": 23720 }, { "epoch": 4.033656297807241, "grad_norm": 20.21390724182129, "learning_rate": 2.6610572836987932e-05, "loss": 2.1435, "step": 23730 }, { "epoch": 4.035356110827809, "grad_norm": 13.580406188964844, "learning_rate": 2.6607739815286985e-05, "loss": 2.4042, "step": 23740 }, { "epoch": 4.037055923848377, "grad_norm": 19.75798988342285, "learning_rate": 2.660490679358604e-05, "loss": 2.0931, "step": 23750 }, { "epoch": 4.038755736868945, "grad_norm": 16.712646484375, "learning_rate": 2.6602073771885092e-05, "loss": 2.4296, "step": 23760 }, { "epoch": 4.040455549889512, "grad_norm": 17.230573654174805, "learning_rate": 2.6599240750184146e-05, "loss": 2.0822, "step": 23770 }, { "epoch": 4.04215536291008, "grad_norm": 14.325222969055176, "learning_rate": 2.6596407728483203e-05, "loss": 2.1061, "step": 23780 }, { "epoch": 4.043855175930648, "grad_norm": 10.797629356384277, "learning_rate": 2.6593574706782253e-05, "loss": 2.1371, "step": 23790 }, { "epoch": 4.045554988951215, "grad_norm": 16.79729461669922, "learning_rate": 2.6590741685081306e-05, "loss": 2.196, "step": 23800 }, { "epoch": 4.047254801971783, "grad_norm": 15.367559432983398, "learning_rate": 2.6587908663380363e-05, "loss": 2.1542, "step": 23810 }, { "epoch": 4.0489546149923505, "grad_norm": 12.742278099060059, "learning_rate": 2.6585075641679417e-05, "loss": 2.3826, "step": 23820 }, { "epoch": 4.050654428012918, "grad_norm": 12.87714672088623, "learning_rate": 2.6582242619978467e-05, "loss": 2.1323, "step": 23830 }, { "epoch": 4.052354241033487, "grad_norm": 8.954129219055176, "learning_rate": 2.6579409598277524e-05, "loss": 2.5854, "step": 23840 }, { "epoch": 4.054054054054054, "grad_norm": 13.439900398254395, "learning_rate": 2.6576576576576577e-05, "loss": 2.3067, "step": 23850 }, { "epoch": 4.055753867074622, "grad_norm": 14.081575393676758, "learning_rate": 2.657374355487563e-05, "loss": 2.3679, "step": 23860 }, { "epoch": 4.05745368009519, "grad_norm": 13.506153106689453, "learning_rate": 2.6570910533174684e-05, "loss": 2.3886, "step": 23870 }, { "epoch": 4.059153493115757, "grad_norm": 11.318458557128906, "learning_rate": 2.6568077511473738e-05, "loss": 2.3039, "step": 23880 }, { "epoch": 4.060853306136325, "grad_norm": 15.755133628845215, "learning_rate": 2.656524448977279e-05, "loss": 2.3883, "step": 23890 }, { "epoch": 4.062553119156893, "grad_norm": 13.753396987915039, "learning_rate": 2.656241146807185e-05, "loss": 2.3304, "step": 23900 }, { "epoch": 4.06425293217746, "grad_norm": 20.50196075439453, "learning_rate": 2.65595784463709e-05, "loss": 2.2584, "step": 23910 }, { "epoch": 4.065952745198028, "grad_norm": 13.40053939819336, "learning_rate": 2.6556745424669952e-05, "loss": 2.124, "step": 23920 }, { "epoch": 4.067652558218596, "grad_norm": 14.994084358215332, "learning_rate": 2.655391240296901e-05, "loss": 2.0326, "step": 23930 }, { "epoch": 4.069352371239163, "grad_norm": 14.780344009399414, "learning_rate": 2.655107938126806e-05, "loss": 2.3032, "step": 23940 }, { "epoch": 4.071052184259732, "grad_norm": 16.02765655517578, "learning_rate": 2.6548246359567116e-05, "loss": 2.5125, "step": 23950 }, { "epoch": 4.0727519972802995, "grad_norm": 12.764716148376465, "learning_rate": 2.654541333786617e-05, "loss": 2.2187, "step": 23960 }, { "epoch": 4.074451810300867, "grad_norm": 11.293611526489258, "learning_rate": 2.6542580316165223e-05, "loss": 2.2024, "step": 23970 }, { "epoch": 4.076151623321435, "grad_norm": 16.95568084716797, "learning_rate": 2.6539747294464277e-05, "loss": 2.0707, "step": 23980 }, { "epoch": 4.077851436342002, "grad_norm": 10.763463020324707, "learning_rate": 2.653691427276333e-05, "loss": 2.1491, "step": 23990 }, { "epoch": 4.07955124936257, "grad_norm": 23.764774322509766, "learning_rate": 2.6534081251062384e-05, "loss": 2.1261, "step": 24000 }, { "epoch": 4.081251062383138, "grad_norm": 10.492151260375977, "learning_rate": 2.653124822936144e-05, "loss": 2.3692, "step": 24010 }, { "epoch": 4.082950875403705, "grad_norm": 14.108599662780762, "learning_rate": 2.652841520766049e-05, "loss": 2.341, "step": 24020 }, { "epoch": 4.084650688424273, "grad_norm": 13.97806453704834, "learning_rate": 2.6525582185959544e-05, "loss": 2.2745, "step": 24030 }, { "epoch": 4.086350501444841, "grad_norm": 17.287996292114258, "learning_rate": 2.65227491642586e-05, "loss": 2.3753, "step": 24040 }, { "epoch": 4.088050314465409, "grad_norm": 19.017263412475586, "learning_rate": 2.6519916142557655e-05, "loss": 2.4048, "step": 24050 }, { "epoch": 4.089750127485977, "grad_norm": 11.419098854064941, "learning_rate": 2.6517083120856705e-05, "loss": 2.0506, "step": 24060 }, { "epoch": 4.0914499405065445, "grad_norm": 14.460087776184082, "learning_rate": 2.651425009915576e-05, "loss": 2.3435, "step": 24070 }, { "epoch": 4.093149753527112, "grad_norm": 16.245834350585938, "learning_rate": 2.6511417077454815e-05, "loss": 2.1412, "step": 24080 }, { "epoch": 4.09484956654768, "grad_norm": 17.101839065551758, "learning_rate": 2.6508584055753865e-05, "loss": 2.4498, "step": 24090 }, { "epoch": 4.0965493795682475, "grad_norm": 17.64244842529297, "learning_rate": 2.6505751034052922e-05, "loss": 2.1844, "step": 24100 }, { "epoch": 4.098249192588815, "grad_norm": 15.467035293579102, "learning_rate": 2.6502918012351976e-05, "loss": 2.3199, "step": 24110 }, { "epoch": 4.099949005609383, "grad_norm": 14.586395263671875, "learning_rate": 2.650008499065103e-05, "loss": 2.0858, "step": 24120 }, { "epoch": 4.1016488186299505, "grad_norm": 10.469539642333984, "learning_rate": 2.6497251968950083e-05, "loss": 2.1433, "step": 24130 }, { "epoch": 4.103348631650518, "grad_norm": 11.193008422851562, "learning_rate": 2.6494418947249136e-05, "loss": 2.434, "step": 24140 }, { "epoch": 4.105048444671086, "grad_norm": 13.210295677185059, "learning_rate": 2.649158592554819e-05, "loss": 2.2469, "step": 24150 }, { "epoch": 4.106748257691654, "grad_norm": 11.115633964538574, "learning_rate": 2.6488752903847247e-05, "loss": 2.3345, "step": 24160 }, { "epoch": 4.108448070712222, "grad_norm": 13.171968460083008, "learning_rate": 2.6485919882146297e-05, "loss": 2.1849, "step": 24170 }, { "epoch": 4.11014788373279, "grad_norm": 19.69135093688965, "learning_rate": 2.648308686044535e-05, "loss": 2.3212, "step": 24180 }, { "epoch": 4.111847696753357, "grad_norm": 15.320941925048828, "learning_rate": 2.6480253838744407e-05, "loss": 2.3773, "step": 24190 }, { "epoch": 4.113547509773925, "grad_norm": 185.60614013671875, "learning_rate": 2.647742081704346e-05, "loss": 2.3367, "step": 24200 }, { "epoch": 4.115247322794493, "grad_norm": 12.23252010345459, "learning_rate": 2.647458779534251e-05, "loss": 2.1614, "step": 24210 }, { "epoch": 4.11694713581506, "grad_norm": 19.04967498779297, "learning_rate": 2.6471754773641568e-05, "loss": 2.1518, "step": 24220 }, { "epoch": 4.118646948835628, "grad_norm": 19.103376388549805, "learning_rate": 2.646892175194062e-05, "loss": 2.2468, "step": 24230 }, { "epoch": 4.1203467618561955, "grad_norm": 15.102468490600586, "learning_rate": 2.6466088730239675e-05, "loss": 2.2762, "step": 24240 }, { "epoch": 4.122046574876763, "grad_norm": 11.237533569335938, "learning_rate": 2.646325570853873e-05, "loss": 2.2648, "step": 24250 }, { "epoch": 4.123746387897332, "grad_norm": 15.408721923828125, "learning_rate": 2.6460422686837782e-05, "loss": 2.3806, "step": 24260 }, { "epoch": 4.125446200917899, "grad_norm": 24.69990348815918, "learning_rate": 2.6457589665136835e-05, "loss": 2.3028, "step": 24270 }, { "epoch": 4.127146013938467, "grad_norm": 34.95774459838867, "learning_rate": 2.645475664343589e-05, "loss": 2.3447, "step": 24280 }, { "epoch": 4.128845826959035, "grad_norm": 10.692275047302246, "learning_rate": 2.6451923621734943e-05, "loss": 2.3336, "step": 24290 }, { "epoch": 4.130545639979602, "grad_norm": 16.030672073364258, "learning_rate": 2.6449090600033996e-05, "loss": 2.4526, "step": 24300 }, { "epoch": 4.13224545300017, "grad_norm": 23.555530548095703, "learning_rate": 2.6446257578333053e-05, "loss": 2.2699, "step": 24310 }, { "epoch": 4.133945266020738, "grad_norm": 15.648760795593262, "learning_rate": 2.6443424556632103e-05, "loss": 2.3044, "step": 24320 }, { "epoch": 4.135645079041305, "grad_norm": 10.828690528869629, "learning_rate": 2.6440591534931157e-05, "loss": 2.4095, "step": 24330 }, { "epoch": 4.137344892061873, "grad_norm": 12.691605567932129, "learning_rate": 2.6437758513230214e-05, "loss": 2.4585, "step": 24340 }, { "epoch": 4.139044705082441, "grad_norm": 15.730829238891602, "learning_rate": 2.6434925491529267e-05, "loss": 2.2285, "step": 24350 }, { "epoch": 4.140744518103008, "grad_norm": 13.154619216918945, "learning_rate": 2.6432092469828317e-05, "loss": 2.3758, "step": 24360 }, { "epoch": 4.142444331123577, "grad_norm": 14.328714370727539, "learning_rate": 2.6429259448127374e-05, "loss": 2.2694, "step": 24370 }, { "epoch": 4.1441441441441444, "grad_norm": 13.436251640319824, "learning_rate": 2.6426426426426428e-05, "loss": 2.3217, "step": 24380 }, { "epoch": 4.145843957164712, "grad_norm": 12.881389617919922, "learning_rate": 2.642359340472548e-05, "loss": 2.4743, "step": 24390 }, { "epoch": 4.14754377018528, "grad_norm": 10.967267036437988, "learning_rate": 2.6420760383024535e-05, "loss": 2.3946, "step": 24400 }, { "epoch": 4.149243583205847, "grad_norm": 14.247036933898926, "learning_rate": 2.6417927361323588e-05, "loss": 2.486, "step": 24410 }, { "epoch": 4.150943396226415, "grad_norm": 16.344980239868164, "learning_rate": 2.641509433962264e-05, "loss": 2.3509, "step": 24420 }, { "epoch": 4.152643209246983, "grad_norm": 17.764673233032227, "learning_rate": 2.6412261317921695e-05, "loss": 2.1691, "step": 24430 }, { "epoch": 4.15434302226755, "grad_norm": 24.275060653686523, "learning_rate": 2.640942829622075e-05, "loss": 2.2805, "step": 24440 }, { "epoch": 4.156042835288118, "grad_norm": 13.026418685913086, "learning_rate": 2.6406595274519802e-05, "loss": 2.2405, "step": 24450 }, { "epoch": 4.157742648308686, "grad_norm": 9.0975923538208, "learning_rate": 2.640376225281886e-05, "loss": 2.3697, "step": 24460 }, { "epoch": 4.159442461329254, "grad_norm": 14.823699951171875, "learning_rate": 2.640092923111791e-05, "loss": 2.1807, "step": 24470 }, { "epoch": 4.161142274349822, "grad_norm": 13.097728729248047, "learning_rate": 2.6398096209416963e-05, "loss": 2.1869, "step": 24480 }, { "epoch": 4.1628420873703895, "grad_norm": 12.699882507324219, "learning_rate": 2.639526318771602e-05, "loss": 2.2216, "step": 24490 }, { "epoch": 4.164541900390957, "grad_norm": 9.557317733764648, "learning_rate": 2.6392430166015073e-05, "loss": 2.2571, "step": 24500 }, { "epoch": 4.166241713411525, "grad_norm": 11.955225944519043, "learning_rate": 2.6389597144314123e-05, "loss": 2.1474, "step": 24510 }, { "epoch": 4.1679415264320925, "grad_norm": 20.433021545410156, "learning_rate": 2.638676412261318e-05, "loss": 2.206, "step": 24520 }, { "epoch": 4.16964133945266, "grad_norm": 17.82169532775879, "learning_rate": 2.6383931100912234e-05, "loss": 2.416, "step": 24530 }, { "epoch": 4.171341152473228, "grad_norm": 13.120054244995117, "learning_rate": 2.6381098079211287e-05, "loss": 2.3054, "step": 24540 }, { "epoch": 4.173040965493795, "grad_norm": 11.275712013244629, "learning_rate": 2.637826505751034e-05, "loss": 2.24, "step": 24550 }, { "epoch": 4.174740778514363, "grad_norm": 11.722530364990234, "learning_rate": 2.6375432035809394e-05, "loss": 2.6349, "step": 24560 }, { "epoch": 4.176440591534931, "grad_norm": 10.39983081817627, "learning_rate": 2.6372599014108448e-05, "loss": 2.2705, "step": 24570 }, { "epoch": 4.178140404555499, "grad_norm": 9.655168533325195, "learning_rate": 2.6369765992407505e-05, "loss": 2.505, "step": 24580 }, { "epoch": 4.179840217576067, "grad_norm": 16.14459991455078, "learning_rate": 2.6366932970706555e-05, "loss": 2.0702, "step": 24590 }, { "epoch": 4.181540030596635, "grad_norm": 14.229939460754395, "learning_rate": 2.636409994900561e-05, "loss": 2.1494, "step": 24600 }, { "epoch": 4.183239843617202, "grad_norm": 15.791484832763672, "learning_rate": 2.6361266927304665e-05, "loss": 2.2841, "step": 24610 }, { "epoch": 4.18493965663777, "grad_norm": 11.772504806518555, "learning_rate": 2.6358433905603716e-05, "loss": 2.2263, "step": 24620 }, { "epoch": 4.1866394696583376, "grad_norm": 21.61369514465332, "learning_rate": 2.635560088390277e-05, "loss": 2.264, "step": 24630 }, { "epoch": 4.188339282678905, "grad_norm": 12.294670104980469, "learning_rate": 2.6352767862201826e-05, "loss": 2.3399, "step": 24640 }, { "epoch": 4.190039095699473, "grad_norm": 16.45985984802246, "learning_rate": 2.634993484050088e-05, "loss": 2.2178, "step": 24650 }, { "epoch": 4.1917389087200405, "grad_norm": 15.67893123626709, "learning_rate": 2.6347101818799933e-05, "loss": 2.3626, "step": 24660 }, { "epoch": 4.193438721740608, "grad_norm": 16.057296752929688, "learning_rate": 2.6344268797098987e-05, "loss": 2.2527, "step": 24670 }, { "epoch": 4.195138534761176, "grad_norm": 8.247102737426758, "learning_rate": 2.634143577539804e-05, "loss": 2.4064, "step": 24680 }, { "epoch": 4.196838347781744, "grad_norm": 42.17365646362305, "learning_rate": 2.6338602753697097e-05, "loss": 2.2423, "step": 24690 }, { "epoch": 4.198538160802312, "grad_norm": 17.704524993896484, "learning_rate": 2.6335769731996147e-05, "loss": 2.1652, "step": 24700 }, { "epoch": 4.20023797382288, "grad_norm": 17.552410125732422, "learning_rate": 2.63329367102952e-05, "loss": 2.1046, "step": 24710 }, { "epoch": 4.201937786843447, "grad_norm": 14.958829879760742, "learning_rate": 2.6330103688594258e-05, "loss": 2.1984, "step": 24720 }, { "epoch": 4.203637599864015, "grad_norm": 14.410178184509277, "learning_rate": 2.632727066689331e-05, "loss": 2.1431, "step": 24730 }, { "epoch": 4.205337412884583, "grad_norm": 11.181432723999023, "learning_rate": 2.632443764519236e-05, "loss": 2.0199, "step": 24740 }, { "epoch": 4.20703722590515, "grad_norm": 10.700401306152344, "learning_rate": 2.6321604623491418e-05, "loss": 2.6109, "step": 24750 }, { "epoch": 4.208737038925718, "grad_norm": 12.765241622924805, "learning_rate": 2.631877160179047e-05, "loss": 2.3047, "step": 24760 }, { "epoch": 4.210436851946286, "grad_norm": 16.898733139038086, "learning_rate": 2.6315938580089525e-05, "loss": 2.3097, "step": 24770 }, { "epoch": 4.212136664966853, "grad_norm": 11.026458740234375, "learning_rate": 2.631310555838858e-05, "loss": 2.4708, "step": 24780 }, { "epoch": 4.213836477987422, "grad_norm": 19.371349334716797, "learning_rate": 2.6310272536687632e-05, "loss": 2.3453, "step": 24790 }, { "epoch": 4.215536291007989, "grad_norm": 10.217220306396484, "learning_rate": 2.6307439514986686e-05, "loss": 2.3099, "step": 24800 }, { "epoch": 4.217236104028557, "grad_norm": 38.473392486572266, "learning_rate": 2.630460649328574e-05, "loss": 2.0605, "step": 24810 }, { "epoch": 4.218935917049125, "grad_norm": 15.35883617401123, "learning_rate": 2.6301773471584793e-05, "loss": 2.1756, "step": 24820 }, { "epoch": 4.220635730069692, "grad_norm": 15.154108047485352, "learning_rate": 2.6298940449883846e-05, "loss": 2.0381, "step": 24830 }, { "epoch": 4.22233554309026, "grad_norm": 15.342743873596191, "learning_rate": 2.6296107428182903e-05, "loss": 2.2168, "step": 24840 }, { "epoch": 4.224035356110828, "grad_norm": 9.34340763092041, "learning_rate": 2.6293274406481953e-05, "loss": 2.0515, "step": 24850 }, { "epoch": 4.225735169131395, "grad_norm": 12.177955627441406, "learning_rate": 2.6290441384781007e-05, "loss": 2.4008, "step": 24860 }, { "epoch": 4.227434982151963, "grad_norm": 8.830852508544922, "learning_rate": 2.6287608363080064e-05, "loss": 2.4332, "step": 24870 }, { "epoch": 4.229134795172531, "grad_norm": 22.880739212036133, "learning_rate": 2.6284775341379117e-05, "loss": 2.1706, "step": 24880 }, { "epoch": 4.230834608193099, "grad_norm": 13.763422966003418, "learning_rate": 2.6281942319678167e-05, "loss": 2.1907, "step": 24890 }, { "epoch": 4.232534421213667, "grad_norm": 12.133538246154785, "learning_rate": 2.6279109297977224e-05, "loss": 2.0203, "step": 24900 }, { "epoch": 4.2342342342342345, "grad_norm": 21.937440872192383, "learning_rate": 2.6276276276276278e-05, "loss": 2.1198, "step": 24910 }, { "epoch": 4.235934047254802, "grad_norm": 12.424885749816895, "learning_rate": 2.627344325457533e-05, "loss": 2.3928, "step": 24920 }, { "epoch": 4.23763386027537, "grad_norm": 10.39197826385498, "learning_rate": 2.6270610232874385e-05, "loss": 2.3464, "step": 24930 }, { "epoch": 4.2393336732959375, "grad_norm": 10.514570236206055, "learning_rate": 2.626777721117344e-05, "loss": 2.3119, "step": 24940 }, { "epoch": 4.241033486316505, "grad_norm": 13.765385627746582, "learning_rate": 2.6264944189472492e-05, "loss": 1.9819, "step": 24950 }, { "epoch": 4.242733299337073, "grad_norm": 15.39087963104248, "learning_rate": 2.6262111167771545e-05, "loss": 2.2568, "step": 24960 }, { "epoch": 4.24443311235764, "grad_norm": 17.612394332885742, "learning_rate": 2.62592781460706e-05, "loss": 2.1395, "step": 24970 }, { "epoch": 4.246132925378208, "grad_norm": 11.216169357299805, "learning_rate": 2.6256445124369652e-05, "loss": 2.4134, "step": 24980 }, { "epoch": 4.247832738398776, "grad_norm": 15.707789421081543, "learning_rate": 2.625361210266871e-05, "loss": 2.178, "step": 24990 }, { "epoch": 4.249532551419344, "grad_norm": 15.232940673828125, "learning_rate": 2.625077908096776e-05, "loss": 2.2237, "step": 25000 }, { "epoch": 4.251232364439912, "grad_norm": 11.97886848449707, "learning_rate": 2.6247946059266813e-05, "loss": 2.1312, "step": 25010 }, { "epoch": 4.25293217746048, "grad_norm": 15.08416748046875, "learning_rate": 2.624511303756587e-05, "loss": 2.2894, "step": 25020 }, { "epoch": 4.254631990481047, "grad_norm": 12.517196655273438, "learning_rate": 2.6242280015864923e-05, "loss": 2.3527, "step": 25030 }, { "epoch": 4.256331803501615, "grad_norm": 14.876199722290039, "learning_rate": 2.6239446994163974e-05, "loss": 2.1648, "step": 25040 }, { "epoch": 4.2580316165221825, "grad_norm": 16.960418701171875, "learning_rate": 2.623661397246303e-05, "loss": 2.2881, "step": 25050 }, { "epoch": 4.25973142954275, "grad_norm": 11.988615036010742, "learning_rate": 2.6233780950762084e-05, "loss": 2.2191, "step": 25060 }, { "epoch": 4.261431242563318, "grad_norm": 20.464754104614258, "learning_rate": 2.6230947929061138e-05, "loss": 2.0938, "step": 25070 }, { "epoch": 4.2631310555838855, "grad_norm": 17.140052795410156, "learning_rate": 2.622811490736019e-05, "loss": 2.1754, "step": 25080 }, { "epoch": 4.264830868604453, "grad_norm": 21.730682373046875, "learning_rate": 2.6225281885659245e-05, "loss": 2.3575, "step": 25090 }, { "epoch": 4.266530681625021, "grad_norm": 14.792156219482422, "learning_rate": 2.6222448863958298e-05, "loss": 2.3022, "step": 25100 }, { "epoch": 4.268230494645589, "grad_norm": 22.789501190185547, "learning_rate": 2.6219615842257355e-05, "loss": 2.2515, "step": 25110 }, { "epoch": 4.269930307666157, "grad_norm": 9.499470710754395, "learning_rate": 2.6216782820556405e-05, "loss": 2.0163, "step": 25120 }, { "epoch": 4.271630120686725, "grad_norm": 10.49502182006836, "learning_rate": 2.621394979885546e-05, "loss": 2.1506, "step": 25130 }, { "epoch": 4.273329933707292, "grad_norm": 14.583684921264648, "learning_rate": 2.6211116777154516e-05, "loss": 2.0616, "step": 25140 }, { "epoch": 4.27502974672786, "grad_norm": 12.48144817352295, "learning_rate": 2.6208283755453566e-05, "loss": 2.3002, "step": 25150 }, { "epoch": 4.276729559748428, "grad_norm": 14.324007987976074, "learning_rate": 2.620545073375262e-05, "loss": 2.1529, "step": 25160 }, { "epoch": 4.278429372768995, "grad_norm": 16.485095977783203, "learning_rate": 2.6202617712051676e-05, "loss": 2.402, "step": 25170 }, { "epoch": 4.280129185789563, "grad_norm": 12.94542407989502, "learning_rate": 2.619978469035073e-05, "loss": 2.2098, "step": 25180 }, { "epoch": 4.281828998810131, "grad_norm": 10.677837371826172, "learning_rate": 2.619695166864978e-05, "loss": 2.2334, "step": 25190 }, { "epoch": 4.283528811830698, "grad_norm": 21.280597686767578, "learning_rate": 2.6194118646948837e-05, "loss": 2.5227, "step": 25200 }, { "epoch": 4.285228624851267, "grad_norm": 9.774826049804688, "learning_rate": 2.619128562524789e-05, "loss": 2.4375, "step": 25210 }, { "epoch": 4.286928437871834, "grad_norm": 15.840423583984375, "learning_rate": 2.6188452603546944e-05, "loss": 2.2117, "step": 25220 }, { "epoch": 4.288628250892402, "grad_norm": 9.156455993652344, "learning_rate": 2.6185619581845997e-05, "loss": 2.0906, "step": 25230 }, { "epoch": 4.29032806391297, "grad_norm": 16.41498565673828, "learning_rate": 2.618278656014505e-05, "loss": 2.2232, "step": 25240 }, { "epoch": 4.292027876933537, "grad_norm": 11.57177448272705, "learning_rate": 2.6179953538444104e-05, "loss": 2.0118, "step": 25250 }, { "epoch": 4.293727689954105, "grad_norm": 11.802271842956543, "learning_rate": 2.617712051674316e-05, "loss": 2.4117, "step": 25260 }, { "epoch": 4.295427502974673, "grad_norm": 19.60314178466797, "learning_rate": 2.617428749504221e-05, "loss": 2.5239, "step": 25270 }, { "epoch": 4.29712731599524, "grad_norm": 11.321030616760254, "learning_rate": 2.6171454473341265e-05, "loss": 2.2562, "step": 25280 }, { "epoch": 4.298827129015808, "grad_norm": 26.703689575195312, "learning_rate": 2.6168621451640322e-05, "loss": 2.2838, "step": 25290 }, { "epoch": 4.300526942036376, "grad_norm": 20.351367950439453, "learning_rate": 2.6165788429939372e-05, "loss": 2.2961, "step": 25300 }, { "epoch": 4.302226755056944, "grad_norm": 13.298752784729004, "learning_rate": 2.6162955408238425e-05, "loss": 2.2714, "step": 25310 }, { "epoch": 4.303926568077512, "grad_norm": 11.888916969299316, "learning_rate": 2.6160122386537482e-05, "loss": 2.2007, "step": 25320 }, { "epoch": 4.3056263810980795, "grad_norm": 17.623191833496094, "learning_rate": 2.6157289364836536e-05, "loss": 2.3406, "step": 25330 }, { "epoch": 4.307326194118647, "grad_norm": 14.010086059570312, "learning_rate": 2.6154456343135586e-05, "loss": 2.3891, "step": 25340 }, { "epoch": 4.309026007139215, "grad_norm": 12.282282829284668, "learning_rate": 2.6151623321434643e-05, "loss": 2.3347, "step": 25350 }, { "epoch": 4.3107258201597825, "grad_norm": 14.78448486328125, "learning_rate": 2.6148790299733696e-05, "loss": 2.2764, "step": 25360 }, { "epoch": 4.31242563318035, "grad_norm": 15.24938678741455, "learning_rate": 2.614595727803275e-05, "loss": 2.2185, "step": 25370 }, { "epoch": 4.314125446200918, "grad_norm": 11.72359561920166, "learning_rate": 2.6143124256331804e-05, "loss": 2.3991, "step": 25380 }, { "epoch": 4.315825259221485, "grad_norm": 7.524674415588379, "learning_rate": 2.6140291234630857e-05, "loss": 2.219, "step": 25390 }, { "epoch": 4.317525072242053, "grad_norm": 14.079030990600586, "learning_rate": 2.613745821292991e-05, "loss": 2.1673, "step": 25400 }, { "epoch": 4.319224885262621, "grad_norm": 12.493011474609375, "learning_rate": 2.6134625191228967e-05, "loss": 2.315, "step": 25410 }, { "epoch": 4.320924698283189, "grad_norm": 14.389622688293457, "learning_rate": 2.6131792169528018e-05, "loss": 2.2189, "step": 25420 }, { "epoch": 4.322624511303757, "grad_norm": 16.896595001220703, "learning_rate": 2.6128959147827074e-05, "loss": 2.1153, "step": 25430 }, { "epoch": 4.324324324324325, "grad_norm": 24.2767391204834, "learning_rate": 2.6126126126126128e-05, "loss": 2.1748, "step": 25440 }, { "epoch": 4.326024137344892, "grad_norm": 10.61505126953125, "learning_rate": 2.612329310442518e-05, "loss": 2.0442, "step": 25450 }, { "epoch": 4.32772395036546, "grad_norm": 23.621551513671875, "learning_rate": 2.6120460082724235e-05, "loss": 2.2447, "step": 25460 }, { "epoch": 4.3294237633860275, "grad_norm": 20.012950897216797, "learning_rate": 2.611762706102329e-05, "loss": 2.3783, "step": 25470 }, { "epoch": 4.331123576406595, "grad_norm": 17.116212844848633, "learning_rate": 2.6114794039322342e-05, "loss": 2.3735, "step": 25480 }, { "epoch": 4.332823389427163, "grad_norm": 15.33406925201416, "learning_rate": 2.6111961017621396e-05, "loss": 2.0735, "step": 25490 }, { "epoch": 4.3345232024477305, "grad_norm": 12.78668212890625, "learning_rate": 2.610912799592045e-05, "loss": 2.0769, "step": 25500 }, { "epoch": 4.336223015468298, "grad_norm": 13.501405715942383, "learning_rate": 2.6106294974219503e-05, "loss": 2.2469, "step": 25510 }, { "epoch": 4.337922828488866, "grad_norm": 12.663679122924805, "learning_rate": 2.610346195251856e-05, "loss": 2.1651, "step": 25520 }, { "epoch": 4.339622641509434, "grad_norm": 11.49737548828125, "learning_rate": 2.610062893081761e-05, "loss": 2.2976, "step": 25530 }, { "epoch": 4.341322454530002, "grad_norm": 18.43829917907715, "learning_rate": 2.6097795909116663e-05, "loss": 2.4449, "step": 25540 }, { "epoch": 4.34302226755057, "grad_norm": 15.443109512329102, "learning_rate": 2.609496288741572e-05, "loss": 2.5147, "step": 25550 }, { "epoch": 4.344722080571137, "grad_norm": 7.739646911621094, "learning_rate": 2.6092129865714774e-05, "loss": 2.292, "step": 25560 }, { "epoch": 4.346421893591705, "grad_norm": 18.12637710571289, "learning_rate": 2.6089296844013824e-05, "loss": 1.9636, "step": 25570 }, { "epoch": 4.348121706612273, "grad_norm": 15.328030586242676, "learning_rate": 2.608646382231288e-05, "loss": 2.4577, "step": 25580 }, { "epoch": 4.34982151963284, "grad_norm": 20.001182556152344, "learning_rate": 2.6083630800611934e-05, "loss": 2.2931, "step": 25590 }, { "epoch": 4.351521332653408, "grad_norm": 14.81929874420166, "learning_rate": 2.6080797778910988e-05, "loss": 2.4226, "step": 25600 }, { "epoch": 4.353221145673976, "grad_norm": 21.00187110900879, "learning_rate": 2.607796475721004e-05, "loss": 2.0514, "step": 25610 }, { "epoch": 4.354920958694543, "grad_norm": 26.441001892089844, "learning_rate": 2.6075131735509095e-05, "loss": 2.1727, "step": 25620 }, { "epoch": 4.356620771715112, "grad_norm": 11.888715744018555, "learning_rate": 2.607229871380815e-05, "loss": 2.1054, "step": 25630 }, { "epoch": 4.358320584735679, "grad_norm": 12.07772445678711, "learning_rate": 2.6069465692107202e-05, "loss": 2.5032, "step": 25640 }, { "epoch": 4.360020397756247, "grad_norm": 16.664857864379883, "learning_rate": 2.6066632670406255e-05, "loss": 2.1729, "step": 25650 }, { "epoch": 4.361720210776815, "grad_norm": 12.09310531616211, "learning_rate": 2.606379964870531e-05, "loss": 2.3226, "step": 25660 }, { "epoch": 4.363420023797382, "grad_norm": 14.079859733581543, "learning_rate": 2.6060966627004366e-05, "loss": 2.2101, "step": 25670 }, { "epoch": 4.36511983681795, "grad_norm": 13.245708465576172, "learning_rate": 2.6058133605303416e-05, "loss": 2.4432, "step": 25680 }, { "epoch": 4.366819649838518, "grad_norm": 14.03307819366455, "learning_rate": 2.605530058360247e-05, "loss": 2.29, "step": 25690 }, { "epoch": 4.368519462859085, "grad_norm": 15.032106399536133, "learning_rate": 2.6052467561901526e-05, "loss": 2.2306, "step": 25700 }, { "epoch": 4.370219275879653, "grad_norm": 18.462369918823242, "learning_rate": 2.604963454020058e-05, "loss": 2.2895, "step": 25710 }, { "epoch": 4.371919088900221, "grad_norm": 19.700651168823242, "learning_rate": 2.604680151849963e-05, "loss": 2.3717, "step": 25720 }, { "epoch": 4.373618901920789, "grad_norm": 17.657167434692383, "learning_rate": 2.6043968496798687e-05, "loss": 2.2574, "step": 25730 }, { "epoch": 4.375318714941357, "grad_norm": 27.598695755004883, "learning_rate": 2.604113547509774e-05, "loss": 2.1486, "step": 25740 }, { "epoch": 4.3770185279619245, "grad_norm": 17.818687438964844, "learning_rate": 2.6038302453396794e-05, "loss": 2.527, "step": 25750 }, { "epoch": 4.378718340982492, "grad_norm": 18.909984588623047, "learning_rate": 2.6035469431695847e-05, "loss": 2.3112, "step": 25760 }, { "epoch": 4.38041815400306, "grad_norm": 14.403288841247559, "learning_rate": 2.60326364099949e-05, "loss": 2.4114, "step": 25770 }, { "epoch": 4.382117967023627, "grad_norm": 20.465295791625977, "learning_rate": 2.6029803388293955e-05, "loss": 2.1378, "step": 25780 }, { "epoch": 4.383817780044195, "grad_norm": 19.43297004699707, "learning_rate": 2.602697036659301e-05, "loss": 2.111, "step": 25790 }, { "epoch": 4.385517593064763, "grad_norm": 13.989618301391602, "learning_rate": 2.602413734489206e-05, "loss": 2.3091, "step": 25800 }, { "epoch": 4.38721740608533, "grad_norm": 9.477527618408203, "learning_rate": 2.6021304323191115e-05, "loss": 2.4123, "step": 25810 }, { "epoch": 4.388917219105898, "grad_norm": 27.072978973388672, "learning_rate": 2.6018471301490172e-05, "loss": 2.1843, "step": 25820 }, { "epoch": 4.390617032126466, "grad_norm": 22.949016571044922, "learning_rate": 2.6015638279789222e-05, "loss": 2.2472, "step": 25830 }, { "epoch": 4.392316845147034, "grad_norm": 13.513030052185059, "learning_rate": 2.6012805258088276e-05, "loss": 2.4587, "step": 25840 }, { "epoch": 4.394016658167602, "grad_norm": 17.625986099243164, "learning_rate": 2.6009972236387333e-05, "loss": 2.3289, "step": 25850 }, { "epoch": 4.3957164711881695, "grad_norm": 12.716322898864746, "learning_rate": 2.6007139214686386e-05, "loss": 2.4657, "step": 25860 }, { "epoch": 4.397416284208737, "grad_norm": 16.405044555664062, "learning_rate": 2.6004306192985436e-05, "loss": 2.1579, "step": 25870 }, { "epoch": 4.399116097229305, "grad_norm": 11.958634376525879, "learning_rate": 2.6001473171284493e-05, "loss": 2.2055, "step": 25880 }, { "epoch": 4.4008159102498725, "grad_norm": 15.069506645202637, "learning_rate": 2.5998640149583547e-05, "loss": 2.3803, "step": 25890 }, { "epoch": 4.40251572327044, "grad_norm": 15.58962345123291, "learning_rate": 2.59958071278826e-05, "loss": 2.1646, "step": 25900 }, { "epoch": 4.404215536291008, "grad_norm": 41.6041259765625, "learning_rate": 2.5992974106181654e-05, "loss": 2.4115, "step": 25910 }, { "epoch": 4.4059153493115755, "grad_norm": 18.866708755493164, "learning_rate": 2.5990141084480707e-05, "loss": 2.3106, "step": 25920 }, { "epoch": 4.407615162332143, "grad_norm": 11.959355354309082, "learning_rate": 2.598730806277976e-05, "loss": 2.31, "step": 25930 }, { "epoch": 4.409314975352711, "grad_norm": 8.479662895202637, "learning_rate": 2.5984475041078818e-05, "loss": 2.3815, "step": 25940 }, { "epoch": 4.411014788373279, "grad_norm": 19.138317108154297, "learning_rate": 2.5981642019377868e-05, "loss": 2.0221, "step": 25950 }, { "epoch": 4.412714601393847, "grad_norm": 20.860227584838867, "learning_rate": 2.597880899767692e-05, "loss": 2.4047, "step": 25960 }, { "epoch": 4.414414414414415, "grad_norm": 14.104951858520508, "learning_rate": 2.5975975975975978e-05, "loss": 1.8183, "step": 25970 }, { "epoch": 4.416114227434982, "grad_norm": 15.445775985717773, "learning_rate": 2.5973142954275032e-05, "loss": 2.4362, "step": 25980 }, { "epoch": 4.41781404045555, "grad_norm": 27.78687858581543, "learning_rate": 2.5970309932574082e-05, "loss": 2.159, "step": 25990 }, { "epoch": 4.419513853476118, "grad_norm": 10.584996223449707, "learning_rate": 2.596747691087314e-05, "loss": 2.1793, "step": 26000 }, { "epoch": 4.421213666496685, "grad_norm": 13.970370292663574, "learning_rate": 2.5964643889172192e-05, "loss": 2.2182, "step": 26010 }, { "epoch": 4.422913479517253, "grad_norm": 18.284212112426758, "learning_rate": 2.5961810867471242e-05, "loss": 2.1423, "step": 26020 }, { "epoch": 4.4246132925378205, "grad_norm": 12.951577186584473, "learning_rate": 2.59589778457703e-05, "loss": 2.0825, "step": 26030 }, { "epoch": 4.426313105558388, "grad_norm": 23.06389617919922, "learning_rate": 2.5956144824069353e-05, "loss": 2.3585, "step": 26040 }, { "epoch": 4.428012918578956, "grad_norm": 14.908980369567871, "learning_rate": 2.5953311802368406e-05, "loss": 2.1801, "step": 26050 }, { "epoch": 4.429712731599524, "grad_norm": 9.899836540222168, "learning_rate": 2.595047878066746e-05, "loss": 2.3778, "step": 26060 }, { "epoch": 4.431412544620092, "grad_norm": 20.40337562561035, "learning_rate": 2.5947645758966513e-05, "loss": 2.2698, "step": 26070 }, { "epoch": 4.43311235764066, "grad_norm": 11.846283912658691, "learning_rate": 2.5944812737265567e-05, "loss": 2.2576, "step": 26080 }, { "epoch": 4.434812170661227, "grad_norm": 13.33751392364502, "learning_rate": 2.5941979715564624e-05, "loss": 2.359, "step": 26090 }, { "epoch": 4.436511983681795, "grad_norm": 11.835699081420898, "learning_rate": 2.5939146693863674e-05, "loss": 2.2402, "step": 26100 }, { "epoch": 4.438211796702363, "grad_norm": 13.805220603942871, "learning_rate": 2.5936313672162728e-05, "loss": 2.2774, "step": 26110 }, { "epoch": 4.43991160972293, "grad_norm": 9.056527137756348, "learning_rate": 2.5933480650461784e-05, "loss": 2.4332, "step": 26120 }, { "epoch": 4.441611422743498, "grad_norm": 11.076848983764648, "learning_rate": 2.5930647628760838e-05, "loss": 2.355, "step": 26130 }, { "epoch": 4.443311235764066, "grad_norm": 11.525144577026367, "learning_rate": 2.592781460705989e-05, "loss": 2.2921, "step": 26140 }, { "epoch": 4.445011048784634, "grad_norm": 25.0610408782959, "learning_rate": 2.5924981585358945e-05, "loss": 2.4227, "step": 26150 }, { "epoch": 4.446710861805202, "grad_norm": 23.41382598876953, "learning_rate": 2.5922148563658e-05, "loss": 2.1137, "step": 26160 }, { "epoch": 4.4484106748257695, "grad_norm": 17.238323211669922, "learning_rate": 2.5919315541957052e-05, "loss": 2.1139, "step": 26170 }, { "epoch": 4.450110487846337, "grad_norm": 13.132096290588379, "learning_rate": 2.5916482520256106e-05, "loss": 2.0893, "step": 26180 }, { "epoch": 4.451810300866905, "grad_norm": 12.69159984588623, "learning_rate": 2.591364949855516e-05, "loss": 2.3296, "step": 26190 }, { "epoch": 4.453510113887472, "grad_norm": 12.589784622192383, "learning_rate": 2.5910816476854216e-05, "loss": 2.2317, "step": 26200 }, { "epoch": 4.45520992690804, "grad_norm": 11.644707679748535, "learning_rate": 2.5907983455153266e-05, "loss": 2.2836, "step": 26210 }, { "epoch": 4.456909739928608, "grad_norm": 24.44771385192871, "learning_rate": 2.590515043345232e-05, "loss": 1.9617, "step": 26220 }, { "epoch": 4.458609552949175, "grad_norm": 11.418966293334961, "learning_rate": 2.5902317411751377e-05, "loss": 2.1907, "step": 26230 }, { "epoch": 4.460309365969743, "grad_norm": 12.791181564331055, "learning_rate": 2.589948439005043e-05, "loss": 2.2034, "step": 26240 }, { "epoch": 4.462009178990311, "grad_norm": 24.992658615112305, "learning_rate": 2.589665136834948e-05, "loss": 2.2477, "step": 26250 }, { "epoch": 4.463708992010879, "grad_norm": 17.085054397583008, "learning_rate": 2.5893818346648537e-05, "loss": 2.413, "step": 26260 }, { "epoch": 4.465408805031447, "grad_norm": 10.559613227844238, "learning_rate": 2.589098532494759e-05, "loss": 2.2271, "step": 26270 }, { "epoch": 4.4671086180520145, "grad_norm": 11.044917106628418, "learning_rate": 2.5888152303246644e-05, "loss": 2.3739, "step": 26280 }, { "epoch": 4.468808431072582, "grad_norm": 10.733954429626465, "learning_rate": 2.5885319281545698e-05, "loss": 2.1384, "step": 26290 }, { "epoch": 4.47050824409315, "grad_norm": 14.83717155456543, "learning_rate": 2.588248625984475e-05, "loss": 2.2675, "step": 26300 }, { "epoch": 4.4722080571137175, "grad_norm": 11.195780754089355, "learning_rate": 2.5879653238143805e-05, "loss": 2.5406, "step": 26310 }, { "epoch": 4.473907870134285, "grad_norm": 14.817870140075684, "learning_rate": 2.587682021644286e-05, "loss": 2.4073, "step": 26320 }, { "epoch": 4.475607683154853, "grad_norm": 15.018265724182129, "learning_rate": 2.5873987194741912e-05, "loss": 2.4459, "step": 26330 }, { "epoch": 4.4773074961754205, "grad_norm": 9.3007230758667, "learning_rate": 2.5871154173040965e-05, "loss": 2.1883, "step": 26340 }, { "epoch": 4.479007309195988, "grad_norm": 15.683428764343262, "learning_rate": 2.5868321151340022e-05, "loss": 2.3679, "step": 26350 }, { "epoch": 4.480707122216556, "grad_norm": 19.535888671875, "learning_rate": 2.5865488129639072e-05, "loss": 2.2354, "step": 26360 }, { "epoch": 4.482406935237124, "grad_norm": 11.722899436950684, "learning_rate": 2.5862655107938126e-05, "loss": 2.1998, "step": 26370 }, { "epoch": 4.484106748257692, "grad_norm": 12.87264347076416, "learning_rate": 2.5859822086237183e-05, "loss": 2.1623, "step": 26380 }, { "epoch": 4.48580656127826, "grad_norm": 13.619871139526367, "learning_rate": 2.5856989064536236e-05, "loss": 2.2561, "step": 26390 }, { "epoch": 4.487506374298827, "grad_norm": 15.271156311035156, "learning_rate": 2.5854156042835286e-05, "loss": 2.3732, "step": 26400 }, { "epoch": 4.489206187319395, "grad_norm": 15.614595413208008, "learning_rate": 2.5851323021134343e-05, "loss": 2.1121, "step": 26410 }, { "epoch": 4.490906000339963, "grad_norm": 16.533004760742188, "learning_rate": 2.5848489999433397e-05, "loss": 2.2769, "step": 26420 }, { "epoch": 4.49260581336053, "grad_norm": 11.835558891296387, "learning_rate": 2.584565697773245e-05, "loss": 2.2853, "step": 26430 }, { "epoch": 4.494305626381098, "grad_norm": 16.266067504882812, "learning_rate": 2.5842823956031504e-05, "loss": 1.9624, "step": 26440 }, { "epoch": 4.4960054394016655, "grad_norm": 14.696431159973145, "learning_rate": 2.5839990934330557e-05, "loss": 2.1066, "step": 26450 }, { "epoch": 4.497705252422233, "grad_norm": 17.55812644958496, "learning_rate": 2.583715791262961e-05, "loss": 2.1547, "step": 26460 }, { "epoch": 4.499405065442801, "grad_norm": 14.07583236694336, "learning_rate": 2.5834324890928668e-05, "loss": 2.2569, "step": 26470 }, { "epoch": 4.501104878463369, "grad_norm": 12.037975311279297, "learning_rate": 2.5831491869227718e-05, "loss": 2.3118, "step": 26480 }, { "epoch": 4.502804691483937, "grad_norm": 15.308354377746582, "learning_rate": 2.582865884752677e-05, "loss": 2.4425, "step": 26490 }, { "epoch": 4.504504504504505, "grad_norm": 18.241077423095703, "learning_rate": 2.582582582582583e-05, "loss": 2.2175, "step": 26500 }, { "epoch": 4.506204317525072, "grad_norm": 13.44802188873291, "learning_rate": 2.582299280412488e-05, "loss": 2.2846, "step": 26510 }, { "epoch": 4.50790413054564, "grad_norm": 15.874268531799316, "learning_rate": 2.5820159782423932e-05, "loss": 2.5755, "step": 26520 }, { "epoch": 4.509603943566208, "grad_norm": 22.144182205200195, "learning_rate": 2.581732676072299e-05, "loss": 2.0865, "step": 26530 }, { "epoch": 4.511303756586775, "grad_norm": 13.8005952835083, "learning_rate": 2.5814493739022043e-05, "loss": 2.3223, "step": 26540 }, { "epoch": 4.513003569607343, "grad_norm": 13.0172700881958, "learning_rate": 2.5811660717321093e-05, "loss": 1.9123, "step": 26550 }, { "epoch": 4.514703382627911, "grad_norm": 12.863749504089355, "learning_rate": 2.580882769562015e-05, "loss": 2.0379, "step": 26560 }, { "epoch": 4.516403195648479, "grad_norm": 26.467546463012695, "learning_rate": 2.5805994673919203e-05, "loss": 2.1155, "step": 26570 }, { "epoch": 4.518103008669046, "grad_norm": 10.387659072875977, "learning_rate": 2.5803161652218257e-05, "loss": 2.2455, "step": 26580 }, { "epoch": 4.5198028216896144, "grad_norm": 14.827202796936035, "learning_rate": 2.580032863051731e-05, "loss": 2.3416, "step": 26590 }, { "epoch": 4.521502634710182, "grad_norm": 17.5122127532959, "learning_rate": 2.5797495608816364e-05, "loss": 2.094, "step": 26600 }, { "epoch": 4.52320244773075, "grad_norm": 15.146675109863281, "learning_rate": 2.5794662587115417e-05, "loss": 2.4338, "step": 26610 }, { "epoch": 4.524902260751317, "grad_norm": 12.422202110290527, "learning_rate": 2.5791829565414474e-05, "loss": 2.1739, "step": 26620 }, { "epoch": 4.526602073771885, "grad_norm": 19.615076065063477, "learning_rate": 2.5788996543713524e-05, "loss": 2.1529, "step": 26630 }, { "epoch": 4.528301886792453, "grad_norm": 22.70585060119629, "learning_rate": 2.5786163522012578e-05, "loss": 1.9134, "step": 26640 }, { "epoch": 4.53000169981302, "grad_norm": 18.374927520751953, "learning_rate": 2.5783330500311635e-05, "loss": 2.0718, "step": 26650 }, { "epoch": 4.531701512833588, "grad_norm": 14.9367094039917, "learning_rate": 2.5780497478610688e-05, "loss": 2.1427, "step": 26660 }, { "epoch": 4.533401325854156, "grad_norm": 9.083553314208984, "learning_rate": 2.577766445690974e-05, "loss": 2.1423, "step": 26670 }, { "epoch": 4.535101138874724, "grad_norm": 15.423566818237305, "learning_rate": 2.5774831435208795e-05, "loss": 2.0403, "step": 26680 }, { "epoch": 4.536800951895292, "grad_norm": 13.10180950164795, "learning_rate": 2.577199841350785e-05, "loss": 2.0519, "step": 26690 }, { "epoch": 4.5385007649158595, "grad_norm": 18.711044311523438, "learning_rate": 2.57691653918069e-05, "loss": 1.8412, "step": 26700 }, { "epoch": 4.540200577936427, "grad_norm": 13.55654525756836, "learning_rate": 2.5766332370105956e-05, "loss": 2.4371, "step": 26710 }, { "epoch": 4.541900390956995, "grad_norm": 11.236729621887207, "learning_rate": 2.576349934840501e-05, "loss": 2.1786, "step": 26720 }, { "epoch": 4.5436002039775625, "grad_norm": 17.656816482543945, "learning_rate": 2.5760666326704063e-05, "loss": 2.4525, "step": 26730 }, { "epoch": 4.54530001699813, "grad_norm": 11.830954551696777, "learning_rate": 2.5757833305003116e-05, "loss": 2.2629, "step": 26740 }, { "epoch": 4.546999830018698, "grad_norm": 13.329683303833008, "learning_rate": 2.575500028330217e-05, "loss": 2.4176, "step": 26750 }, { "epoch": 4.548699643039265, "grad_norm": 15.640417098999023, "learning_rate": 2.5752167261601223e-05, "loss": 2.1681, "step": 26760 }, { "epoch": 4.550399456059833, "grad_norm": 10.433568954467773, "learning_rate": 2.574933423990028e-05, "loss": 2.1669, "step": 26770 }, { "epoch": 4.552099269080401, "grad_norm": 13.31651782989502, "learning_rate": 2.574650121819933e-05, "loss": 2.1722, "step": 26780 }, { "epoch": 4.553799082100969, "grad_norm": 18.492773056030273, "learning_rate": 2.5743668196498384e-05, "loss": 2.2803, "step": 26790 }, { "epoch": 4.555498895121537, "grad_norm": 15.443243980407715, "learning_rate": 2.574083517479744e-05, "loss": 2.1297, "step": 26800 }, { "epoch": 4.557198708142105, "grad_norm": 20.1118221282959, "learning_rate": 2.5738002153096494e-05, "loss": 2.087, "step": 26810 }, { "epoch": 4.558898521162672, "grad_norm": 14.19023323059082, "learning_rate": 2.5735169131395545e-05, "loss": 2.1458, "step": 26820 }, { "epoch": 4.56059833418324, "grad_norm": 15.680213928222656, "learning_rate": 2.57323361096946e-05, "loss": 2.2965, "step": 26830 }, { "epoch": 4.5622981472038076, "grad_norm": 7.7088117599487305, "learning_rate": 2.5729503087993655e-05, "loss": 2.1935, "step": 26840 }, { "epoch": 4.563997960224375, "grad_norm": 17.596187591552734, "learning_rate": 2.572667006629271e-05, "loss": 2.6158, "step": 26850 }, { "epoch": 4.565697773244943, "grad_norm": 11.86677360534668, "learning_rate": 2.5723837044591762e-05, "loss": 2.179, "step": 26860 }, { "epoch": 4.5673975862655105, "grad_norm": 10.977624893188477, "learning_rate": 2.5721004022890816e-05, "loss": 2.2384, "step": 26870 }, { "epoch": 4.569097399286078, "grad_norm": 19.494129180908203, "learning_rate": 2.5718171001189872e-05, "loss": 2.2351, "step": 26880 }, { "epoch": 4.570797212306646, "grad_norm": 14.21782398223877, "learning_rate": 2.5715337979488923e-05, "loss": 2.3367, "step": 26890 }, { "epoch": 4.572497025327214, "grad_norm": 20.869104385375977, "learning_rate": 2.5712504957787976e-05, "loss": 2.0724, "step": 26900 }, { "epoch": 4.574196838347782, "grad_norm": 14.475811004638672, "learning_rate": 2.5709671936087033e-05, "loss": 2.1369, "step": 26910 }, { "epoch": 4.57589665136835, "grad_norm": 10.026018142700195, "learning_rate": 2.5706838914386087e-05, "loss": 2.509, "step": 26920 }, { "epoch": 4.577596464388917, "grad_norm": 12.429583549499512, "learning_rate": 2.5704005892685137e-05, "loss": 2.3921, "step": 26930 }, { "epoch": 4.579296277409485, "grad_norm": 14.97450065612793, "learning_rate": 2.5701172870984194e-05, "loss": 2.2179, "step": 26940 }, { "epoch": 4.580996090430053, "grad_norm": 13.4276704788208, "learning_rate": 2.5698339849283247e-05, "loss": 2.3735, "step": 26950 }, { "epoch": 4.58269590345062, "grad_norm": 17.11408042907715, "learning_rate": 2.56955068275823e-05, "loss": 2.0367, "step": 26960 }, { "epoch": 4.584395716471188, "grad_norm": 13.038308143615723, "learning_rate": 2.5692673805881354e-05, "loss": 2.1157, "step": 26970 }, { "epoch": 4.586095529491756, "grad_norm": 19.291854858398438, "learning_rate": 2.5689840784180408e-05, "loss": 2.0108, "step": 26980 }, { "epoch": 4.587795342512324, "grad_norm": 16.984086990356445, "learning_rate": 2.568700776247946e-05, "loss": 2.1928, "step": 26990 }, { "epoch": 4.589495155532891, "grad_norm": 18.01893424987793, "learning_rate": 2.5684174740778518e-05, "loss": 2.2006, "step": 27000 }, { "epoch": 4.591194968553459, "grad_norm": 15.996216773986816, "learning_rate": 2.5681341719077568e-05, "loss": 2.2425, "step": 27010 }, { "epoch": 4.592894781574027, "grad_norm": 13.303544998168945, "learning_rate": 2.5678508697376622e-05, "loss": 2.3254, "step": 27020 }, { "epoch": 4.594594594594595, "grad_norm": 13.44189453125, "learning_rate": 2.567567567567568e-05, "loss": 2.1365, "step": 27030 }, { "epoch": 4.596294407615162, "grad_norm": 11.440678596496582, "learning_rate": 2.567284265397473e-05, "loss": 2.3061, "step": 27040 }, { "epoch": 4.59799422063573, "grad_norm": 11.60512638092041, "learning_rate": 2.5670009632273782e-05, "loss": 2.4531, "step": 27050 }, { "epoch": 4.599694033656298, "grad_norm": 15.713448524475098, "learning_rate": 2.566717661057284e-05, "loss": 2.1685, "step": 27060 }, { "epoch": 4.601393846676865, "grad_norm": 14.54518985748291, "learning_rate": 2.5664343588871893e-05, "loss": 2.2589, "step": 27070 }, { "epoch": 4.603093659697433, "grad_norm": 11.48324203491211, "learning_rate": 2.5661510567170943e-05, "loss": 2.3323, "step": 27080 }, { "epoch": 4.604793472718001, "grad_norm": 16.52007484436035, "learning_rate": 2.565867754547e-05, "loss": 2.3392, "step": 27090 }, { "epoch": 4.606493285738569, "grad_norm": 13.907919883728027, "learning_rate": 2.5655844523769053e-05, "loss": 2.0035, "step": 27100 }, { "epoch": 4.608193098759137, "grad_norm": 16.001155853271484, "learning_rate": 2.5653011502068107e-05, "loss": 2.3831, "step": 27110 }, { "epoch": 4.6098929117797045, "grad_norm": 15.122842788696289, "learning_rate": 2.565017848036716e-05, "loss": 2.3648, "step": 27120 }, { "epoch": 4.611592724800272, "grad_norm": 15.290109634399414, "learning_rate": 2.5647345458666214e-05, "loss": 2.4195, "step": 27130 }, { "epoch": 4.61329253782084, "grad_norm": 10.308342933654785, "learning_rate": 2.5644512436965267e-05, "loss": 2.2462, "step": 27140 }, { "epoch": 4.6149923508414075, "grad_norm": 17.19900131225586, "learning_rate": 2.5641679415264324e-05, "loss": 2.193, "step": 27150 }, { "epoch": 4.616692163861975, "grad_norm": 13.824296951293945, "learning_rate": 2.5638846393563374e-05, "loss": 2.3012, "step": 27160 }, { "epoch": 4.618391976882543, "grad_norm": 19.414352416992188, "learning_rate": 2.5636013371862428e-05, "loss": 2.2072, "step": 27170 }, { "epoch": 4.62009178990311, "grad_norm": 13.976152420043945, "learning_rate": 2.5633180350161485e-05, "loss": 2.3363, "step": 27180 }, { "epoch": 4.621791602923678, "grad_norm": 20.128952026367188, "learning_rate": 2.563034732846054e-05, "loss": 2.3339, "step": 27190 }, { "epoch": 4.623491415944246, "grad_norm": 13.453858375549316, "learning_rate": 2.562751430675959e-05, "loss": 2.3102, "step": 27200 }, { "epoch": 4.625191228964814, "grad_norm": 15.683561325073242, "learning_rate": 2.5624681285058645e-05, "loss": 2.1326, "step": 27210 }, { "epoch": 4.626891041985382, "grad_norm": 20.78658676147461, "learning_rate": 2.56218482633577e-05, "loss": 1.9583, "step": 27220 }, { "epoch": 4.62859085500595, "grad_norm": 15.675028800964355, "learning_rate": 2.561901524165675e-05, "loss": 2.2321, "step": 27230 }, { "epoch": 4.630290668026517, "grad_norm": 15.34562873840332, "learning_rate": 2.5616182219955806e-05, "loss": 2.3652, "step": 27240 }, { "epoch": 4.631990481047085, "grad_norm": 19.739944458007812, "learning_rate": 2.561334919825486e-05, "loss": 2.1753, "step": 27250 }, { "epoch": 4.6336902940676525, "grad_norm": 20.802574157714844, "learning_rate": 2.5610516176553913e-05, "loss": 2.2789, "step": 27260 }, { "epoch": 4.63539010708822, "grad_norm": 15.954666137695312, "learning_rate": 2.5607683154852967e-05, "loss": 2.338, "step": 27270 }, { "epoch": 4.637089920108788, "grad_norm": 15.703943252563477, "learning_rate": 2.560485013315202e-05, "loss": 2.2001, "step": 27280 }, { "epoch": 4.6387897331293555, "grad_norm": 11.169831275939941, "learning_rate": 2.5602017111451074e-05, "loss": 2.2151, "step": 27290 }, { "epoch": 4.640489546149923, "grad_norm": 9.287684440612793, "learning_rate": 2.559918408975013e-05, "loss": 2.443, "step": 27300 }, { "epoch": 4.642189359170491, "grad_norm": 14.178738594055176, "learning_rate": 2.559635106804918e-05, "loss": 2.4155, "step": 27310 }, { "epoch": 4.643889172191059, "grad_norm": 14.377573013305664, "learning_rate": 2.5593518046348234e-05, "loss": 2.2995, "step": 27320 }, { "epoch": 4.645588985211627, "grad_norm": 17.996530532836914, "learning_rate": 2.559068502464729e-05, "loss": 2.184, "step": 27330 }, { "epoch": 4.647288798232195, "grad_norm": 11.268898963928223, "learning_rate": 2.5587852002946345e-05, "loss": 2.1384, "step": 27340 }, { "epoch": 4.648988611252762, "grad_norm": 14.918800354003906, "learning_rate": 2.5585018981245395e-05, "loss": 2.0758, "step": 27350 }, { "epoch": 4.65068842427333, "grad_norm": 20.173389434814453, "learning_rate": 2.558218595954445e-05, "loss": 2.3869, "step": 27360 }, { "epoch": 4.652388237293898, "grad_norm": 11.849833488464355, "learning_rate": 2.5579352937843505e-05, "loss": 2.2592, "step": 27370 }, { "epoch": 4.654088050314465, "grad_norm": 15.825318336486816, "learning_rate": 2.5576519916142555e-05, "loss": 2.1395, "step": 27380 }, { "epoch": 4.655787863335033, "grad_norm": 11.64792251586914, "learning_rate": 2.5573686894441612e-05, "loss": 2.2286, "step": 27390 }, { "epoch": 4.657487676355601, "grad_norm": 11.19229507446289, "learning_rate": 2.5570853872740666e-05, "loss": 2.3577, "step": 27400 }, { "epoch": 4.659187489376169, "grad_norm": 31.25528335571289, "learning_rate": 2.556802085103972e-05, "loss": 2.3143, "step": 27410 }, { "epoch": 4.660887302396736, "grad_norm": 10.7914457321167, "learning_rate": 2.5565187829338773e-05, "loss": 2.2306, "step": 27420 }, { "epoch": 4.662587115417304, "grad_norm": 15.442253112792969, "learning_rate": 2.5562354807637826e-05, "loss": 2.2422, "step": 27430 }, { "epoch": 4.664286928437872, "grad_norm": 18.514385223388672, "learning_rate": 2.555952178593688e-05, "loss": 2.1121, "step": 27440 }, { "epoch": 4.66598674145844, "grad_norm": 15.596724510192871, "learning_rate": 2.5556688764235937e-05, "loss": 2.1977, "step": 27450 }, { "epoch": 4.667686554479007, "grad_norm": 15.620475769042969, "learning_rate": 2.5553855742534987e-05, "loss": 2.1837, "step": 27460 }, { "epoch": 4.669386367499575, "grad_norm": 17.447885513305664, "learning_rate": 2.555102272083404e-05, "loss": 2.2628, "step": 27470 }, { "epoch": 4.671086180520143, "grad_norm": 11.351727485656738, "learning_rate": 2.5548189699133097e-05, "loss": 2.3117, "step": 27480 }, { "epoch": 4.67278599354071, "grad_norm": 12.575736999511719, "learning_rate": 2.554535667743215e-05, "loss": 2.2786, "step": 27490 }, { "epoch": 4.674485806561278, "grad_norm": 16.68338966369629, "learning_rate": 2.55425236557312e-05, "loss": 2.093, "step": 27500 }, { "epoch": 4.676185619581846, "grad_norm": 12.465678215026855, "learning_rate": 2.5539690634030258e-05, "loss": 2.0399, "step": 27510 }, { "epoch": 4.677885432602414, "grad_norm": 14.20907211303711, "learning_rate": 2.553685761232931e-05, "loss": 2.4675, "step": 27520 }, { "epoch": 4.679585245622982, "grad_norm": 12.663277626037598, "learning_rate": 2.5534024590628365e-05, "loss": 2.1339, "step": 27530 }, { "epoch": 4.6812850586435495, "grad_norm": 12.716651916503906, "learning_rate": 2.553119156892742e-05, "loss": 2.2682, "step": 27540 }, { "epoch": 4.682984871664117, "grad_norm": 16.639711380004883, "learning_rate": 2.5528358547226472e-05, "loss": 1.8358, "step": 27550 }, { "epoch": 4.684684684684685, "grad_norm": 12.37985610961914, "learning_rate": 2.5525525525525525e-05, "loss": 2.079, "step": 27560 }, { "epoch": 4.6863844977052524, "grad_norm": 14.815011024475098, "learning_rate": 2.552269250382458e-05, "loss": 1.9664, "step": 27570 }, { "epoch": 4.68808431072582, "grad_norm": 19.29581069946289, "learning_rate": 2.5519859482123633e-05, "loss": 2.3881, "step": 27580 }, { "epoch": 4.689784123746388, "grad_norm": 14.509621620178223, "learning_rate": 2.5517026460422686e-05, "loss": 2.0363, "step": 27590 }, { "epoch": 4.691483936766955, "grad_norm": 24.21263313293457, "learning_rate": 2.5514193438721743e-05, "loss": 2.2693, "step": 27600 }, { "epoch": 4.693183749787523, "grad_norm": 16.798439025878906, "learning_rate": 2.5511360417020793e-05, "loss": 2.3988, "step": 27610 }, { "epoch": 4.694883562808091, "grad_norm": 12.793252944946289, "learning_rate": 2.550852739531985e-05, "loss": 2.1584, "step": 27620 }, { "epoch": 4.696583375828659, "grad_norm": 11.026993751525879, "learning_rate": 2.5505694373618904e-05, "loss": 1.9593, "step": 27630 }, { "epoch": 4.698283188849227, "grad_norm": 11.542662620544434, "learning_rate": 2.5502861351917957e-05, "loss": 2.2465, "step": 27640 }, { "epoch": 4.699983001869795, "grad_norm": 8.38283634185791, "learning_rate": 2.550002833021701e-05, "loss": 2.228, "step": 27650 }, { "epoch": 4.701682814890362, "grad_norm": 15.251449584960938, "learning_rate": 2.5497195308516064e-05, "loss": 2.1734, "step": 27660 }, { "epoch": 4.70338262791093, "grad_norm": 11.104562759399414, "learning_rate": 2.5494362286815118e-05, "loss": 2.3534, "step": 27670 }, { "epoch": 4.7050824409314975, "grad_norm": 14.65054702758789, "learning_rate": 2.5491529265114175e-05, "loss": 2.3468, "step": 27680 }, { "epoch": 4.706782253952065, "grad_norm": 18.36554527282715, "learning_rate": 2.5488696243413225e-05, "loss": 2.1983, "step": 27690 }, { "epoch": 4.708482066972633, "grad_norm": 10.794525146484375, "learning_rate": 2.5485863221712278e-05, "loss": 2.2249, "step": 27700 }, { "epoch": 4.7101818799932005, "grad_norm": 17.19161605834961, "learning_rate": 2.5483030200011335e-05, "loss": 2.3752, "step": 27710 }, { "epoch": 4.711881693013768, "grad_norm": 14.378907203674316, "learning_rate": 2.5480197178310385e-05, "loss": 2.037, "step": 27720 }, { "epoch": 4.713581506034336, "grad_norm": 21.069181442260742, "learning_rate": 2.547736415660944e-05, "loss": 2.0048, "step": 27730 }, { "epoch": 4.715281319054904, "grad_norm": 14.965779304504395, "learning_rate": 2.5474531134908496e-05, "loss": 2.2524, "step": 27740 }, { "epoch": 4.716981132075472, "grad_norm": 13.682586669921875, "learning_rate": 2.547169811320755e-05, "loss": 2.1027, "step": 27750 }, { "epoch": 4.71868094509604, "grad_norm": 11.380779266357422, "learning_rate": 2.54688650915066e-05, "loss": 2.1758, "step": 27760 }, { "epoch": 4.720380758116607, "grad_norm": 13.355500221252441, "learning_rate": 2.5466032069805656e-05, "loss": 2.2618, "step": 27770 }, { "epoch": 4.722080571137175, "grad_norm": 14.977874755859375, "learning_rate": 2.546319904810471e-05, "loss": 2.1593, "step": 27780 }, { "epoch": 4.723780384157743, "grad_norm": 21.95730972290039, "learning_rate": 2.5460366026403763e-05, "loss": 2.4141, "step": 27790 }, { "epoch": 4.72548019717831, "grad_norm": 16.061199188232422, "learning_rate": 2.5457533004702817e-05, "loss": 2.1785, "step": 27800 }, { "epoch": 4.727180010198878, "grad_norm": 17.422269821166992, "learning_rate": 2.545469998300187e-05, "loss": 2.1841, "step": 27810 }, { "epoch": 4.7288798232194456, "grad_norm": 14.688477516174316, "learning_rate": 2.5451866961300924e-05, "loss": 2.1267, "step": 27820 }, { "epoch": 4.730579636240014, "grad_norm": 12.899093627929688, "learning_rate": 2.544903393959998e-05, "loss": 2.0351, "step": 27830 }, { "epoch": 4.732279449260581, "grad_norm": 18.060775756835938, "learning_rate": 2.544620091789903e-05, "loss": 2.2835, "step": 27840 }, { "epoch": 4.733979262281149, "grad_norm": 16.292985916137695, "learning_rate": 2.5443367896198084e-05, "loss": 2.3665, "step": 27850 }, { "epoch": 4.735679075301717, "grad_norm": 14.38727855682373, "learning_rate": 2.544053487449714e-05, "loss": 2.2063, "step": 27860 }, { "epoch": 4.737378888322285, "grad_norm": 17.877317428588867, "learning_rate": 2.5437701852796195e-05, "loss": 2.2344, "step": 27870 }, { "epoch": 4.739078701342852, "grad_norm": 9.7129487991333, "learning_rate": 2.5434868831095245e-05, "loss": 2.0311, "step": 27880 }, { "epoch": 4.74077851436342, "grad_norm": 20.303287506103516, "learning_rate": 2.5432035809394302e-05, "loss": 2.3774, "step": 27890 }, { "epoch": 4.742478327383988, "grad_norm": 12.306840896606445, "learning_rate": 2.5429202787693355e-05, "loss": 2.2813, "step": 27900 }, { "epoch": 4.744178140404555, "grad_norm": 15.43497371673584, "learning_rate": 2.5426369765992406e-05, "loss": 2.1848, "step": 27910 }, { "epoch": 4.745877953425123, "grad_norm": 13.455979347229004, "learning_rate": 2.5423536744291462e-05, "loss": 2.1208, "step": 27920 }, { "epoch": 4.747577766445691, "grad_norm": 15.489696502685547, "learning_rate": 2.5420703722590516e-05, "loss": 2.1736, "step": 27930 }, { "epoch": 4.749277579466259, "grad_norm": 17.32720184326172, "learning_rate": 2.541787070088957e-05, "loss": 2.166, "step": 27940 }, { "epoch": 4.750977392486827, "grad_norm": 15.67549991607666, "learning_rate": 2.5415037679188623e-05, "loss": 2.4012, "step": 27950 }, { "epoch": 4.7526772055073945, "grad_norm": 12.888555526733398, "learning_rate": 2.5412204657487677e-05, "loss": 2.4098, "step": 27960 }, { "epoch": 4.754377018527962, "grad_norm": 12.006670951843262, "learning_rate": 2.540937163578673e-05, "loss": 2.2027, "step": 27970 }, { "epoch": 4.75607683154853, "grad_norm": 22.52776336669922, "learning_rate": 2.5406538614085787e-05, "loss": 1.9818, "step": 27980 }, { "epoch": 4.757776644569097, "grad_norm": 12.29944896697998, "learning_rate": 2.5403705592384837e-05, "loss": 2.4192, "step": 27990 }, { "epoch": 4.759476457589665, "grad_norm": 11.550341606140137, "learning_rate": 2.540087257068389e-05, "loss": 1.8724, "step": 28000 }, { "epoch": 4.761176270610233, "grad_norm": 20.456151962280273, "learning_rate": 2.5398039548982948e-05, "loss": 2.1514, "step": 28010 }, { "epoch": 4.7628760836308, "grad_norm": 8.103565216064453, "learning_rate": 2.5395206527282e-05, "loss": 2.5117, "step": 28020 }, { "epoch": 4.764575896651368, "grad_norm": 20.0963134765625, "learning_rate": 2.539237350558105e-05, "loss": 2.2112, "step": 28030 }, { "epoch": 4.766275709671936, "grad_norm": 14.410978317260742, "learning_rate": 2.5389540483880108e-05, "loss": 2.201, "step": 28040 }, { "epoch": 4.767975522692504, "grad_norm": 14.493188858032227, "learning_rate": 2.538670746217916e-05, "loss": 2.3729, "step": 28050 }, { "epoch": 4.769675335713072, "grad_norm": 10.576849937438965, "learning_rate": 2.5383874440478215e-05, "loss": 1.9188, "step": 28060 }, { "epoch": 4.7713751487336395, "grad_norm": 21.036521911621094, "learning_rate": 2.538104141877727e-05, "loss": 2.5272, "step": 28070 }, { "epoch": 4.773074961754207, "grad_norm": 11.790367126464844, "learning_rate": 2.5378208397076322e-05, "loss": 2.3872, "step": 28080 }, { "epoch": 4.774774774774775, "grad_norm": 18.773630142211914, "learning_rate": 2.5375375375375376e-05, "loss": 2.1002, "step": 28090 }, { "epoch": 4.7764745877953425, "grad_norm": 15.763036727905273, "learning_rate": 2.537254235367443e-05, "loss": 2.1319, "step": 28100 }, { "epoch": 4.77817440081591, "grad_norm": 13.98082447052002, "learning_rate": 2.5369709331973483e-05, "loss": 2.4528, "step": 28110 }, { "epoch": 4.779874213836478, "grad_norm": 19.348445892333984, "learning_rate": 2.5366876310272536e-05, "loss": 2.3682, "step": 28120 }, { "epoch": 4.7815740268570455, "grad_norm": 9.615653991699219, "learning_rate": 2.5364043288571593e-05, "loss": 2.2003, "step": 28130 }, { "epoch": 4.783273839877613, "grad_norm": 16.982501983642578, "learning_rate": 2.5361210266870643e-05, "loss": 2.3669, "step": 28140 }, { "epoch": 4.784973652898181, "grad_norm": 20.931251525878906, "learning_rate": 2.5358377245169697e-05, "loss": 2.1086, "step": 28150 }, { "epoch": 4.786673465918749, "grad_norm": 11.713793754577637, "learning_rate": 2.5355544223468754e-05, "loss": 2.3518, "step": 28160 }, { "epoch": 4.788373278939317, "grad_norm": 12.680601119995117, "learning_rate": 2.5352711201767807e-05, "loss": 2.1605, "step": 28170 }, { "epoch": 4.790073091959885, "grad_norm": 9.129535675048828, "learning_rate": 2.5349878180066857e-05, "loss": 2.2558, "step": 28180 }, { "epoch": 4.791772904980452, "grad_norm": 20.752958297729492, "learning_rate": 2.5347045158365914e-05, "loss": 2.2795, "step": 28190 }, { "epoch": 4.79347271800102, "grad_norm": 18.221040725708008, "learning_rate": 2.5344212136664968e-05, "loss": 2.2846, "step": 28200 }, { "epoch": 4.795172531021588, "grad_norm": 12.761719703674316, "learning_rate": 2.534137911496402e-05, "loss": 2.3457, "step": 28210 }, { "epoch": 4.796872344042155, "grad_norm": 13.45468521118164, "learning_rate": 2.5338546093263075e-05, "loss": 2.1876, "step": 28220 }, { "epoch": 4.798572157062723, "grad_norm": 17.27958869934082, "learning_rate": 2.533571307156213e-05, "loss": 2.3038, "step": 28230 }, { "epoch": 4.8002719700832905, "grad_norm": 12.064440727233887, "learning_rate": 2.5332880049861182e-05, "loss": 2.2534, "step": 28240 }, { "epoch": 4.801971783103858, "grad_norm": 14.833060264587402, "learning_rate": 2.5330047028160235e-05, "loss": 2.1915, "step": 28250 }, { "epoch": 4.803671596124426, "grad_norm": 15.052633285522461, "learning_rate": 2.532721400645929e-05, "loss": 2.1365, "step": 28260 }, { "epoch": 4.805371409144994, "grad_norm": 20.70868682861328, "learning_rate": 2.5324380984758342e-05, "loss": 2.1574, "step": 28270 }, { "epoch": 4.807071222165562, "grad_norm": 14.162760734558105, "learning_rate": 2.53215479630574e-05, "loss": 2.4573, "step": 28280 }, { "epoch": 4.80877103518613, "grad_norm": 10.454894065856934, "learning_rate": 2.531871494135645e-05, "loss": 2.4242, "step": 28290 }, { "epoch": 4.810470848206697, "grad_norm": 15.09305477142334, "learning_rate": 2.5315881919655503e-05, "loss": 2.3075, "step": 28300 }, { "epoch": 4.812170661227265, "grad_norm": 16.08052635192871, "learning_rate": 2.531304889795456e-05, "loss": 2.4341, "step": 28310 }, { "epoch": 4.813870474247833, "grad_norm": 12.681238174438477, "learning_rate": 2.5310215876253613e-05, "loss": 1.9905, "step": 28320 }, { "epoch": 4.8155702872684, "grad_norm": 18.061288833618164, "learning_rate": 2.5307382854552667e-05, "loss": 1.937, "step": 28330 }, { "epoch": 4.817270100288968, "grad_norm": 14.81062126159668, "learning_rate": 2.530454983285172e-05, "loss": 2.3909, "step": 28340 }, { "epoch": 4.818969913309536, "grad_norm": 13.493488311767578, "learning_rate": 2.5301716811150774e-05, "loss": 2.0782, "step": 28350 }, { "epoch": 4.820669726330104, "grad_norm": 16.296987533569336, "learning_rate": 2.529888378944983e-05, "loss": 2.1379, "step": 28360 }, { "epoch": 4.822369539350672, "grad_norm": 14.055363655090332, "learning_rate": 2.529605076774888e-05, "loss": 2.3005, "step": 28370 }, { "epoch": 4.8240693523712395, "grad_norm": 9.274168968200684, "learning_rate": 2.5293217746047935e-05, "loss": 2.1905, "step": 28380 }, { "epoch": 4.825769165391807, "grad_norm": 12.437247276306152, "learning_rate": 2.529038472434699e-05, "loss": 2.2313, "step": 28390 }, { "epoch": 4.827468978412375, "grad_norm": 14.63685131072998, "learning_rate": 2.5287551702646045e-05, "loss": 1.9823, "step": 28400 }, { "epoch": 4.829168791432942, "grad_norm": 11.96121883392334, "learning_rate": 2.5284718680945095e-05, "loss": 2.5701, "step": 28410 }, { "epoch": 4.83086860445351, "grad_norm": 18.396533966064453, "learning_rate": 2.5281885659244152e-05, "loss": 2.2508, "step": 28420 }, { "epoch": 4.832568417474078, "grad_norm": 11.413497924804688, "learning_rate": 2.5279052637543206e-05, "loss": 2.1615, "step": 28430 }, { "epoch": 4.834268230494645, "grad_norm": 19.623367309570312, "learning_rate": 2.5276219615842256e-05, "loss": 2.2571, "step": 28440 }, { "epoch": 4.835968043515213, "grad_norm": 17.4354248046875, "learning_rate": 2.5273386594141313e-05, "loss": 2.1032, "step": 28450 }, { "epoch": 4.837667856535781, "grad_norm": 13.9458646774292, "learning_rate": 2.5270553572440366e-05, "loss": 2.2861, "step": 28460 }, { "epoch": 4.839367669556349, "grad_norm": 11.198529243469238, "learning_rate": 2.526772055073942e-05, "loss": 2.2632, "step": 28470 }, { "epoch": 4.841067482576917, "grad_norm": 22.909013748168945, "learning_rate": 2.5264887529038473e-05, "loss": 1.8862, "step": 28480 }, { "epoch": 4.8427672955974845, "grad_norm": 15.858660697937012, "learning_rate": 2.5262054507337527e-05, "loss": 2.2221, "step": 28490 }, { "epoch": 4.844467108618052, "grad_norm": 16.699731826782227, "learning_rate": 2.525922148563658e-05, "loss": 2.1892, "step": 28500 }, { "epoch": 4.84616692163862, "grad_norm": 11.318259239196777, "learning_rate": 2.5256388463935637e-05, "loss": 2.1939, "step": 28510 }, { "epoch": 4.8478667346591875, "grad_norm": 18.718111038208008, "learning_rate": 2.5253555442234687e-05, "loss": 2.1391, "step": 28520 }, { "epoch": 4.849566547679755, "grad_norm": 11.114636421203613, "learning_rate": 2.525072242053374e-05, "loss": 2.4402, "step": 28530 }, { "epoch": 4.851266360700323, "grad_norm": 13.807464599609375, "learning_rate": 2.5247889398832798e-05, "loss": 1.9232, "step": 28540 }, { "epoch": 4.8529661737208905, "grad_norm": 14.688129425048828, "learning_rate": 2.524505637713185e-05, "loss": 2.3288, "step": 28550 }, { "epoch": 4.854665986741458, "grad_norm": 13.31218147277832, "learning_rate": 2.52422233554309e-05, "loss": 2.1392, "step": 28560 }, { "epoch": 4.856365799762026, "grad_norm": 14.56001091003418, "learning_rate": 2.5239390333729958e-05, "loss": 2.1624, "step": 28570 }, { "epoch": 4.858065612782594, "grad_norm": 16.884674072265625, "learning_rate": 2.5236557312029012e-05, "loss": 2.1586, "step": 28580 }, { "epoch": 4.859765425803162, "grad_norm": 16.025707244873047, "learning_rate": 2.5233724290328062e-05, "loss": 2.3007, "step": 28590 }, { "epoch": 4.86146523882373, "grad_norm": 20.99744415283203, "learning_rate": 2.523089126862712e-05, "loss": 2.0814, "step": 28600 }, { "epoch": 4.863165051844297, "grad_norm": 11.138815879821777, "learning_rate": 2.5228058246926172e-05, "loss": 2.2362, "step": 28610 }, { "epoch": 4.864864864864865, "grad_norm": 14.97313404083252, "learning_rate": 2.5225225225225226e-05, "loss": 2.0159, "step": 28620 }, { "epoch": 4.866564677885433, "grad_norm": 20.484594345092773, "learning_rate": 2.522239220352428e-05, "loss": 1.9971, "step": 28630 }, { "epoch": 4.868264490906, "grad_norm": 18.723228454589844, "learning_rate": 2.5219559181823333e-05, "loss": 2.2357, "step": 28640 }, { "epoch": 4.869964303926568, "grad_norm": 8.300471305847168, "learning_rate": 2.5216726160122386e-05, "loss": 2.2406, "step": 28650 }, { "epoch": 4.8716641169471355, "grad_norm": 14.977460861206055, "learning_rate": 2.5213893138421443e-05, "loss": 2.3506, "step": 28660 }, { "epoch": 4.873363929967703, "grad_norm": 12.596651077270508, "learning_rate": 2.5211060116720493e-05, "loss": 2.1441, "step": 28670 }, { "epoch": 4.875063742988271, "grad_norm": 11.352949142456055, "learning_rate": 2.5208227095019547e-05, "loss": 2.1702, "step": 28680 }, { "epoch": 4.876763556008839, "grad_norm": 14.120526313781738, "learning_rate": 2.5205394073318604e-05, "loss": 2.0656, "step": 28690 }, { "epoch": 4.878463369029407, "grad_norm": 15.089561462402344, "learning_rate": 2.5202561051617657e-05, "loss": 2.2094, "step": 28700 }, { "epoch": 4.880163182049975, "grad_norm": 16.652999877929688, "learning_rate": 2.5199728029916708e-05, "loss": 2.3961, "step": 28710 }, { "epoch": 4.881862995070542, "grad_norm": 16.40583610534668, "learning_rate": 2.5196895008215764e-05, "loss": 2.2381, "step": 28720 }, { "epoch": 4.88356280809111, "grad_norm": 13.707261085510254, "learning_rate": 2.5194061986514818e-05, "loss": 2.3943, "step": 28730 }, { "epoch": 4.885262621111678, "grad_norm": 20.7974910736084, "learning_rate": 2.519122896481387e-05, "loss": 2.1104, "step": 28740 }, { "epoch": 4.886962434132245, "grad_norm": 15.423148155212402, "learning_rate": 2.5188395943112925e-05, "loss": 2.2636, "step": 28750 }, { "epoch": 4.888662247152813, "grad_norm": 18.606325149536133, "learning_rate": 2.518556292141198e-05, "loss": 2.1158, "step": 28760 }, { "epoch": 4.890362060173381, "grad_norm": 13.12887191772461, "learning_rate": 2.5182729899711032e-05, "loss": 2.1698, "step": 28770 }, { "epoch": 4.892061873193949, "grad_norm": 17.290616989135742, "learning_rate": 2.5179896878010086e-05, "loss": 2.3944, "step": 28780 }, { "epoch": 4.893761686214516, "grad_norm": 9.861157417297363, "learning_rate": 2.517706385630914e-05, "loss": 2.255, "step": 28790 }, { "epoch": 4.8954614992350844, "grad_norm": 12.581522941589355, "learning_rate": 2.5174230834608193e-05, "loss": 2.1793, "step": 28800 }, { "epoch": 4.897161312255652, "grad_norm": 17.388702392578125, "learning_rate": 2.517139781290725e-05, "loss": 2.0072, "step": 28810 }, { "epoch": 4.89886112527622, "grad_norm": 16.057144165039062, "learning_rate": 2.51685647912063e-05, "loss": 2.2745, "step": 28820 }, { "epoch": 4.900560938296787, "grad_norm": 25.02967643737793, "learning_rate": 2.5165731769505353e-05, "loss": 1.9809, "step": 28830 }, { "epoch": 4.902260751317355, "grad_norm": 18.68120574951172, "learning_rate": 2.516289874780441e-05, "loss": 2.2028, "step": 28840 }, { "epoch": 4.903960564337923, "grad_norm": 10.898262977600098, "learning_rate": 2.5160065726103464e-05, "loss": 2.0816, "step": 28850 }, { "epoch": 4.90566037735849, "grad_norm": 14.306493759155273, "learning_rate": 2.5157232704402514e-05, "loss": 2.1811, "step": 28860 }, { "epoch": 4.907360190379058, "grad_norm": 18.25521469116211, "learning_rate": 2.515439968270157e-05, "loss": 2.0221, "step": 28870 }, { "epoch": 4.909060003399626, "grad_norm": 12.781434059143066, "learning_rate": 2.5151566661000624e-05, "loss": 2.203, "step": 28880 }, { "epoch": 4.910759816420194, "grad_norm": 14.312202453613281, "learning_rate": 2.5148733639299678e-05, "loss": 2.4161, "step": 28890 }, { "epoch": 4.912459629440762, "grad_norm": 11.503962516784668, "learning_rate": 2.514590061759873e-05, "loss": 1.94, "step": 28900 }, { "epoch": 4.9141594424613295, "grad_norm": 10.70704460144043, "learning_rate": 2.5143067595897785e-05, "loss": 2.2105, "step": 28910 }, { "epoch": 4.915859255481897, "grad_norm": 12.81826114654541, "learning_rate": 2.514023457419684e-05, "loss": 2.228, "step": 28920 }, { "epoch": 4.917559068502465, "grad_norm": 14.183276176452637, "learning_rate": 2.5137401552495892e-05, "loss": 2.0127, "step": 28930 }, { "epoch": 4.9192588815230325, "grad_norm": 10.062665939331055, "learning_rate": 2.5134568530794945e-05, "loss": 2.3796, "step": 28940 }, { "epoch": 4.9209586945436, "grad_norm": 20.244503021240234, "learning_rate": 2.5131735509094e-05, "loss": 2.41, "step": 28950 }, { "epoch": 4.922658507564168, "grad_norm": 14.224032402038574, "learning_rate": 2.5128902487393056e-05, "loss": 2.245, "step": 28960 }, { "epoch": 4.924358320584735, "grad_norm": 16.23502540588379, "learning_rate": 2.5126069465692106e-05, "loss": 2.0455, "step": 28970 }, { "epoch": 4.926058133605303, "grad_norm": 26.250978469848633, "learning_rate": 2.512323644399116e-05, "loss": 2.1652, "step": 28980 }, { "epoch": 4.927757946625871, "grad_norm": 16.17625617980957, "learning_rate": 2.5120403422290216e-05, "loss": 2.1201, "step": 28990 }, { "epoch": 4.929457759646439, "grad_norm": 12.665478706359863, "learning_rate": 2.511757040058927e-05, "loss": 2.1506, "step": 29000 }, { "epoch": 4.931157572667007, "grad_norm": 12.898954391479492, "learning_rate": 2.511473737888832e-05, "loss": 1.9279, "step": 29010 }, { "epoch": 4.932857385687575, "grad_norm": 13.176307678222656, "learning_rate": 2.5111904357187377e-05, "loss": 2.2959, "step": 29020 }, { "epoch": 4.934557198708142, "grad_norm": 14.218398094177246, "learning_rate": 2.510907133548643e-05, "loss": 2.2502, "step": 29030 }, { "epoch": 4.93625701172871, "grad_norm": 18.287282943725586, "learning_rate": 2.5106238313785484e-05, "loss": 2.1055, "step": 29040 }, { "epoch": 4.9379568247492776, "grad_norm": 10.48314094543457, "learning_rate": 2.5103405292084537e-05, "loss": 2.3685, "step": 29050 }, { "epoch": 4.939656637769845, "grad_norm": 14.179285049438477, "learning_rate": 2.510057227038359e-05, "loss": 2.2216, "step": 29060 }, { "epoch": 4.941356450790413, "grad_norm": 19.26877212524414, "learning_rate": 2.5097739248682648e-05, "loss": 1.9094, "step": 29070 }, { "epoch": 4.9430562638109805, "grad_norm": 17.476970672607422, "learning_rate": 2.50949062269817e-05, "loss": 2.3326, "step": 29080 }, { "epoch": 4.944756076831548, "grad_norm": 12.415572166442871, "learning_rate": 2.509207320528075e-05, "loss": 2.3997, "step": 29090 }, { "epoch": 4.946455889852116, "grad_norm": 13.54542350769043, "learning_rate": 2.508924018357981e-05, "loss": 2.251, "step": 29100 }, { "epoch": 4.948155702872684, "grad_norm": 19.03134536743164, "learning_rate": 2.5086407161878862e-05, "loss": 2.3983, "step": 29110 }, { "epoch": 4.949855515893252, "grad_norm": 18.4527530670166, "learning_rate": 2.5083574140177912e-05, "loss": 2.2413, "step": 29120 }, { "epoch": 4.95155532891382, "grad_norm": 14.551301956176758, "learning_rate": 2.508074111847697e-05, "loss": 2.2743, "step": 29130 }, { "epoch": 4.953255141934387, "grad_norm": 19.176218032836914, "learning_rate": 2.5077908096776023e-05, "loss": 2.4139, "step": 29140 }, { "epoch": 4.954954954954955, "grad_norm": 12.670951843261719, "learning_rate": 2.5075075075075076e-05, "loss": 2.3735, "step": 29150 }, { "epoch": 4.956654767975523, "grad_norm": 16.58173179626465, "learning_rate": 2.507224205337413e-05, "loss": 2.2513, "step": 29160 }, { "epoch": 4.95835458099609, "grad_norm": 9.278830528259277, "learning_rate": 2.5069409031673183e-05, "loss": 2.2561, "step": 29170 }, { "epoch": 4.960054394016658, "grad_norm": 15.093437194824219, "learning_rate": 2.5066576009972237e-05, "loss": 2.2782, "step": 29180 }, { "epoch": 4.961754207037226, "grad_norm": 11.65147590637207, "learning_rate": 2.5063742988271294e-05, "loss": 2.2144, "step": 29190 }, { "epoch": 4.963454020057794, "grad_norm": 14.172957420349121, "learning_rate": 2.5060909966570344e-05, "loss": 2.4142, "step": 29200 }, { "epoch": 4.965153833078361, "grad_norm": 14.656672477722168, "learning_rate": 2.5058076944869397e-05, "loss": 2.2402, "step": 29210 }, { "epoch": 4.966853646098929, "grad_norm": 13.513054847717285, "learning_rate": 2.5055243923168454e-05, "loss": 2.2648, "step": 29220 }, { "epoch": 4.968553459119497, "grad_norm": 12.231895446777344, "learning_rate": 2.5052410901467508e-05, "loss": 2.3469, "step": 29230 }, { "epoch": 4.970253272140065, "grad_norm": 18.237876892089844, "learning_rate": 2.5049577879766558e-05, "loss": 2.4138, "step": 29240 }, { "epoch": 4.971953085160632, "grad_norm": 15.547760963439941, "learning_rate": 2.5046744858065615e-05, "loss": 2.2154, "step": 29250 }, { "epoch": 4.9736528981812, "grad_norm": 15.874534606933594, "learning_rate": 2.5043911836364668e-05, "loss": 2.303, "step": 29260 }, { "epoch": 4.975352711201768, "grad_norm": 17.315467834472656, "learning_rate": 2.5041078814663722e-05, "loss": 2.2842, "step": 29270 }, { "epoch": 4.977052524222335, "grad_norm": 41.28723907470703, "learning_rate": 2.5038245792962775e-05, "loss": 2.2395, "step": 29280 }, { "epoch": 4.978752337242903, "grad_norm": 11.374451637268066, "learning_rate": 2.503541277126183e-05, "loss": 2.3052, "step": 29290 }, { "epoch": 4.980452150263471, "grad_norm": 13.740416526794434, "learning_rate": 2.5032579749560882e-05, "loss": 2.1598, "step": 29300 }, { "epoch": 4.982151963284039, "grad_norm": 19.067028045654297, "learning_rate": 2.5029746727859936e-05, "loss": 1.9189, "step": 29310 }, { "epoch": 4.983851776304607, "grad_norm": 12.165393829345703, "learning_rate": 2.502691370615899e-05, "loss": 1.9102, "step": 29320 }, { "epoch": 4.9855515893251745, "grad_norm": 16.515356063842773, "learning_rate": 2.5024080684458043e-05, "loss": 2.3783, "step": 29330 }, { "epoch": 4.987251402345742, "grad_norm": 14.55142879486084, "learning_rate": 2.50212476627571e-05, "loss": 2.1969, "step": 29340 }, { "epoch": 4.98895121536631, "grad_norm": 9.150641441345215, "learning_rate": 2.501841464105615e-05, "loss": 1.9231, "step": 29350 }, { "epoch": 4.9906510283868775, "grad_norm": 15.112797737121582, "learning_rate": 2.5015581619355203e-05, "loss": 2.2177, "step": 29360 }, { "epoch": 4.992350841407445, "grad_norm": 21.56864356994629, "learning_rate": 2.501274859765426e-05, "loss": 2.1922, "step": 29370 }, { "epoch": 4.994050654428013, "grad_norm": 11.881202697753906, "learning_rate": 2.5009915575953314e-05, "loss": 2.1672, "step": 29380 }, { "epoch": 4.99575046744858, "grad_norm": 228.30819702148438, "learning_rate": 2.5007082554252364e-05, "loss": 2.223, "step": 29390 }, { "epoch": 4.997450280469148, "grad_norm": 15.640515327453613, "learning_rate": 2.500424953255142e-05, "loss": 2.1071, "step": 29400 }, { "epoch": 4.999150093489716, "grad_norm": 17.799734115600586, "learning_rate": 2.5001416510850474e-05, "loss": 2.1076, "step": 29410 }, { "epoch": 5.0, "eval_cer": 0.9997238005050505, "eval_loss": 2.5228967666625977, "eval_runtime": 1952.2216, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.241, "step": 29415 }, { "epoch": 5.000849906510284, "grad_norm": 40.376487731933594, "learning_rate": 2.4998583489149528e-05, "loss": 2.0064, "step": 29420 }, { "epoch": 5.002549719530852, "grad_norm": 16.476049423217773, "learning_rate": 2.499575046744858e-05, "loss": 2.1792, "step": 29430 }, { "epoch": 5.00424953255142, "grad_norm": 18.852617263793945, "learning_rate": 2.4992917445747635e-05, "loss": 2.1834, "step": 29440 }, { "epoch": 5.005949345571987, "grad_norm": 13.432628631591797, "learning_rate": 2.499008442404669e-05, "loss": 1.9092, "step": 29450 }, { "epoch": 5.007649158592555, "grad_norm": 15.379107475280762, "learning_rate": 2.4987251402345742e-05, "loss": 1.9499, "step": 29460 }, { "epoch": 5.0093489716131225, "grad_norm": 16.193134307861328, "learning_rate": 2.4984418380644796e-05, "loss": 2.3028, "step": 29470 }, { "epoch": 5.01104878463369, "grad_norm": 23.69245719909668, "learning_rate": 2.498158535894385e-05, "loss": 1.829, "step": 29480 }, { "epoch": 5.012748597654258, "grad_norm": 15.596738815307617, "learning_rate": 2.4978752337242906e-05, "loss": 1.9852, "step": 29490 }, { "epoch": 5.0144484106748255, "grad_norm": 13.742416381835938, "learning_rate": 2.4975919315541956e-05, "loss": 2.1602, "step": 29500 }, { "epoch": 5.016148223695393, "grad_norm": 15.602206230163574, "learning_rate": 2.497308629384101e-05, "loss": 1.9934, "step": 29510 }, { "epoch": 5.017848036715961, "grad_norm": 24.187435150146484, "learning_rate": 2.4970253272140067e-05, "loss": 1.9036, "step": 29520 }, { "epoch": 5.019547849736529, "grad_norm": 9.037596702575684, "learning_rate": 2.496742025043912e-05, "loss": 2.103, "step": 29530 }, { "epoch": 5.021247662757097, "grad_norm": 17.772626876831055, "learning_rate": 2.496458722873817e-05, "loss": 2.0606, "step": 29540 }, { "epoch": 5.022947475777665, "grad_norm": 13.835237503051758, "learning_rate": 2.4961754207037227e-05, "loss": 1.889, "step": 29550 }, { "epoch": 5.024647288798232, "grad_norm": 11.892874717712402, "learning_rate": 2.495892118533628e-05, "loss": 2.092, "step": 29560 }, { "epoch": 5.0263471018188, "grad_norm": 17.55487060546875, "learning_rate": 2.4956088163635334e-05, "loss": 1.9798, "step": 29570 }, { "epoch": 5.028046914839368, "grad_norm": 18.38755226135254, "learning_rate": 2.4953255141934388e-05, "loss": 2.079, "step": 29580 }, { "epoch": 5.029746727859935, "grad_norm": 14.455432891845703, "learning_rate": 2.495042212023344e-05, "loss": 1.8019, "step": 29590 }, { "epoch": 5.031446540880503, "grad_norm": 14.965415954589844, "learning_rate": 2.4947589098532495e-05, "loss": 1.9672, "step": 29600 }, { "epoch": 5.033146353901071, "grad_norm": 15.349989891052246, "learning_rate": 2.494475607683155e-05, "loss": 2.0696, "step": 29610 }, { "epoch": 5.034846166921638, "grad_norm": 18.725234985351562, "learning_rate": 2.4941923055130602e-05, "loss": 2.0227, "step": 29620 }, { "epoch": 5.036545979942207, "grad_norm": 12.853507041931152, "learning_rate": 2.4939090033429655e-05, "loss": 1.9678, "step": 29630 }, { "epoch": 5.038245792962774, "grad_norm": 20.28145408630371, "learning_rate": 2.4936257011728712e-05, "loss": 2.1441, "step": 29640 }, { "epoch": 5.039945605983342, "grad_norm": 13.758895874023438, "learning_rate": 2.4933423990027762e-05, "loss": 2.0462, "step": 29650 }, { "epoch": 5.04164541900391, "grad_norm": 17.336627960205078, "learning_rate": 2.4930590968326816e-05, "loss": 1.9016, "step": 29660 }, { "epoch": 5.043345232024477, "grad_norm": 13.159982681274414, "learning_rate": 2.4927757946625873e-05, "loss": 2.1252, "step": 29670 }, { "epoch": 5.045045045045045, "grad_norm": 28.227134704589844, "learning_rate": 2.4924924924924926e-05, "loss": 1.9536, "step": 29680 }, { "epoch": 5.046744858065613, "grad_norm": 18.88231658935547, "learning_rate": 2.4922091903223976e-05, "loss": 2.0931, "step": 29690 }, { "epoch": 5.04844467108618, "grad_norm": 13.04302978515625, "learning_rate": 2.4919258881523033e-05, "loss": 1.862, "step": 29700 }, { "epoch": 5.050144484106748, "grad_norm": 18.392154693603516, "learning_rate": 2.4916425859822087e-05, "loss": 1.9496, "step": 29710 }, { "epoch": 5.051844297127316, "grad_norm": 12.831497192382812, "learning_rate": 2.491359283812114e-05, "loss": 2.0584, "step": 29720 }, { "epoch": 5.053544110147883, "grad_norm": 17.11581039428711, "learning_rate": 2.4910759816420194e-05, "loss": 2.0931, "step": 29730 }, { "epoch": 5.055243923168452, "grad_norm": 16.418113708496094, "learning_rate": 2.4907926794719247e-05, "loss": 2.0095, "step": 29740 }, { "epoch": 5.0569437361890195, "grad_norm": 11.651277542114258, "learning_rate": 2.49050937730183e-05, "loss": 2.1977, "step": 29750 }, { "epoch": 5.058643549209587, "grad_norm": 12.28815746307373, "learning_rate": 2.4902260751317358e-05, "loss": 1.7735, "step": 29760 }, { "epoch": 5.060343362230155, "grad_norm": 16.806232452392578, "learning_rate": 2.4899427729616408e-05, "loss": 2.0305, "step": 29770 }, { "epoch": 5.0620431752507224, "grad_norm": 17.132110595703125, "learning_rate": 2.4896594707915465e-05, "loss": 1.8336, "step": 29780 }, { "epoch": 5.06374298827129, "grad_norm": 12.580150604248047, "learning_rate": 2.489376168621452e-05, "loss": 2.0351, "step": 29790 }, { "epoch": 5.065442801291858, "grad_norm": 22.256860733032227, "learning_rate": 2.489092866451357e-05, "loss": 1.9856, "step": 29800 }, { "epoch": 5.067142614312425, "grad_norm": 19.98979377746582, "learning_rate": 2.4888095642812625e-05, "loss": 1.991, "step": 29810 }, { "epoch": 5.068842427332993, "grad_norm": 13.477813720703125, "learning_rate": 2.488526262111168e-05, "loss": 1.8026, "step": 29820 }, { "epoch": 5.070542240353561, "grad_norm": 15.77297306060791, "learning_rate": 2.4882429599410733e-05, "loss": 2.131, "step": 29830 }, { "epoch": 5.072242053374129, "grad_norm": 13.305922508239746, "learning_rate": 2.4879596577709786e-05, "loss": 1.7692, "step": 29840 }, { "epoch": 5.073941866394697, "grad_norm": 16.2801570892334, "learning_rate": 2.487676355600884e-05, "loss": 2.0232, "step": 29850 }, { "epoch": 5.075641679415265, "grad_norm": 13.066911697387695, "learning_rate": 2.4873930534307893e-05, "loss": 1.8522, "step": 29860 }, { "epoch": 5.077341492435832, "grad_norm": 16.643680572509766, "learning_rate": 2.487109751260695e-05, "loss": 2.1191, "step": 29870 }, { "epoch": 5.0790413054564, "grad_norm": 18.251684188842773, "learning_rate": 2.4868264490906e-05, "loss": 1.9959, "step": 29880 }, { "epoch": 5.0807411184769675, "grad_norm": 16.823585510253906, "learning_rate": 2.4865431469205054e-05, "loss": 1.864, "step": 29890 }, { "epoch": 5.082440931497535, "grad_norm": 14.413436889648438, "learning_rate": 2.486259844750411e-05, "loss": 1.9309, "step": 29900 }, { "epoch": 5.084140744518103, "grad_norm": 16.14173126220703, "learning_rate": 2.4859765425803164e-05, "loss": 1.9487, "step": 29910 }, { "epoch": 5.0858405575386705, "grad_norm": 14.719501495361328, "learning_rate": 2.4856932404102214e-05, "loss": 1.9817, "step": 29920 }, { "epoch": 5.087540370559238, "grad_norm": 13.721915245056152, "learning_rate": 2.485409938240127e-05, "loss": 2.073, "step": 29930 }, { "epoch": 5.089240183579806, "grad_norm": 14.56579303741455, "learning_rate": 2.4851266360700325e-05, "loss": 2.0805, "step": 29940 }, { "epoch": 5.090939996600374, "grad_norm": 14.597428321838379, "learning_rate": 2.4848433338999378e-05, "loss": 2.1189, "step": 29950 }, { "epoch": 5.092639809620942, "grad_norm": 19.9304141998291, "learning_rate": 2.484560031729843e-05, "loss": 1.7679, "step": 29960 }, { "epoch": 5.09433962264151, "grad_norm": 16.381330490112305, "learning_rate": 2.4842767295597485e-05, "loss": 2.1141, "step": 29970 }, { "epoch": 5.096039435662077, "grad_norm": 10.608003616333008, "learning_rate": 2.483993427389654e-05, "loss": 2.1338, "step": 29980 }, { "epoch": 5.097739248682645, "grad_norm": 14.970489501953125, "learning_rate": 2.4837101252195592e-05, "loss": 2.1901, "step": 29990 }, { "epoch": 5.099439061703213, "grad_norm": 12.920001029968262, "learning_rate": 2.4834268230494646e-05, "loss": 1.9615, "step": 30000 }, { "epoch": 5.10113887472378, "grad_norm": 17.813844680786133, "learning_rate": 2.48314352087937e-05, "loss": 1.744, "step": 30010 }, { "epoch": 5.102838687744348, "grad_norm": 12.46806812286377, "learning_rate": 2.4828602187092756e-05, "loss": 2.1012, "step": 30020 }, { "epoch": 5.1045385007649156, "grad_norm": 11.255243301391602, "learning_rate": 2.4825769165391806e-05, "loss": 2.1205, "step": 30030 }, { "epoch": 5.106238313785483, "grad_norm": 17.74955177307129, "learning_rate": 2.482293614369086e-05, "loss": 1.7092, "step": 30040 }, { "epoch": 5.107938126806052, "grad_norm": 15.57275390625, "learning_rate": 2.4820103121989917e-05, "loss": 2.1714, "step": 30050 }, { "epoch": 5.109637939826619, "grad_norm": 18.86517906188965, "learning_rate": 2.481727010028897e-05, "loss": 2.0649, "step": 30060 }, { "epoch": 5.111337752847187, "grad_norm": 12.6953763961792, "learning_rate": 2.481443707858802e-05, "loss": 1.9269, "step": 30070 }, { "epoch": 5.113037565867755, "grad_norm": 9.304295539855957, "learning_rate": 2.4811604056887077e-05, "loss": 2.0445, "step": 30080 }, { "epoch": 5.114737378888322, "grad_norm": 12.675256729125977, "learning_rate": 2.480877103518613e-05, "loss": 2.2639, "step": 30090 }, { "epoch": 5.11643719190889, "grad_norm": 16.96477699279785, "learning_rate": 2.4805938013485184e-05, "loss": 1.7774, "step": 30100 }, { "epoch": 5.118137004929458, "grad_norm": 10.700572967529297, "learning_rate": 2.4803104991784238e-05, "loss": 2.0893, "step": 30110 }, { "epoch": 5.119836817950025, "grad_norm": 13.502501487731934, "learning_rate": 2.480027197008329e-05, "loss": 2.0726, "step": 30120 }, { "epoch": 5.121536630970593, "grad_norm": 20.230186462402344, "learning_rate": 2.4797438948382345e-05, "loss": 2.0679, "step": 30130 }, { "epoch": 5.123236443991161, "grad_norm": 21.13994598388672, "learning_rate": 2.47946059266814e-05, "loss": 1.976, "step": 30140 }, { "epoch": 5.124936257011728, "grad_norm": 13.022753715515137, "learning_rate": 2.4791772904980452e-05, "loss": 2.1472, "step": 30150 }, { "epoch": 5.126636070032297, "grad_norm": 16.72687530517578, "learning_rate": 2.4788939883279506e-05, "loss": 1.9427, "step": 30160 }, { "epoch": 5.1283358830528645, "grad_norm": 14.340001106262207, "learning_rate": 2.4786106861578562e-05, "loss": 2.1849, "step": 30170 }, { "epoch": 5.130035696073432, "grad_norm": 11.094368934631348, "learning_rate": 2.4783273839877613e-05, "loss": 2.2307, "step": 30180 }, { "epoch": 5.131735509094, "grad_norm": 10.469944953918457, "learning_rate": 2.4780440818176666e-05, "loss": 1.9819, "step": 30190 }, { "epoch": 5.133435322114567, "grad_norm": 11.298221588134766, "learning_rate": 2.4777607796475723e-05, "loss": 2.0499, "step": 30200 }, { "epoch": 5.135135135135135, "grad_norm": 23.22077178955078, "learning_rate": 2.4774774774774777e-05, "loss": 2.0392, "step": 30210 }, { "epoch": 5.136834948155703, "grad_norm": 13.720732688903809, "learning_rate": 2.4771941753073827e-05, "loss": 2.016, "step": 30220 }, { "epoch": 5.13853476117627, "grad_norm": 18.81827735900879, "learning_rate": 2.4769108731372884e-05, "loss": 2.0222, "step": 30230 }, { "epoch": 5.140234574196838, "grad_norm": 13.575865745544434, "learning_rate": 2.4766275709671937e-05, "loss": 1.9237, "step": 30240 }, { "epoch": 5.141934387217406, "grad_norm": 11.65638256072998, "learning_rate": 2.476344268797099e-05, "loss": 2.2289, "step": 30250 }, { "epoch": 5.143634200237974, "grad_norm": 11.03844928741455, "learning_rate": 2.4760609666270044e-05, "loss": 2.1322, "step": 30260 }, { "epoch": 5.145334013258542, "grad_norm": 16.059619903564453, "learning_rate": 2.4757776644569098e-05, "loss": 2.0353, "step": 30270 }, { "epoch": 5.1470338262791095, "grad_norm": 12.103804588317871, "learning_rate": 2.475494362286815e-05, "loss": 2.2291, "step": 30280 }, { "epoch": 5.148733639299677, "grad_norm": 11.211617469787598, "learning_rate": 2.4752110601167208e-05, "loss": 2.0635, "step": 30290 }, { "epoch": 5.150433452320245, "grad_norm": 14.35761833190918, "learning_rate": 2.4749277579466258e-05, "loss": 2.1063, "step": 30300 }, { "epoch": 5.1521332653408125, "grad_norm": 12.413166999816895, "learning_rate": 2.4746444557765312e-05, "loss": 2.0302, "step": 30310 }, { "epoch": 5.15383307836138, "grad_norm": 24.389432907104492, "learning_rate": 2.474361153606437e-05, "loss": 2.0035, "step": 30320 }, { "epoch": 5.155532891381948, "grad_norm": 12.988105773925781, "learning_rate": 2.474077851436342e-05, "loss": 2.0319, "step": 30330 }, { "epoch": 5.1572327044025155, "grad_norm": 10.37813949584961, "learning_rate": 2.4737945492662472e-05, "loss": 2.0504, "step": 30340 }, { "epoch": 5.158932517423083, "grad_norm": 23.07668113708496, "learning_rate": 2.473511247096153e-05, "loss": 2.1343, "step": 30350 }, { "epoch": 5.160632330443651, "grad_norm": 9.826295852661133, "learning_rate": 2.4732279449260583e-05, "loss": 2.1404, "step": 30360 }, { "epoch": 5.162332143464219, "grad_norm": 17.205293655395508, "learning_rate": 2.4729446427559633e-05, "loss": 1.9389, "step": 30370 }, { "epoch": 5.164031956484787, "grad_norm": 12.459248542785645, "learning_rate": 2.472661340585869e-05, "loss": 1.9871, "step": 30380 }, { "epoch": 5.165731769505355, "grad_norm": 10.704066276550293, "learning_rate": 2.4723780384157743e-05, "loss": 1.8633, "step": 30390 }, { "epoch": 5.167431582525922, "grad_norm": 18.223133087158203, "learning_rate": 2.4720947362456797e-05, "loss": 2.0816, "step": 30400 }, { "epoch": 5.16913139554649, "grad_norm": 21.463241577148438, "learning_rate": 2.471811434075585e-05, "loss": 1.9079, "step": 30410 }, { "epoch": 5.170831208567058, "grad_norm": 12.787384033203125, "learning_rate": 2.4715281319054904e-05, "loss": 2.1007, "step": 30420 }, { "epoch": 5.172531021587625, "grad_norm": 11.604846954345703, "learning_rate": 2.4712448297353957e-05, "loss": 1.9919, "step": 30430 }, { "epoch": 5.174230834608193, "grad_norm": 15.462738990783691, "learning_rate": 2.4709615275653014e-05, "loss": 1.9653, "step": 30440 }, { "epoch": 5.1759306476287605, "grad_norm": 14.789737701416016, "learning_rate": 2.4706782253952064e-05, "loss": 1.9292, "step": 30450 }, { "epoch": 5.177630460649328, "grad_norm": 15.18970775604248, "learning_rate": 2.4703949232251118e-05, "loss": 1.8196, "step": 30460 }, { "epoch": 5.179330273669897, "grad_norm": 15.878073692321777, "learning_rate": 2.4701116210550175e-05, "loss": 2.0707, "step": 30470 }, { "epoch": 5.181030086690464, "grad_norm": 16.915775299072266, "learning_rate": 2.469828318884923e-05, "loss": 1.9031, "step": 30480 }, { "epoch": 5.182729899711032, "grad_norm": 12.8226900100708, "learning_rate": 2.469545016714828e-05, "loss": 2.1885, "step": 30490 }, { "epoch": 5.1844297127316, "grad_norm": 14.25072193145752, "learning_rate": 2.4692617145447335e-05, "loss": 2.2215, "step": 30500 }, { "epoch": 5.186129525752167, "grad_norm": 12.938172340393066, "learning_rate": 2.468978412374639e-05, "loss": 2.1822, "step": 30510 }, { "epoch": 5.187829338772735, "grad_norm": 19.77758026123047, "learning_rate": 2.4686951102045442e-05, "loss": 2.0131, "step": 30520 }, { "epoch": 5.189529151793303, "grad_norm": 12.821144104003906, "learning_rate": 2.4684118080344496e-05, "loss": 2.0482, "step": 30530 }, { "epoch": 5.19122896481387, "grad_norm": 14.924592971801758, "learning_rate": 2.468128505864355e-05, "loss": 2.0447, "step": 30540 }, { "epoch": 5.192928777834438, "grad_norm": 13.144856452941895, "learning_rate": 2.4678452036942606e-05, "loss": 2.2886, "step": 30550 }, { "epoch": 5.194628590855006, "grad_norm": 19.180370330810547, "learning_rate": 2.4675619015241657e-05, "loss": 2.1314, "step": 30560 }, { "epoch": 5.196328403875573, "grad_norm": 10.70704174041748, "learning_rate": 2.467278599354071e-05, "loss": 1.9059, "step": 30570 }, { "epoch": 5.198028216896142, "grad_norm": 14.03639030456543, "learning_rate": 2.4669952971839767e-05, "loss": 1.9066, "step": 30580 }, { "epoch": 5.1997280299167095, "grad_norm": 11.641141891479492, "learning_rate": 2.466711995013882e-05, "loss": 2.0294, "step": 30590 }, { "epoch": 5.201427842937277, "grad_norm": 15.236434936523438, "learning_rate": 2.466428692843787e-05, "loss": 1.8508, "step": 30600 }, { "epoch": 5.203127655957845, "grad_norm": 15.7008056640625, "learning_rate": 2.4661453906736928e-05, "loss": 1.9737, "step": 30610 }, { "epoch": 5.204827468978412, "grad_norm": 14.40638542175293, "learning_rate": 2.465862088503598e-05, "loss": 1.909, "step": 30620 }, { "epoch": 5.20652728199898, "grad_norm": 14.113566398620605, "learning_rate": 2.4655787863335035e-05, "loss": 1.8442, "step": 30630 }, { "epoch": 5.208227095019548, "grad_norm": 20.515148162841797, "learning_rate": 2.4652954841634088e-05, "loss": 2.0526, "step": 30640 }, { "epoch": 5.209926908040115, "grad_norm": 9.288618087768555, "learning_rate": 2.465012181993314e-05, "loss": 2.2459, "step": 30650 }, { "epoch": 5.211626721060683, "grad_norm": 12.802734375, "learning_rate": 2.4647288798232195e-05, "loss": 2.0252, "step": 30660 }, { "epoch": 5.213326534081251, "grad_norm": 16.266611099243164, "learning_rate": 2.464445577653125e-05, "loss": 2.11, "step": 30670 }, { "epoch": 5.215026347101819, "grad_norm": 12.239753723144531, "learning_rate": 2.4641622754830302e-05, "loss": 1.9114, "step": 30680 }, { "epoch": 5.216726160122387, "grad_norm": 15.912616729736328, "learning_rate": 2.4638789733129356e-05, "loss": 2.1008, "step": 30690 }, { "epoch": 5.2184259731429545, "grad_norm": 8.774721145629883, "learning_rate": 2.4635956711428413e-05, "loss": 2.1752, "step": 30700 }, { "epoch": 5.220125786163522, "grad_norm": 18.931612014770508, "learning_rate": 2.4633123689727463e-05, "loss": 2.0082, "step": 30710 }, { "epoch": 5.22182559918409, "grad_norm": 13.65514087677002, "learning_rate": 2.4630290668026516e-05, "loss": 2.0354, "step": 30720 }, { "epoch": 5.2235254122046575, "grad_norm": 19.122676849365234, "learning_rate": 2.4627457646325573e-05, "loss": 1.9359, "step": 30730 }, { "epoch": 5.225225225225225, "grad_norm": 18.958864212036133, "learning_rate": 2.4624624624624627e-05, "loss": 1.8224, "step": 30740 }, { "epoch": 5.226925038245793, "grad_norm": 16.679901123046875, "learning_rate": 2.4621791602923677e-05, "loss": 1.9238, "step": 30750 }, { "epoch": 5.2286248512663605, "grad_norm": 14.325608253479004, "learning_rate": 2.4618958581222734e-05, "loss": 1.9792, "step": 30760 }, { "epoch": 5.230324664286928, "grad_norm": 20.012744903564453, "learning_rate": 2.4616125559521787e-05, "loss": 2.0069, "step": 30770 }, { "epoch": 5.232024477307496, "grad_norm": 24.182737350463867, "learning_rate": 2.461329253782084e-05, "loss": 1.8144, "step": 30780 }, { "epoch": 5.233724290328064, "grad_norm": 8.957504272460938, "learning_rate": 2.4610459516119894e-05, "loss": 1.9753, "step": 30790 }, { "epoch": 5.235424103348632, "grad_norm": 19.028839111328125, "learning_rate": 2.4607626494418948e-05, "loss": 2.2021, "step": 30800 }, { "epoch": 5.2371239163692, "grad_norm": 18.093425750732422, "learning_rate": 2.4604793472718e-05, "loss": 2.1329, "step": 30810 }, { "epoch": 5.238823729389767, "grad_norm": 16.10199546813965, "learning_rate": 2.4601960451017058e-05, "loss": 1.7996, "step": 30820 }, { "epoch": 5.240523542410335, "grad_norm": 12.657090187072754, "learning_rate": 2.459912742931611e-05, "loss": 2.062, "step": 30830 }, { "epoch": 5.242223355430903, "grad_norm": 18.606454849243164, "learning_rate": 2.4596294407615162e-05, "loss": 2.1318, "step": 30840 }, { "epoch": 5.24392316845147, "grad_norm": 15.214423179626465, "learning_rate": 2.459346138591422e-05, "loss": 1.8495, "step": 30850 }, { "epoch": 5.245622981472038, "grad_norm": 35.4034538269043, "learning_rate": 2.459062836421327e-05, "loss": 2.0096, "step": 30860 }, { "epoch": 5.2473227944926055, "grad_norm": 13.308048248291016, "learning_rate": 2.4587795342512323e-05, "loss": 2.2336, "step": 30870 }, { "epoch": 5.249022607513173, "grad_norm": 9.21270751953125, "learning_rate": 2.458496232081138e-05, "loss": 2.1903, "step": 30880 }, { "epoch": 5.250722420533741, "grad_norm": 17.142845153808594, "learning_rate": 2.4582129299110433e-05, "loss": 2.0602, "step": 30890 }, { "epoch": 5.252422233554309, "grad_norm": 12.785419464111328, "learning_rate": 2.4579296277409483e-05, "loss": 2.1485, "step": 30900 }, { "epoch": 5.254122046574877, "grad_norm": 12.019719123840332, "learning_rate": 2.457646325570854e-05, "loss": 1.8231, "step": 30910 }, { "epoch": 5.255821859595445, "grad_norm": 14.50732421875, "learning_rate": 2.4573630234007594e-05, "loss": 2.0778, "step": 30920 }, { "epoch": 5.257521672616012, "grad_norm": 16.69218635559082, "learning_rate": 2.4570797212306647e-05, "loss": 1.9394, "step": 30930 }, { "epoch": 5.25922148563658, "grad_norm": 10.333846092224121, "learning_rate": 2.45679641906057e-05, "loss": 2.1165, "step": 30940 }, { "epoch": 5.260921298657148, "grad_norm": 15.772370338439941, "learning_rate": 2.4565131168904754e-05, "loss": 2.2043, "step": 30950 }, { "epoch": 5.262621111677715, "grad_norm": 11.466081619262695, "learning_rate": 2.4562298147203808e-05, "loss": 1.9813, "step": 30960 }, { "epoch": 5.264320924698283, "grad_norm": 25.013059616088867, "learning_rate": 2.4559465125502864e-05, "loss": 2.0076, "step": 30970 }, { "epoch": 5.266020737718851, "grad_norm": 12.280976295471191, "learning_rate": 2.4556632103801915e-05, "loss": 2.1333, "step": 30980 }, { "epoch": 5.267720550739418, "grad_norm": 11.5034761428833, "learning_rate": 2.4553799082100968e-05, "loss": 1.812, "step": 30990 }, { "epoch": 5.269420363759987, "grad_norm": 19.803993225097656, "learning_rate": 2.4550966060400025e-05, "loss": 2.0814, "step": 31000 }, { "epoch": 5.2711201767805544, "grad_norm": 20.18877410888672, "learning_rate": 2.4548133038699075e-05, "loss": 1.9659, "step": 31010 }, { "epoch": 5.272819989801122, "grad_norm": 23.925050735473633, "learning_rate": 2.454530001699813e-05, "loss": 2.058, "step": 31020 }, { "epoch": 5.27451980282169, "grad_norm": 14.723148345947266, "learning_rate": 2.4542466995297186e-05, "loss": 2.2141, "step": 31030 }, { "epoch": 5.276219615842257, "grad_norm": 15.293269157409668, "learning_rate": 2.453963397359624e-05, "loss": 2.0548, "step": 31040 }, { "epoch": 5.277919428862825, "grad_norm": 15.983235359191895, "learning_rate": 2.453680095189529e-05, "loss": 2.093, "step": 31050 }, { "epoch": 5.279619241883393, "grad_norm": 24.535961151123047, "learning_rate": 2.4533967930194346e-05, "loss": 2.2199, "step": 31060 }, { "epoch": 5.28131905490396, "grad_norm": 10.788337707519531, "learning_rate": 2.45311349084934e-05, "loss": 1.9889, "step": 31070 }, { "epoch": 5.283018867924528, "grad_norm": 13.632800102233887, "learning_rate": 2.4528301886792453e-05, "loss": 2.1431, "step": 31080 }, { "epoch": 5.284718680945096, "grad_norm": 16.877639770507812, "learning_rate": 2.4525468865091507e-05, "loss": 1.9277, "step": 31090 }, { "epoch": 5.286418493965664, "grad_norm": 13.862821578979492, "learning_rate": 2.452263584339056e-05, "loss": 2.0013, "step": 31100 }, { "epoch": 5.288118306986232, "grad_norm": 11.103711128234863, "learning_rate": 2.4519802821689614e-05, "loss": 2.2023, "step": 31110 }, { "epoch": 5.2898181200067995, "grad_norm": 16.351045608520508, "learning_rate": 2.451696979998867e-05, "loss": 2.1326, "step": 31120 }, { "epoch": 5.291517933027367, "grad_norm": 12.629387855529785, "learning_rate": 2.451413677828772e-05, "loss": 1.9491, "step": 31130 }, { "epoch": 5.293217746047935, "grad_norm": 12.919779777526855, "learning_rate": 2.4511303756586774e-05, "loss": 2.0373, "step": 31140 }, { "epoch": 5.2949175590685025, "grad_norm": 11.600210189819336, "learning_rate": 2.450847073488583e-05, "loss": 2.0107, "step": 31150 }, { "epoch": 5.29661737208907, "grad_norm": 12.369119644165039, "learning_rate": 2.4505637713184885e-05, "loss": 2.0151, "step": 31160 }, { "epoch": 5.298317185109638, "grad_norm": 13.624479293823242, "learning_rate": 2.4502804691483935e-05, "loss": 2.0722, "step": 31170 }, { "epoch": 5.300016998130205, "grad_norm": 17.92643928527832, "learning_rate": 2.4499971669782992e-05, "loss": 1.939, "step": 31180 }, { "epoch": 5.301716811150773, "grad_norm": 20.28795051574707, "learning_rate": 2.4497138648082045e-05, "loss": 2.0107, "step": 31190 }, { "epoch": 5.303416624171341, "grad_norm": 11.792516708374023, "learning_rate": 2.4494305626381096e-05, "loss": 1.8151, "step": 31200 }, { "epoch": 5.305116437191909, "grad_norm": 13.696178436279297, "learning_rate": 2.4491472604680152e-05, "loss": 2.187, "step": 31210 }, { "epoch": 5.306816250212477, "grad_norm": 15.575268745422363, "learning_rate": 2.4488639582979206e-05, "loss": 1.9206, "step": 31220 }, { "epoch": 5.308516063233045, "grad_norm": 20.934324264526367, "learning_rate": 2.448580656127826e-05, "loss": 2.064, "step": 31230 }, { "epoch": 5.310215876253612, "grad_norm": 13.865323066711426, "learning_rate": 2.4482973539577313e-05, "loss": 1.8794, "step": 31240 }, { "epoch": 5.31191568927418, "grad_norm": 13.18526554107666, "learning_rate": 2.4480140517876366e-05, "loss": 1.9511, "step": 31250 }, { "epoch": 5.3136155022947476, "grad_norm": 17.019824981689453, "learning_rate": 2.4477307496175423e-05, "loss": 1.8237, "step": 31260 }, { "epoch": 5.315315315315315, "grad_norm": 15.434456825256348, "learning_rate": 2.4474474474474477e-05, "loss": 2.0129, "step": 31270 }, { "epoch": 5.317015128335883, "grad_norm": 14.78494644165039, "learning_rate": 2.4471641452773527e-05, "loss": 2.0842, "step": 31280 }, { "epoch": 5.3187149413564505, "grad_norm": 13.625773429870605, "learning_rate": 2.4468808431072584e-05, "loss": 2.0137, "step": 31290 }, { "epoch": 5.320414754377018, "grad_norm": 13.517020225524902, "learning_rate": 2.4465975409371637e-05, "loss": 1.9554, "step": 31300 }, { "epoch": 5.322114567397586, "grad_norm": 13.854567527770996, "learning_rate": 2.446314238767069e-05, "loss": 1.9764, "step": 31310 }, { "epoch": 5.323814380418154, "grad_norm": 15.256377220153809, "learning_rate": 2.4460309365969745e-05, "loss": 2.0289, "step": 31320 }, { "epoch": 5.325514193438722, "grad_norm": 12.369717597961426, "learning_rate": 2.4457476344268798e-05, "loss": 1.9169, "step": 31330 }, { "epoch": 5.32721400645929, "grad_norm": 14.434927940368652, "learning_rate": 2.445464332256785e-05, "loss": 2.1149, "step": 31340 }, { "epoch": 5.328913819479857, "grad_norm": 18.51875877380371, "learning_rate": 2.4451810300866905e-05, "loss": 2.0771, "step": 31350 }, { "epoch": 5.330613632500425, "grad_norm": 13.235398292541504, "learning_rate": 2.444897727916596e-05, "loss": 2.0415, "step": 31360 }, { "epoch": 5.332313445520993, "grad_norm": 19.00577163696289, "learning_rate": 2.4446144257465012e-05, "loss": 2.1704, "step": 31370 }, { "epoch": 5.33401325854156, "grad_norm": 14.757222175598145, "learning_rate": 2.444331123576407e-05, "loss": 2.0011, "step": 31380 }, { "epoch": 5.335713071562128, "grad_norm": 16.208240509033203, "learning_rate": 2.444047821406312e-05, "loss": 1.8156, "step": 31390 }, { "epoch": 5.337412884582696, "grad_norm": 15.392440795898438, "learning_rate": 2.4437645192362173e-05, "loss": 2.0705, "step": 31400 }, { "epoch": 5.339112697603263, "grad_norm": 13.415069580078125, "learning_rate": 2.443481217066123e-05, "loss": 2.0278, "step": 31410 }, { "epoch": 5.340812510623832, "grad_norm": 17.657024383544922, "learning_rate": 2.4431979148960283e-05, "loss": 2.1709, "step": 31420 }, { "epoch": 5.342512323644399, "grad_norm": 16.56374168395996, "learning_rate": 2.4429146127259333e-05, "loss": 1.9195, "step": 31430 }, { "epoch": 5.344212136664967, "grad_norm": 20.3821964263916, "learning_rate": 2.442631310555839e-05, "loss": 2.0427, "step": 31440 }, { "epoch": 5.345911949685535, "grad_norm": 11.965658187866211, "learning_rate": 2.4423480083857444e-05, "loss": 2.0105, "step": 31450 }, { "epoch": 5.347611762706102, "grad_norm": 19.427982330322266, "learning_rate": 2.4420647062156497e-05, "loss": 2.0717, "step": 31460 }, { "epoch": 5.34931157572667, "grad_norm": 13.791108131408691, "learning_rate": 2.441781404045555e-05, "loss": 1.9036, "step": 31470 }, { "epoch": 5.351011388747238, "grad_norm": 13.205303192138672, "learning_rate": 2.4414981018754604e-05, "loss": 1.9843, "step": 31480 }, { "epoch": 5.352711201767805, "grad_norm": 18.393753051757812, "learning_rate": 2.4412147997053658e-05, "loss": 2.2182, "step": 31490 }, { "epoch": 5.354411014788373, "grad_norm": 18.93613624572754, "learning_rate": 2.4409314975352715e-05, "loss": 2.0924, "step": 31500 }, { "epoch": 5.356110827808941, "grad_norm": 12.122233390808105, "learning_rate": 2.4406481953651765e-05, "loss": 2.2581, "step": 31510 }, { "epoch": 5.357810640829509, "grad_norm": 14.119996070861816, "learning_rate": 2.440364893195082e-05, "loss": 2.1069, "step": 31520 }, { "epoch": 5.359510453850077, "grad_norm": 18.097808837890625, "learning_rate": 2.4400815910249875e-05, "loss": 2.169, "step": 31530 }, { "epoch": 5.3612102668706445, "grad_norm": 19.288084030151367, "learning_rate": 2.4397982888548925e-05, "loss": 1.9817, "step": 31540 }, { "epoch": 5.362910079891212, "grad_norm": 26.571765899658203, "learning_rate": 2.439514986684798e-05, "loss": 2.0139, "step": 31550 }, { "epoch": 5.36460989291178, "grad_norm": 13.849544525146484, "learning_rate": 2.4392316845147036e-05, "loss": 2.2215, "step": 31560 }, { "epoch": 5.3663097059323475, "grad_norm": 11.627181053161621, "learning_rate": 2.438948382344609e-05, "loss": 1.8829, "step": 31570 }, { "epoch": 5.368009518952915, "grad_norm": 15.898221015930176, "learning_rate": 2.438665080174514e-05, "loss": 2.065, "step": 31580 }, { "epoch": 5.369709331973483, "grad_norm": 27.73211097717285, "learning_rate": 2.4383817780044196e-05, "loss": 2.2874, "step": 31590 }, { "epoch": 5.37140914499405, "grad_norm": 10.012053489685059, "learning_rate": 2.438098475834325e-05, "loss": 2.1084, "step": 31600 }, { "epoch": 5.373108958014618, "grad_norm": 16.23117446899414, "learning_rate": 2.4378151736642303e-05, "loss": 1.9471, "step": 31610 }, { "epoch": 5.374808771035186, "grad_norm": 14.669291496276855, "learning_rate": 2.4375318714941357e-05, "loss": 1.8294, "step": 31620 }, { "epoch": 5.376508584055754, "grad_norm": 12.6187744140625, "learning_rate": 2.437248569324041e-05, "loss": 1.9482, "step": 31630 }, { "epoch": 5.378208397076322, "grad_norm": 18.1112060546875, "learning_rate": 2.4369652671539464e-05, "loss": 1.9244, "step": 31640 }, { "epoch": 5.37990821009689, "grad_norm": 14.218338012695312, "learning_rate": 2.436681964983852e-05, "loss": 1.8896, "step": 31650 }, { "epoch": 5.381608023117457, "grad_norm": 13.928812980651855, "learning_rate": 2.436398662813757e-05, "loss": 2.043, "step": 31660 }, { "epoch": 5.383307836138025, "grad_norm": 167.6199493408203, "learning_rate": 2.4361153606436625e-05, "loss": 2.1909, "step": 31670 }, { "epoch": 5.3850076491585925, "grad_norm": 18.69173812866211, "learning_rate": 2.435832058473568e-05, "loss": 1.9887, "step": 31680 }, { "epoch": 5.38670746217916, "grad_norm": 14.17557430267334, "learning_rate": 2.4355487563034735e-05, "loss": 1.9826, "step": 31690 }, { "epoch": 5.388407275199728, "grad_norm": 11.90911865234375, "learning_rate": 2.4352654541333785e-05, "loss": 1.9486, "step": 31700 }, { "epoch": 5.3901070882202955, "grad_norm": 16.919662475585938, "learning_rate": 2.4349821519632842e-05, "loss": 2.0501, "step": 31710 }, { "epoch": 5.391806901240863, "grad_norm": 10.915186882019043, "learning_rate": 2.4346988497931896e-05, "loss": 2.06, "step": 31720 }, { "epoch": 5.393506714261431, "grad_norm": 9.337605476379395, "learning_rate": 2.4344155476230946e-05, "loss": 2.1948, "step": 31730 }, { "epoch": 5.395206527281999, "grad_norm": 14.10026741027832, "learning_rate": 2.4341322454530003e-05, "loss": 1.988, "step": 31740 }, { "epoch": 5.396906340302567, "grad_norm": 9.553101539611816, "learning_rate": 2.4338489432829056e-05, "loss": 2.1694, "step": 31750 }, { "epoch": 5.398606153323135, "grad_norm": 12.986848831176758, "learning_rate": 2.433565641112811e-05, "loss": 1.9437, "step": 31760 }, { "epoch": 5.400305966343702, "grad_norm": 12.820804595947266, "learning_rate": 2.4332823389427163e-05, "loss": 2.2592, "step": 31770 }, { "epoch": 5.40200577936427, "grad_norm": 20.285995483398438, "learning_rate": 2.4329990367726217e-05, "loss": 2.155, "step": 31780 }, { "epoch": 5.403705592384838, "grad_norm": 13.481979370117188, "learning_rate": 2.432715734602527e-05, "loss": 2.21, "step": 31790 }, { "epoch": 5.405405405405405, "grad_norm": 16.310945510864258, "learning_rate": 2.4324324324324327e-05, "loss": 2.0568, "step": 31800 }, { "epoch": 5.407105218425973, "grad_norm": 13.557487487792969, "learning_rate": 2.4321491302623377e-05, "loss": 1.8936, "step": 31810 }, { "epoch": 5.408805031446541, "grad_norm": 48.634910583496094, "learning_rate": 2.431865828092243e-05, "loss": 2.0942, "step": 31820 }, { "epoch": 5.410504844467108, "grad_norm": 16.52172088623047, "learning_rate": 2.4315825259221488e-05, "loss": 1.8763, "step": 31830 }, { "epoch": 5.412204657487677, "grad_norm": 17.30777931213379, "learning_rate": 2.431299223752054e-05, "loss": 2.2195, "step": 31840 }, { "epoch": 5.413904470508244, "grad_norm": 9.664292335510254, "learning_rate": 2.431015921581959e-05, "loss": 2.045, "step": 31850 }, { "epoch": 5.415604283528812, "grad_norm": 12.382792472839355, "learning_rate": 2.4307326194118648e-05, "loss": 2.1192, "step": 31860 }, { "epoch": 5.41730409654938, "grad_norm": 13.83986759185791, "learning_rate": 2.4304493172417702e-05, "loss": 2.0269, "step": 31870 }, { "epoch": 5.419003909569947, "grad_norm": 18.866735458374023, "learning_rate": 2.4301660150716752e-05, "loss": 2.0459, "step": 31880 }, { "epoch": 5.420703722590515, "grad_norm": 14.897253036499023, "learning_rate": 2.429882712901581e-05, "loss": 2.0619, "step": 31890 }, { "epoch": 5.422403535611083, "grad_norm": 13.293437957763672, "learning_rate": 2.4295994107314862e-05, "loss": 1.9723, "step": 31900 }, { "epoch": 5.42410334863165, "grad_norm": 13.245969772338867, "learning_rate": 2.4293161085613916e-05, "loss": 2.1094, "step": 31910 }, { "epoch": 5.425803161652218, "grad_norm": 13.2190580368042, "learning_rate": 2.429032806391297e-05, "loss": 2.0819, "step": 31920 }, { "epoch": 5.427502974672786, "grad_norm": 15.133106231689453, "learning_rate": 2.4287495042212023e-05, "loss": 1.885, "step": 31930 }, { "epoch": 5.429202787693354, "grad_norm": 13.942723274230957, "learning_rate": 2.4284662020511076e-05, "loss": 2.0138, "step": 31940 }, { "epoch": 5.430902600713922, "grad_norm": 15.033976554870605, "learning_rate": 2.4281828998810133e-05, "loss": 2.1915, "step": 31950 }, { "epoch": 5.4326024137344895, "grad_norm": 13.68939208984375, "learning_rate": 2.4278995977109183e-05, "loss": 1.9926, "step": 31960 }, { "epoch": 5.434302226755057, "grad_norm": 16.104524612426758, "learning_rate": 2.427616295540824e-05, "loss": 1.9855, "step": 31970 }, { "epoch": 5.436002039775625, "grad_norm": 15.612683296203613, "learning_rate": 2.4273329933707294e-05, "loss": 2.3265, "step": 31980 }, { "epoch": 5.4377018527961924, "grad_norm": 22.287181854248047, "learning_rate": 2.4270496912006347e-05, "loss": 1.8496, "step": 31990 }, { "epoch": 5.43940166581676, "grad_norm": 17.277822494506836, "learning_rate": 2.42676638903054e-05, "loss": 1.8392, "step": 32000 }, { "epoch": 5.441101478837328, "grad_norm": 18.50213623046875, "learning_rate": 2.4264830868604454e-05, "loss": 2.0149, "step": 32010 }, { "epoch": 5.442801291857895, "grad_norm": 12.158804893493652, "learning_rate": 2.4261997846903508e-05, "loss": 2.1709, "step": 32020 }, { "epoch": 5.444501104878463, "grad_norm": 18.404708862304688, "learning_rate": 2.4259164825202565e-05, "loss": 2.1459, "step": 32030 }, { "epoch": 5.446200917899031, "grad_norm": 20.64546012878418, "learning_rate": 2.4256331803501615e-05, "loss": 1.9961, "step": 32040 }, { "epoch": 5.447900730919599, "grad_norm": 16.860727310180664, "learning_rate": 2.425349878180067e-05, "loss": 2.0864, "step": 32050 }, { "epoch": 5.449600543940167, "grad_norm": 42.74403762817383, "learning_rate": 2.4250665760099725e-05, "loss": 1.777, "step": 32060 }, { "epoch": 5.451300356960735, "grad_norm": 16.17795181274414, "learning_rate": 2.4247832738398776e-05, "loss": 1.9757, "step": 32070 }, { "epoch": 5.453000169981302, "grad_norm": 13.985077857971191, "learning_rate": 2.424499971669783e-05, "loss": 2.1088, "step": 32080 }, { "epoch": 5.45469998300187, "grad_norm": 13.686083793640137, "learning_rate": 2.4242166694996886e-05, "loss": 2.072, "step": 32090 }, { "epoch": 5.4563997960224375, "grad_norm": 14.421215057373047, "learning_rate": 2.423933367329594e-05, "loss": 1.9896, "step": 32100 }, { "epoch": 5.458099609043005, "grad_norm": 15.166714668273926, "learning_rate": 2.423650065159499e-05, "loss": 2.2613, "step": 32110 }, { "epoch": 5.459799422063573, "grad_norm": 14.505013465881348, "learning_rate": 2.4233667629894047e-05, "loss": 1.9679, "step": 32120 }, { "epoch": 5.4614992350841405, "grad_norm": 10.415392875671387, "learning_rate": 2.42308346081931e-05, "loss": 2.1833, "step": 32130 }, { "epoch": 5.463199048104708, "grad_norm": 22.134206771850586, "learning_rate": 2.4228001586492154e-05, "loss": 1.915, "step": 32140 }, { "epoch": 5.464898861125276, "grad_norm": 15.08398723602295, "learning_rate": 2.4225168564791207e-05, "loss": 2.2485, "step": 32150 }, { "epoch": 5.466598674145844, "grad_norm": 16.62482452392578, "learning_rate": 2.422233554309026e-05, "loss": 2.0897, "step": 32160 }, { "epoch": 5.468298487166412, "grad_norm": 12.453364372253418, "learning_rate": 2.4219502521389314e-05, "loss": 2.0625, "step": 32170 }, { "epoch": 5.46999830018698, "grad_norm": 13.412162780761719, "learning_rate": 2.421666949968837e-05, "loss": 1.8392, "step": 32180 }, { "epoch": 5.471698113207547, "grad_norm": 14.145347595214844, "learning_rate": 2.421383647798742e-05, "loss": 2.0865, "step": 32190 }, { "epoch": 5.473397926228115, "grad_norm": 14.190401077270508, "learning_rate": 2.4211003456286475e-05, "loss": 2.0428, "step": 32200 }, { "epoch": 5.475097739248683, "grad_norm": 16.24627113342285, "learning_rate": 2.4208170434585532e-05, "loss": 2.0701, "step": 32210 }, { "epoch": 5.47679755226925, "grad_norm": 10.654953002929688, "learning_rate": 2.4205337412884582e-05, "loss": 1.9692, "step": 32220 }, { "epoch": 5.478497365289818, "grad_norm": 17.35126495361328, "learning_rate": 2.4202504391183635e-05, "loss": 1.6751, "step": 32230 }, { "epoch": 5.4801971783103856, "grad_norm": 16.325904846191406, "learning_rate": 2.4199671369482692e-05, "loss": 2.0936, "step": 32240 }, { "epoch": 5.481896991330953, "grad_norm": 15.48924446105957, "learning_rate": 2.4196838347781746e-05, "loss": 1.9181, "step": 32250 }, { "epoch": 5.483596804351522, "grad_norm": 16.065269470214844, "learning_rate": 2.4194005326080796e-05, "loss": 1.8754, "step": 32260 }, { "epoch": 5.485296617372089, "grad_norm": 13.577295303344727, "learning_rate": 2.4191172304379853e-05, "loss": 2.082, "step": 32270 }, { "epoch": 5.486996430392657, "grad_norm": 17.03448486328125, "learning_rate": 2.4188339282678906e-05, "loss": 2.1561, "step": 32280 }, { "epoch": 5.488696243413225, "grad_norm": 11.927349090576172, "learning_rate": 2.418550626097796e-05, "loss": 2.0534, "step": 32290 }, { "epoch": 5.490396056433792, "grad_norm": 10.880023002624512, "learning_rate": 2.4182673239277013e-05, "loss": 2.0476, "step": 32300 }, { "epoch": 5.49209586945436, "grad_norm": 20.29450225830078, "learning_rate": 2.4179840217576067e-05, "loss": 2.0156, "step": 32310 }, { "epoch": 5.493795682474928, "grad_norm": 14.51128101348877, "learning_rate": 2.417700719587512e-05, "loss": 1.748, "step": 32320 }, { "epoch": 5.495495495495495, "grad_norm": 16.613557815551758, "learning_rate": 2.4174174174174177e-05, "loss": 2.1745, "step": 32330 }, { "epoch": 5.497195308516063, "grad_norm": 12.173905372619629, "learning_rate": 2.4171341152473227e-05, "loss": 1.918, "step": 32340 }, { "epoch": 5.498895121536631, "grad_norm": 17.3160457611084, "learning_rate": 2.416850813077228e-05, "loss": 2.0018, "step": 32350 }, { "epoch": 5.500594934557199, "grad_norm": 14.244891166687012, "learning_rate": 2.4165675109071338e-05, "loss": 2.1323, "step": 32360 }, { "epoch": 5.502294747577767, "grad_norm": 11.763253211975098, "learning_rate": 2.416284208737039e-05, "loss": 2.1419, "step": 32370 }, { "epoch": 5.5039945605983345, "grad_norm": 10.473369598388672, "learning_rate": 2.416000906566944e-05, "loss": 2.0262, "step": 32380 }, { "epoch": 5.505694373618902, "grad_norm": 12.732915878295898, "learning_rate": 2.41571760439685e-05, "loss": 2.2395, "step": 32390 }, { "epoch": 5.50739418663947, "grad_norm": 12.86601448059082, "learning_rate": 2.4154343022267552e-05, "loss": 2.0385, "step": 32400 }, { "epoch": 5.509093999660037, "grad_norm": 10.054957389831543, "learning_rate": 2.4151510000566602e-05, "loss": 2.0466, "step": 32410 }, { "epoch": 5.510793812680605, "grad_norm": 8.920711517333984, "learning_rate": 2.414867697886566e-05, "loss": 2.253, "step": 32420 }, { "epoch": 5.512493625701173, "grad_norm": 12.441006660461426, "learning_rate": 2.4145843957164713e-05, "loss": 2.2666, "step": 32430 }, { "epoch": 5.51419343872174, "grad_norm": 14.194223403930664, "learning_rate": 2.4143010935463766e-05, "loss": 2.0766, "step": 32440 }, { "epoch": 5.515893251742308, "grad_norm": 15.88615608215332, "learning_rate": 2.414017791376282e-05, "loss": 2.1498, "step": 32450 }, { "epoch": 5.517593064762876, "grad_norm": 17.797481536865234, "learning_rate": 2.4137344892061873e-05, "loss": 2.1458, "step": 32460 }, { "epoch": 5.519292877783444, "grad_norm": 21.83942222595215, "learning_rate": 2.4134511870360927e-05, "loss": 1.7716, "step": 32470 }, { "epoch": 5.520992690804012, "grad_norm": 12.104551315307617, "learning_rate": 2.4131678848659984e-05, "loss": 1.9805, "step": 32480 }, { "epoch": 5.5226925038245795, "grad_norm": 11.56383228302002, "learning_rate": 2.4128845826959034e-05, "loss": 1.9192, "step": 32490 }, { "epoch": 5.524392316845147, "grad_norm": 16.10425567626953, "learning_rate": 2.4126012805258087e-05, "loss": 2.0726, "step": 32500 }, { "epoch": 5.526092129865715, "grad_norm": 12.801939010620117, "learning_rate": 2.4123179783557144e-05, "loss": 2.1551, "step": 32510 }, { "epoch": 5.5277919428862825, "grad_norm": 13.412434577941895, "learning_rate": 2.4120346761856198e-05, "loss": 1.9776, "step": 32520 }, { "epoch": 5.52949175590685, "grad_norm": 10.80190658569336, "learning_rate": 2.4117513740155248e-05, "loss": 1.9426, "step": 32530 }, { "epoch": 5.531191568927418, "grad_norm": 10.209626197814941, "learning_rate": 2.4114680718454305e-05, "loss": 1.8415, "step": 32540 }, { "epoch": 5.5328913819479855, "grad_norm": 11.754371643066406, "learning_rate": 2.4111847696753358e-05, "loss": 1.849, "step": 32550 }, { "epoch": 5.534591194968553, "grad_norm": 12.89746379852295, "learning_rate": 2.410901467505241e-05, "loss": 2.0585, "step": 32560 }, { "epoch": 5.536291007989121, "grad_norm": 13.86859130859375, "learning_rate": 2.4106181653351465e-05, "loss": 1.8877, "step": 32570 }, { "epoch": 5.537990821009689, "grad_norm": 15.000542640686035, "learning_rate": 2.410334863165052e-05, "loss": 1.9979, "step": 32580 }, { "epoch": 5.539690634030257, "grad_norm": 13.153345108032227, "learning_rate": 2.4100515609949572e-05, "loss": 1.8856, "step": 32590 }, { "epoch": 5.541390447050825, "grad_norm": 15.968968391418457, "learning_rate": 2.4097682588248626e-05, "loss": 2.0866, "step": 32600 }, { "epoch": 5.543090260071392, "grad_norm": 13.4451265335083, "learning_rate": 2.409484956654768e-05, "loss": 2.4, "step": 32610 }, { "epoch": 5.54479007309196, "grad_norm": 20.49262237548828, "learning_rate": 2.4092016544846733e-05, "loss": 1.919, "step": 32620 }, { "epoch": 5.546489886112528, "grad_norm": 9.317785263061523, "learning_rate": 2.408918352314579e-05, "loss": 2.1228, "step": 32630 }, { "epoch": 5.548189699133095, "grad_norm": 17.423051834106445, "learning_rate": 2.408635050144484e-05, "loss": 1.9462, "step": 32640 }, { "epoch": 5.549889512153663, "grad_norm": 20.441162109375, "learning_rate": 2.4083517479743893e-05, "loss": 1.9853, "step": 32650 }, { "epoch": 5.5515893251742305, "grad_norm": 18.545825958251953, "learning_rate": 2.408068445804295e-05, "loss": 1.9039, "step": 32660 }, { "epoch": 5.553289138194798, "grad_norm": 10.72579574584961, "learning_rate": 2.4077851436342004e-05, "loss": 1.7265, "step": 32670 }, { "epoch": 5.554988951215366, "grad_norm": 12.74655818939209, "learning_rate": 2.4075018414641054e-05, "loss": 1.994, "step": 32680 }, { "epoch": 5.556688764235934, "grad_norm": 16.926740646362305, "learning_rate": 2.407218539294011e-05, "loss": 2.0228, "step": 32690 }, { "epoch": 5.558388577256502, "grad_norm": 18.113325119018555, "learning_rate": 2.4069352371239164e-05, "loss": 2.0105, "step": 32700 }, { "epoch": 5.56008839027707, "grad_norm": 10.752408027648926, "learning_rate": 2.4066519349538218e-05, "loss": 2.0226, "step": 32710 }, { "epoch": 5.561788203297637, "grad_norm": 16.92767906188965, "learning_rate": 2.406368632783727e-05, "loss": 2.1148, "step": 32720 }, { "epoch": 5.563488016318205, "grad_norm": 14.090277671813965, "learning_rate": 2.4060853306136325e-05, "loss": 2.0435, "step": 32730 }, { "epoch": 5.565187829338773, "grad_norm": 20.288225173950195, "learning_rate": 2.4058020284435382e-05, "loss": 1.9112, "step": 32740 }, { "epoch": 5.56688764235934, "grad_norm": 14.685218811035156, "learning_rate": 2.4055187262734432e-05, "loss": 1.9946, "step": 32750 }, { "epoch": 5.568587455379908, "grad_norm": 13.889580726623535, "learning_rate": 2.4052354241033486e-05, "loss": 2.0239, "step": 32760 }, { "epoch": 5.570287268400476, "grad_norm": 13.272894859313965, "learning_rate": 2.4049521219332542e-05, "loss": 2.1175, "step": 32770 }, { "epoch": 5.571987081421044, "grad_norm": 11.6832857131958, "learning_rate": 2.4046688197631596e-05, "loss": 1.9354, "step": 32780 }, { "epoch": 5.573686894441612, "grad_norm": 16.988311767578125, "learning_rate": 2.4043855175930646e-05, "loss": 2.1252, "step": 32790 }, { "epoch": 5.5753867074621795, "grad_norm": 15.239973068237305, "learning_rate": 2.4041022154229703e-05, "loss": 2.0253, "step": 32800 }, { "epoch": 5.577086520482747, "grad_norm": 13.708240509033203, "learning_rate": 2.4038189132528757e-05, "loss": 2.0783, "step": 32810 }, { "epoch": 5.578786333503315, "grad_norm": 13.724244117736816, "learning_rate": 2.403535611082781e-05, "loss": 1.9797, "step": 32820 }, { "epoch": 5.580486146523882, "grad_norm": 9.430720329284668, "learning_rate": 2.4032523089126864e-05, "loss": 2.0512, "step": 32830 }, { "epoch": 5.58218595954445, "grad_norm": 14.225613594055176, "learning_rate": 2.4029690067425917e-05, "loss": 2.1615, "step": 32840 }, { "epoch": 5.583885772565018, "grad_norm": 15.484753608703613, "learning_rate": 2.402685704572497e-05, "loss": 1.8686, "step": 32850 }, { "epoch": 5.585585585585585, "grad_norm": 15.726289749145508, "learning_rate": 2.4024024024024028e-05, "loss": 1.9956, "step": 32860 }, { "epoch": 5.587285398606153, "grad_norm": 19.725582122802734, "learning_rate": 2.4021191002323078e-05, "loss": 2.0739, "step": 32870 }, { "epoch": 5.588985211626721, "grad_norm": 11.253401756286621, "learning_rate": 2.401835798062213e-05, "loss": 2.0208, "step": 32880 }, { "epoch": 5.590685024647289, "grad_norm": 17.352069854736328, "learning_rate": 2.4015524958921188e-05, "loss": 2.0252, "step": 32890 }, { "epoch": 5.592384837667857, "grad_norm": 11.800569534301758, "learning_rate": 2.401269193722024e-05, "loss": 1.799, "step": 32900 }, { "epoch": 5.5940846506884245, "grad_norm": 14.949400901794434, "learning_rate": 2.4009858915519292e-05, "loss": 2.1522, "step": 32910 }, { "epoch": 5.595784463708992, "grad_norm": 15.508347511291504, "learning_rate": 2.400702589381835e-05, "loss": 1.9941, "step": 32920 }, { "epoch": 5.59748427672956, "grad_norm": 15.11835765838623, "learning_rate": 2.4004192872117402e-05, "loss": 1.8228, "step": 32930 }, { "epoch": 5.5991840897501275, "grad_norm": 12.644542694091797, "learning_rate": 2.4001359850416452e-05, "loss": 2.1578, "step": 32940 }, { "epoch": 5.600883902770695, "grad_norm": 10.7555513381958, "learning_rate": 2.399852682871551e-05, "loss": 2.0236, "step": 32950 }, { "epoch": 5.602583715791263, "grad_norm": 16.866147994995117, "learning_rate": 2.3995693807014563e-05, "loss": 1.9427, "step": 32960 }, { "epoch": 5.6042835288118305, "grad_norm": 10.66411018371582, "learning_rate": 2.3992860785313616e-05, "loss": 1.9734, "step": 32970 }, { "epoch": 5.605983341832398, "grad_norm": 14.38674259185791, "learning_rate": 2.399002776361267e-05, "loss": 2.127, "step": 32980 }, { "epoch": 5.607683154852966, "grad_norm": 14.91036319732666, "learning_rate": 2.3987194741911723e-05, "loss": 2.1225, "step": 32990 }, { "epoch": 5.609382967873534, "grad_norm": 18.896177291870117, "learning_rate": 2.3984361720210777e-05, "loss": 2.071, "step": 33000 }, { "epoch": 5.611082780894102, "grad_norm": 14.76530933380127, "learning_rate": 2.3981528698509834e-05, "loss": 2.1391, "step": 33010 }, { "epoch": 5.61278259391467, "grad_norm": 11.762969017028809, "learning_rate": 2.3978695676808884e-05, "loss": 1.9197, "step": 33020 }, { "epoch": 5.614482406935237, "grad_norm": 17.125762939453125, "learning_rate": 2.3975862655107937e-05, "loss": 1.9085, "step": 33030 }, { "epoch": 5.616182219955805, "grad_norm": 14.759276390075684, "learning_rate": 2.3973029633406994e-05, "loss": 2.1727, "step": 33040 }, { "epoch": 5.617882032976373, "grad_norm": 10.82313060760498, "learning_rate": 2.3970196611706048e-05, "loss": 2.0412, "step": 33050 }, { "epoch": 5.61958184599694, "grad_norm": 14.468134880065918, "learning_rate": 2.3967363590005098e-05, "loss": 1.9678, "step": 33060 }, { "epoch": 5.621281659017508, "grad_norm": 17.508939743041992, "learning_rate": 2.3964530568304155e-05, "loss": 1.9612, "step": 33070 }, { "epoch": 5.6229814720380755, "grad_norm": 15.58491325378418, "learning_rate": 2.396169754660321e-05, "loss": 1.9982, "step": 33080 }, { "epoch": 5.624681285058643, "grad_norm": 13.32664680480957, "learning_rate": 2.395886452490226e-05, "loss": 1.9394, "step": 33090 }, { "epoch": 5.626381098079211, "grad_norm": 25.220577239990234, "learning_rate": 2.3956031503201315e-05, "loss": 1.8533, "step": 33100 }, { "epoch": 5.628080911099779, "grad_norm": 11.930164337158203, "learning_rate": 2.395319848150037e-05, "loss": 2.2082, "step": 33110 }, { "epoch": 5.629780724120347, "grad_norm": 16.130531311035156, "learning_rate": 2.3950365459799423e-05, "loss": 2.1246, "step": 33120 }, { "epoch": 5.631480537140915, "grad_norm": 11.47224235534668, "learning_rate": 2.3947532438098476e-05, "loss": 2.0567, "step": 33130 }, { "epoch": 5.633180350161482, "grad_norm": 12.531076431274414, "learning_rate": 2.394469941639753e-05, "loss": 1.9473, "step": 33140 }, { "epoch": 5.63488016318205, "grad_norm": 11.320317268371582, "learning_rate": 2.3941866394696583e-05, "loss": 1.8939, "step": 33150 }, { "epoch": 5.636579976202618, "grad_norm": 24.84147071838379, "learning_rate": 2.393903337299564e-05, "loss": 1.8764, "step": 33160 }, { "epoch": 5.638279789223185, "grad_norm": 11.398984909057617, "learning_rate": 2.393620035129469e-05, "loss": 2.0747, "step": 33170 }, { "epoch": 5.639979602243753, "grad_norm": 14.45632266998291, "learning_rate": 2.3933367329593744e-05, "loss": 2.1922, "step": 33180 }, { "epoch": 5.641679415264321, "grad_norm": 15.201630592346191, "learning_rate": 2.39305343078928e-05, "loss": 2.2492, "step": 33190 }, { "epoch": 5.643379228284889, "grad_norm": 15.233281135559082, "learning_rate": 2.3927701286191854e-05, "loss": 2.3023, "step": 33200 }, { "epoch": 5.645079041305456, "grad_norm": 16.867677688598633, "learning_rate": 2.3924868264490904e-05, "loss": 2.1375, "step": 33210 }, { "epoch": 5.646778854326024, "grad_norm": 11.094898223876953, "learning_rate": 2.392203524278996e-05, "loss": 2.1157, "step": 33220 }, { "epoch": 5.648478667346592, "grad_norm": 12.03604793548584, "learning_rate": 2.3919202221089015e-05, "loss": 1.8412, "step": 33230 }, { "epoch": 5.65017848036716, "grad_norm": 14.720685005187988, "learning_rate": 2.3916369199388068e-05, "loss": 2.208, "step": 33240 }, { "epoch": 5.651878293387727, "grad_norm": 16.299325942993164, "learning_rate": 2.391353617768712e-05, "loss": 1.9494, "step": 33250 }, { "epoch": 5.653578106408295, "grad_norm": 13.1323823928833, "learning_rate": 2.3910703155986175e-05, "loss": 2.0717, "step": 33260 }, { "epoch": 5.655277919428863, "grad_norm": 15.922279357910156, "learning_rate": 2.390787013428523e-05, "loss": 2.1318, "step": 33270 }, { "epoch": 5.65697773244943, "grad_norm": 14.312005996704102, "learning_rate": 2.3905037112584282e-05, "loss": 2.0628, "step": 33280 }, { "epoch": 5.658677545469998, "grad_norm": 12.366864204406738, "learning_rate": 2.3902204090883336e-05, "loss": 2.0334, "step": 33290 }, { "epoch": 5.660377358490566, "grad_norm": 13.488964080810547, "learning_rate": 2.389937106918239e-05, "loss": 1.9779, "step": 33300 }, { "epoch": 5.662077171511134, "grad_norm": 14.946070671081543, "learning_rate": 2.3896538047481446e-05, "loss": 1.8515, "step": 33310 }, { "epoch": 5.663776984531702, "grad_norm": 14.634360313415527, "learning_rate": 2.3893705025780496e-05, "loss": 2.0969, "step": 33320 }, { "epoch": 5.6654767975522695, "grad_norm": 17.193313598632812, "learning_rate": 2.389087200407955e-05, "loss": 2.0717, "step": 33330 }, { "epoch": 5.667176610572837, "grad_norm": 13.238082885742188, "learning_rate": 2.3888038982378607e-05, "loss": 1.9221, "step": 33340 }, { "epoch": 5.668876423593405, "grad_norm": 21.568506240844727, "learning_rate": 2.388520596067766e-05, "loss": 2.0853, "step": 33350 }, { "epoch": 5.6705762366139725, "grad_norm": 17.410043716430664, "learning_rate": 2.388237293897671e-05, "loss": 2.0604, "step": 33360 }, { "epoch": 5.67227604963454, "grad_norm": 19.21595573425293, "learning_rate": 2.3879539917275767e-05, "loss": 2.0913, "step": 33370 }, { "epoch": 5.673975862655108, "grad_norm": 15.758225440979004, "learning_rate": 2.387670689557482e-05, "loss": 1.8687, "step": 33380 }, { "epoch": 5.675675675675675, "grad_norm": 13.359732627868652, "learning_rate": 2.3873873873873874e-05, "loss": 2.1654, "step": 33390 }, { "epoch": 5.677375488696243, "grad_norm": 11.241922378540039, "learning_rate": 2.3871040852172928e-05, "loss": 2.0041, "step": 33400 }, { "epoch": 5.679075301716811, "grad_norm": 10.733206748962402, "learning_rate": 2.386820783047198e-05, "loss": 1.9244, "step": 33410 }, { "epoch": 5.680775114737379, "grad_norm": 22.355390548706055, "learning_rate": 2.3865374808771035e-05, "loss": 2.0169, "step": 33420 }, { "epoch": 5.682474927757947, "grad_norm": 16.268911361694336, "learning_rate": 2.386254178707009e-05, "loss": 1.8934, "step": 33430 }, { "epoch": 5.684174740778515, "grad_norm": 15.915862083435059, "learning_rate": 2.3859708765369142e-05, "loss": 1.7664, "step": 33440 }, { "epoch": 5.685874553799082, "grad_norm": 14.061205863952637, "learning_rate": 2.38568757436682e-05, "loss": 2.1016, "step": 33450 }, { "epoch": 5.68757436681965, "grad_norm": 22.735292434692383, "learning_rate": 2.3854042721967252e-05, "loss": 2.0109, "step": 33460 }, { "epoch": 5.6892741798402175, "grad_norm": 16.024995803833008, "learning_rate": 2.3851209700266303e-05, "loss": 2.1064, "step": 33470 }, { "epoch": 5.690973992860785, "grad_norm": 11.8131685256958, "learning_rate": 2.384837667856536e-05, "loss": 1.8762, "step": 33480 }, { "epoch": 5.692673805881353, "grad_norm": 25.938282012939453, "learning_rate": 2.3845543656864413e-05, "loss": 1.9025, "step": 33490 }, { "epoch": 5.6943736189019205, "grad_norm": 18.812236785888672, "learning_rate": 2.3842710635163467e-05, "loss": 1.9026, "step": 33500 }, { "epoch": 5.696073431922488, "grad_norm": 11.019416809082031, "learning_rate": 2.383987761346252e-05, "loss": 1.7835, "step": 33510 }, { "epoch": 5.697773244943056, "grad_norm": 22.032636642456055, "learning_rate": 2.3837044591761574e-05, "loss": 1.9324, "step": 33520 }, { "epoch": 5.699473057963624, "grad_norm": 16.758811950683594, "learning_rate": 2.3834211570060627e-05, "loss": 2.1342, "step": 33530 }, { "epoch": 5.701172870984192, "grad_norm": 12.532044410705566, "learning_rate": 2.3831378548359684e-05, "loss": 2.2033, "step": 33540 }, { "epoch": 5.70287268400476, "grad_norm": 20.287113189697266, "learning_rate": 2.3828545526658734e-05, "loss": 2.1146, "step": 33550 }, { "epoch": 5.704572497025327, "grad_norm": 12.247176170349121, "learning_rate": 2.3825712504957788e-05, "loss": 2.0558, "step": 33560 }, { "epoch": 5.706272310045895, "grad_norm": 15.230327606201172, "learning_rate": 2.3822879483256845e-05, "loss": 2.021, "step": 33570 }, { "epoch": 5.707972123066463, "grad_norm": 12.648114204406738, "learning_rate": 2.3820046461555898e-05, "loss": 1.8803, "step": 33580 }, { "epoch": 5.70967193608703, "grad_norm": 12.726792335510254, "learning_rate": 2.3817213439854948e-05, "loss": 2.0142, "step": 33590 }, { "epoch": 5.711371749107598, "grad_norm": 11.88044261932373, "learning_rate": 2.3814380418154005e-05, "loss": 2.0621, "step": 33600 }, { "epoch": 5.713071562128166, "grad_norm": 13.66561508178711, "learning_rate": 2.381154739645306e-05, "loss": 2.0821, "step": 33610 }, { "epoch": 5.714771375148734, "grad_norm": 13.609156608581543, "learning_rate": 2.380871437475211e-05, "loss": 2.1145, "step": 33620 }, { "epoch": 5.716471188169301, "grad_norm": 20.843854904174805, "learning_rate": 2.3805881353051166e-05, "loss": 1.8254, "step": 33630 }, { "epoch": 5.718171001189869, "grad_norm": 13.211363792419434, "learning_rate": 2.380304833135022e-05, "loss": 1.9721, "step": 33640 }, { "epoch": 5.719870814210437, "grad_norm": 22.96099853515625, "learning_rate": 2.3800215309649273e-05, "loss": 1.837, "step": 33650 }, { "epoch": 5.721570627231005, "grad_norm": 13.376564025878906, "learning_rate": 2.3797382287948326e-05, "loss": 1.9735, "step": 33660 }, { "epoch": 5.723270440251572, "grad_norm": 14.837014198303223, "learning_rate": 2.379454926624738e-05, "loss": 1.9927, "step": 33670 }, { "epoch": 5.72497025327214, "grad_norm": 18.22860336303711, "learning_rate": 2.3791716244546433e-05, "loss": 1.8719, "step": 33680 }, { "epoch": 5.726670066292708, "grad_norm": 92.044921875, "learning_rate": 2.378888322284549e-05, "loss": 2.1535, "step": 33690 }, { "epoch": 5.728369879313275, "grad_norm": 32.99443435668945, "learning_rate": 2.378605020114454e-05, "loss": 1.9171, "step": 33700 }, { "epoch": 5.730069692333843, "grad_norm": 15.450225830078125, "learning_rate": 2.3783217179443594e-05, "loss": 1.8702, "step": 33710 }, { "epoch": 5.731769505354411, "grad_norm": 12.912727355957031, "learning_rate": 2.378038415774265e-05, "loss": 2.1322, "step": 33720 }, { "epoch": 5.733469318374979, "grad_norm": 18.681020736694336, "learning_rate": 2.3777551136041704e-05, "loss": 1.9481, "step": 33730 }, { "epoch": 5.735169131395547, "grad_norm": 12.483762741088867, "learning_rate": 2.3774718114340754e-05, "loss": 2.0337, "step": 33740 }, { "epoch": 5.7368689444161145, "grad_norm": 12.100369453430176, "learning_rate": 2.377188509263981e-05, "loss": 2.2654, "step": 33750 }, { "epoch": 5.738568757436682, "grad_norm": 12.104548454284668, "learning_rate": 2.3769052070938865e-05, "loss": 1.9318, "step": 33760 }, { "epoch": 5.74026857045725, "grad_norm": 15.161422729492188, "learning_rate": 2.3766219049237915e-05, "loss": 1.918, "step": 33770 }, { "epoch": 5.7419683834778175, "grad_norm": 18.632774353027344, "learning_rate": 2.3763386027536972e-05, "loss": 2.0461, "step": 33780 }, { "epoch": 5.743668196498385, "grad_norm": 14.698527336120605, "learning_rate": 2.3760553005836025e-05, "loss": 1.9251, "step": 33790 }, { "epoch": 5.745368009518953, "grad_norm": 11.340134620666504, "learning_rate": 2.375771998413508e-05, "loss": 1.9771, "step": 33800 }, { "epoch": 5.74706782253952, "grad_norm": 11.842971801757812, "learning_rate": 2.3754886962434132e-05, "loss": 2.0396, "step": 33810 }, { "epoch": 5.748767635560088, "grad_norm": 10.400971412658691, "learning_rate": 2.3752053940733186e-05, "loss": 2.1605, "step": 33820 }, { "epoch": 5.750467448580656, "grad_norm": 25.56438636779785, "learning_rate": 2.374922091903224e-05, "loss": 1.7246, "step": 33830 }, { "epoch": 5.752167261601224, "grad_norm": 11.735442161560059, "learning_rate": 2.3746387897331296e-05, "loss": 1.8843, "step": 33840 }, { "epoch": 5.753867074621792, "grad_norm": 13.902467727661133, "learning_rate": 2.3743554875630347e-05, "loss": 2.0323, "step": 33850 }, { "epoch": 5.75556688764236, "grad_norm": 15.74985408782959, "learning_rate": 2.37407218539294e-05, "loss": 1.9147, "step": 33860 }, { "epoch": 5.757266700662927, "grad_norm": 11.177862167358398, "learning_rate": 2.3737888832228457e-05, "loss": 2.0078, "step": 33870 }, { "epoch": 5.758966513683495, "grad_norm": 14.294594764709473, "learning_rate": 2.373505581052751e-05, "loss": 2.0836, "step": 33880 }, { "epoch": 5.7606663267040625, "grad_norm": 16.48248863220215, "learning_rate": 2.373222278882656e-05, "loss": 1.951, "step": 33890 }, { "epoch": 5.76236613972463, "grad_norm": 16.514713287353516, "learning_rate": 2.3729389767125618e-05, "loss": 2.2194, "step": 33900 }, { "epoch": 5.764065952745198, "grad_norm": 18.495254516601562, "learning_rate": 2.372655674542467e-05, "loss": 2.0012, "step": 33910 }, { "epoch": 5.7657657657657655, "grad_norm": 15.75590705871582, "learning_rate": 2.3723723723723725e-05, "loss": 2.076, "step": 33920 }, { "epoch": 5.767465578786333, "grad_norm": 15.218550682067871, "learning_rate": 2.3720890702022778e-05, "loss": 1.9562, "step": 33930 }, { "epoch": 5.769165391806901, "grad_norm": 16.64584732055664, "learning_rate": 2.371805768032183e-05, "loss": 1.9479, "step": 33940 }, { "epoch": 5.770865204827469, "grad_norm": 16.328874588012695, "learning_rate": 2.3715224658620885e-05, "loss": 1.7288, "step": 33950 }, { "epoch": 5.772565017848037, "grad_norm": 11.260807991027832, "learning_rate": 2.371239163691994e-05, "loss": 2.1177, "step": 33960 }, { "epoch": 5.774264830868605, "grad_norm": 14.54265022277832, "learning_rate": 2.3709558615218992e-05, "loss": 2.0041, "step": 33970 }, { "epoch": 5.775964643889172, "grad_norm": 18.074565887451172, "learning_rate": 2.3706725593518046e-05, "loss": 1.9756, "step": 33980 }, { "epoch": 5.77766445690974, "grad_norm": 13.755796432495117, "learning_rate": 2.3703892571817103e-05, "loss": 1.9281, "step": 33990 }, { "epoch": 5.779364269930308, "grad_norm": 15.039931297302246, "learning_rate": 2.3701059550116153e-05, "loss": 1.9552, "step": 34000 }, { "epoch": 5.781064082950875, "grad_norm": 17.284719467163086, "learning_rate": 2.3698226528415206e-05, "loss": 1.9964, "step": 34010 }, { "epoch": 5.782763895971443, "grad_norm": 12.963733673095703, "learning_rate": 2.3695393506714263e-05, "loss": 2.1623, "step": 34020 }, { "epoch": 5.784463708992011, "grad_norm": 15.108051300048828, "learning_rate": 2.3692560485013317e-05, "loss": 2.078, "step": 34030 }, { "epoch": 5.786163522012579, "grad_norm": 13.922835350036621, "learning_rate": 2.3689727463312367e-05, "loss": 2.227, "step": 34040 }, { "epoch": 5.787863335033146, "grad_norm": 11.88160514831543, "learning_rate": 2.3686894441611424e-05, "loss": 2.2073, "step": 34050 }, { "epoch": 5.789563148053714, "grad_norm": 15.185197830200195, "learning_rate": 2.3684061419910477e-05, "loss": 2.0727, "step": 34060 }, { "epoch": 5.791262961074282, "grad_norm": 15.302931785583496, "learning_rate": 2.368122839820953e-05, "loss": 2.1705, "step": 34070 }, { "epoch": 5.79296277409485, "grad_norm": 13.381118774414062, "learning_rate": 2.3678395376508584e-05, "loss": 1.9426, "step": 34080 }, { "epoch": 5.794662587115417, "grad_norm": 11.816638946533203, "learning_rate": 2.3675562354807638e-05, "loss": 2.2936, "step": 34090 }, { "epoch": 5.796362400135985, "grad_norm": 14.589153289794922, "learning_rate": 2.367272933310669e-05, "loss": 2.2331, "step": 34100 }, { "epoch": 5.798062213156553, "grad_norm": 13.154232025146484, "learning_rate": 2.3669896311405748e-05, "loss": 2.1241, "step": 34110 }, { "epoch": 5.79976202617712, "grad_norm": 15.237869262695312, "learning_rate": 2.36670632897048e-05, "loss": 1.8332, "step": 34120 }, { "epoch": 5.801461839197688, "grad_norm": 13.298994064331055, "learning_rate": 2.3664230268003852e-05, "loss": 2.1869, "step": 34130 }, { "epoch": 5.803161652218256, "grad_norm": 29.810401916503906, "learning_rate": 2.366139724630291e-05, "loss": 2.044, "step": 34140 }, { "epoch": 5.804861465238824, "grad_norm": 10.65566635131836, "learning_rate": 2.365856422460196e-05, "loss": 1.8372, "step": 34150 }, { "epoch": 5.806561278259392, "grad_norm": 15.334824562072754, "learning_rate": 2.3655731202901016e-05, "loss": 1.929, "step": 34160 }, { "epoch": 5.8082610912799595, "grad_norm": 11.49036693572998, "learning_rate": 2.365289818120007e-05, "loss": 2.0382, "step": 34170 }, { "epoch": 5.809960904300527, "grad_norm": 20.191720962524414, "learning_rate": 2.3650065159499123e-05, "loss": 1.9273, "step": 34180 }, { "epoch": 5.811660717321095, "grad_norm": 12.398641586303711, "learning_rate": 2.3647232137798176e-05, "loss": 1.9501, "step": 34190 }, { "epoch": 5.8133605303416624, "grad_norm": 11.400586128234863, "learning_rate": 2.364439911609723e-05, "loss": 1.9475, "step": 34200 }, { "epoch": 5.81506034336223, "grad_norm": 15.406583786010742, "learning_rate": 2.3641566094396283e-05, "loss": 1.7609, "step": 34210 }, { "epoch": 5.816760156382798, "grad_norm": 13.750529289245605, "learning_rate": 2.363873307269534e-05, "loss": 1.8079, "step": 34220 }, { "epoch": 5.818459969403365, "grad_norm": 13.071441650390625, "learning_rate": 2.363590005099439e-05, "loss": 2.035, "step": 34230 }, { "epoch": 5.820159782423933, "grad_norm": 15.11001205444336, "learning_rate": 2.3633067029293444e-05, "loss": 2.1499, "step": 34240 }, { "epoch": 5.821859595444501, "grad_norm": 12.493179321289062, "learning_rate": 2.36302340075925e-05, "loss": 2.1489, "step": 34250 }, { "epoch": 5.823559408465069, "grad_norm": 17.594388961791992, "learning_rate": 2.3627400985891554e-05, "loss": 2.0301, "step": 34260 }, { "epoch": 5.825259221485637, "grad_norm": 14.622693061828613, "learning_rate": 2.3624567964190605e-05, "loss": 2.0992, "step": 34270 }, { "epoch": 5.826959034506205, "grad_norm": 12.564570426940918, "learning_rate": 2.362173494248966e-05, "loss": 1.9559, "step": 34280 }, { "epoch": 5.828658847526772, "grad_norm": 16.490550994873047, "learning_rate": 2.3618901920788715e-05, "loss": 2.1293, "step": 34290 }, { "epoch": 5.83035866054734, "grad_norm": 18.676443099975586, "learning_rate": 2.3616068899087765e-05, "loss": 2.135, "step": 34300 }, { "epoch": 5.8320584735679075, "grad_norm": 19.721668243408203, "learning_rate": 2.3613235877386822e-05, "loss": 1.9896, "step": 34310 }, { "epoch": 5.833758286588475, "grad_norm": 17.117979049682617, "learning_rate": 2.3610402855685876e-05, "loss": 1.9304, "step": 34320 }, { "epoch": 5.835458099609043, "grad_norm": 11.254862785339355, "learning_rate": 2.360756983398493e-05, "loss": 2.1861, "step": 34330 }, { "epoch": 5.8371579126296105, "grad_norm": 10.25040340423584, "learning_rate": 2.3604736812283983e-05, "loss": 1.9215, "step": 34340 }, { "epoch": 5.838857725650178, "grad_norm": 15.247342109680176, "learning_rate": 2.3601903790583036e-05, "loss": 1.8858, "step": 34350 }, { "epoch": 5.840557538670746, "grad_norm": 11.149075508117676, "learning_rate": 2.359907076888209e-05, "loss": 2.0243, "step": 34360 }, { "epoch": 5.842257351691314, "grad_norm": 13.02056884765625, "learning_rate": 2.3596237747181147e-05, "loss": 2.0437, "step": 34370 }, { "epoch": 5.843957164711882, "grad_norm": 32.94341278076172, "learning_rate": 2.3593404725480197e-05, "loss": 1.9695, "step": 34380 }, { "epoch": 5.84565697773245, "grad_norm": 12.516159057617188, "learning_rate": 2.359057170377925e-05, "loss": 2.1866, "step": 34390 }, { "epoch": 5.847356790753017, "grad_norm": 13.629546165466309, "learning_rate": 2.3587738682078307e-05, "loss": 2.0772, "step": 34400 }, { "epoch": 5.849056603773585, "grad_norm": 13.54809284210205, "learning_rate": 2.358490566037736e-05, "loss": 2.0165, "step": 34410 }, { "epoch": 5.850756416794153, "grad_norm": 16.556177139282227, "learning_rate": 2.358207263867641e-05, "loss": 1.8603, "step": 34420 }, { "epoch": 5.85245622981472, "grad_norm": 14.633132934570312, "learning_rate": 2.3579239616975468e-05, "loss": 1.9831, "step": 34430 }, { "epoch": 5.854156042835288, "grad_norm": 13.874079704284668, "learning_rate": 2.357640659527452e-05, "loss": 1.856, "step": 34440 }, { "epoch": 5.8558558558558556, "grad_norm": 13.530169486999512, "learning_rate": 2.3573573573573575e-05, "loss": 1.9299, "step": 34450 }, { "epoch": 5.857555668876423, "grad_norm": 26.879287719726562, "learning_rate": 2.357074055187263e-05, "loss": 2.0065, "step": 34460 }, { "epoch": 5.859255481896991, "grad_norm": 17.814388275146484, "learning_rate": 2.3567907530171682e-05, "loss": 1.9932, "step": 34470 }, { "epoch": 5.860955294917559, "grad_norm": 16.39439582824707, "learning_rate": 2.3565074508470735e-05, "loss": 1.9575, "step": 34480 }, { "epoch": 5.862655107938127, "grad_norm": 12.522747039794922, "learning_rate": 2.356224148676979e-05, "loss": 2.1327, "step": 34490 }, { "epoch": 5.864354920958695, "grad_norm": 11.848525047302246, "learning_rate": 2.3559408465068842e-05, "loss": 2.1351, "step": 34500 }, { "epoch": 5.866054733979262, "grad_norm": 11.678247451782227, "learning_rate": 2.3556575443367896e-05, "loss": 1.8394, "step": 34510 }, { "epoch": 5.86775454699983, "grad_norm": 16.267242431640625, "learning_rate": 2.3553742421666953e-05, "loss": 1.9531, "step": 34520 }, { "epoch": 5.869454360020398, "grad_norm": 11.30725383758545, "learning_rate": 2.3550909399966003e-05, "loss": 2.0454, "step": 34530 }, { "epoch": 5.871154173040965, "grad_norm": 11.042766571044922, "learning_rate": 2.3548076378265056e-05, "loss": 2.3469, "step": 34540 }, { "epoch": 5.872853986061533, "grad_norm": 14.632050514221191, "learning_rate": 2.3545243356564113e-05, "loss": 1.9496, "step": 34550 }, { "epoch": 5.874553799082101, "grad_norm": 26.934268951416016, "learning_rate": 2.3542410334863167e-05, "loss": 2.1578, "step": 34560 }, { "epoch": 5.876253612102669, "grad_norm": 13.82975959777832, "learning_rate": 2.3539577313162217e-05, "loss": 1.889, "step": 34570 }, { "epoch": 5.877953425123237, "grad_norm": 15.289408683776855, "learning_rate": 2.3536744291461274e-05, "loss": 1.9307, "step": 34580 }, { "epoch": 5.8796532381438045, "grad_norm": 10.32520580291748, "learning_rate": 2.3533911269760327e-05, "loss": 1.9613, "step": 34590 }, { "epoch": 5.881353051164372, "grad_norm": 17.510791778564453, "learning_rate": 2.353107824805938e-05, "loss": 1.9885, "step": 34600 }, { "epoch": 5.88305286418494, "grad_norm": 26.165708541870117, "learning_rate": 2.3528245226358435e-05, "loss": 1.8745, "step": 34610 }, { "epoch": 5.884752677205507, "grad_norm": 15.099791526794434, "learning_rate": 2.3525412204657488e-05, "loss": 1.8964, "step": 34620 }, { "epoch": 5.886452490226075, "grad_norm": 16.84368324279785, "learning_rate": 2.352257918295654e-05, "loss": 2.112, "step": 34630 }, { "epoch": 5.888152303246643, "grad_norm": 17.546916961669922, "learning_rate": 2.3519746161255595e-05, "loss": 2.1686, "step": 34640 }, { "epoch": 5.88985211626721, "grad_norm": 32.354896545410156, "learning_rate": 2.351691313955465e-05, "loss": 1.9894, "step": 34650 }, { "epoch": 5.891551929287778, "grad_norm": 13.197976112365723, "learning_rate": 2.3514080117853702e-05, "loss": 2.1399, "step": 34660 }, { "epoch": 5.893251742308346, "grad_norm": 14.534446716308594, "learning_rate": 2.351124709615276e-05, "loss": 2.0183, "step": 34670 }, { "epoch": 5.894951555328914, "grad_norm": 13.218777656555176, "learning_rate": 2.350841407445181e-05, "loss": 2.0927, "step": 34680 }, { "epoch": 5.896651368349482, "grad_norm": 12.865711212158203, "learning_rate": 2.3505581052750863e-05, "loss": 1.9379, "step": 34690 }, { "epoch": 5.8983511813700495, "grad_norm": 17.6625919342041, "learning_rate": 2.350274803104992e-05, "loss": 2.0587, "step": 34700 }, { "epoch": 5.900050994390617, "grad_norm": 14.628046035766602, "learning_rate": 2.3499915009348973e-05, "loss": 1.8039, "step": 34710 }, { "epoch": 5.901750807411185, "grad_norm": 16.918725967407227, "learning_rate": 2.3497081987648023e-05, "loss": 1.9447, "step": 34720 }, { "epoch": 5.9034506204317525, "grad_norm": 11.447916984558105, "learning_rate": 2.349424896594708e-05, "loss": 2.0149, "step": 34730 }, { "epoch": 5.90515043345232, "grad_norm": 12.661171913146973, "learning_rate": 2.3491415944246134e-05, "loss": 2.0625, "step": 34740 }, { "epoch": 5.906850246472888, "grad_norm": 14.030714988708496, "learning_rate": 2.3488582922545187e-05, "loss": 1.9153, "step": 34750 }, { "epoch": 5.9085500594934555, "grad_norm": 17.46331214904785, "learning_rate": 2.348574990084424e-05, "loss": 2.0185, "step": 34760 }, { "epoch": 5.910249872514023, "grad_norm": 15.183964729309082, "learning_rate": 2.3482916879143294e-05, "loss": 2.0044, "step": 34770 }, { "epoch": 5.911949685534591, "grad_norm": 11.548332214355469, "learning_rate": 2.3480083857442348e-05, "loss": 2.0416, "step": 34780 }, { "epoch": 5.913649498555159, "grad_norm": 8.78765869140625, "learning_rate": 2.3477250835741405e-05, "loss": 2.1088, "step": 34790 }, { "epoch": 5.915349311575727, "grad_norm": 13.952073097229004, "learning_rate": 2.3474417814040455e-05, "loss": 2.0683, "step": 34800 }, { "epoch": 5.917049124596295, "grad_norm": 13.42302131652832, "learning_rate": 2.347158479233951e-05, "loss": 1.9266, "step": 34810 }, { "epoch": 5.918748937616862, "grad_norm": 12.133112907409668, "learning_rate": 2.3468751770638565e-05, "loss": 1.9814, "step": 34820 }, { "epoch": 5.92044875063743, "grad_norm": 13.235557556152344, "learning_rate": 2.3465918748937615e-05, "loss": 2.2415, "step": 34830 }, { "epoch": 5.922148563657998, "grad_norm": 11.15994644165039, "learning_rate": 2.346308572723667e-05, "loss": 1.9047, "step": 34840 }, { "epoch": 5.923848376678565, "grad_norm": 21.39008140563965, "learning_rate": 2.3460252705535726e-05, "loss": 1.9048, "step": 34850 }, { "epoch": 5.925548189699133, "grad_norm": 17.672218322753906, "learning_rate": 2.345741968383478e-05, "loss": 1.9267, "step": 34860 }, { "epoch": 5.9272480027197005, "grad_norm": 14.726696014404297, "learning_rate": 2.3454586662133833e-05, "loss": 1.8417, "step": 34870 }, { "epoch": 5.928947815740268, "grad_norm": 22.368839263916016, "learning_rate": 2.3451753640432886e-05, "loss": 2.0868, "step": 34880 }, { "epoch": 5.930647628760836, "grad_norm": 12.409031867980957, "learning_rate": 2.344892061873194e-05, "loss": 1.9881, "step": 34890 }, { "epoch": 5.932347441781404, "grad_norm": 16.15546226501465, "learning_rate": 2.3446087597030997e-05, "loss": 2.0376, "step": 34900 }, { "epoch": 5.934047254801972, "grad_norm": 12.190756797790527, "learning_rate": 2.3443254575330047e-05, "loss": 1.9232, "step": 34910 }, { "epoch": 5.93574706782254, "grad_norm": 16.70585823059082, "learning_rate": 2.34404215536291e-05, "loss": 2.2623, "step": 34920 }, { "epoch": 5.937446880843107, "grad_norm": 19.703367233276367, "learning_rate": 2.3437588531928157e-05, "loss": 2.0503, "step": 34930 }, { "epoch": 5.939146693863675, "grad_norm": 11.622482299804688, "learning_rate": 2.343475551022721e-05, "loss": 1.8304, "step": 34940 }, { "epoch": 5.940846506884243, "grad_norm": 13.022833824157715, "learning_rate": 2.343192248852626e-05, "loss": 2.0531, "step": 34950 }, { "epoch": 5.94254631990481, "grad_norm": 14.291674613952637, "learning_rate": 2.3429089466825318e-05, "loss": 1.8596, "step": 34960 }, { "epoch": 5.944246132925378, "grad_norm": 10.096460342407227, "learning_rate": 2.342625644512437e-05, "loss": 1.8931, "step": 34970 }, { "epoch": 5.945945945945946, "grad_norm": 11.120285987854004, "learning_rate": 2.342342342342342e-05, "loss": 2.0715, "step": 34980 }, { "epoch": 5.947645758966514, "grad_norm": 14.413532257080078, "learning_rate": 2.342059040172248e-05, "loss": 2.0732, "step": 34990 }, { "epoch": 5.949345571987082, "grad_norm": 15.931699752807617, "learning_rate": 2.3417757380021532e-05, "loss": 1.8391, "step": 35000 }, { "epoch": 5.9510453850076495, "grad_norm": 18.39552879333496, "learning_rate": 2.3414924358320586e-05, "loss": 1.9551, "step": 35010 }, { "epoch": 5.952745198028217, "grad_norm": 11.183842658996582, "learning_rate": 2.341209133661964e-05, "loss": 1.9271, "step": 35020 }, { "epoch": 5.954445011048785, "grad_norm": 13.463181495666504, "learning_rate": 2.3409258314918693e-05, "loss": 1.9418, "step": 35030 }, { "epoch": 5.956144824069352, "grad_norm": 17.87299346923828, "learning_rate": 2.3406425293217746e-05, "loss": 2.1179, "step": 35040 }, { "epoch": 5.95784463708992, "grad_norm": 10.501158714294434, "learning_rate": 2.3403592271516803e-05, "loss": 1.7552, "step": 35050 }, { "epoch": 5.959544450110488, "grad_norm": 18.726245880126953, "learning_rate": 2.3400759249815853e-05, "loss": 1.9523, "step": 35060 }, { "epoch": 5.961244263131055, "grad_norm": 18.598876953125, "learning_rate": 2.3397926228114907e-05, "loss": 2.2685, "step": 35070 }, { "epoch": 5.962944076151623, "grad_norm": 13.869544982910156, "learning_rate": 2.3395093206413964e-05, "loss": 1.9576, "step": 35080 }, { "epoch": 5.964643889172191, "grad_norm": 20.256807327270508, "learning_rate": 2.3392260184713017e-05, "loss": 1.7284, "step": 35090 }, { "epoch": 5.966343702192759, "grad_norm": 13.801568031311035, "learning_rate": 2.3389427163012067e-05, "loss": 2.0721, "step": 35100 }, { "epoch": 5.968043515213327, "grad_norm": 14.429831504821777, "learning_rate": 2.3386594141311124e-05, "loss": 2.0829, "step": 35110 }, { "epoch": 5.9697433282338945, "grad_norm": 14.50070858001709, "learning_rate": 2.3383761119610178e-05, "loss": 1.8515, "step": 35120 }, { "epoch": 5.971443141254462, "grad_norm": 11.357608795166016, "learning_rate": 2.338092809790923e-05, "loss": 2.1155, "step": 35130 }, { "epoch": 5.97314295427503, "grad_norm": 17.31558609008789, "learning_rate": 2.3378095076208285e-05, "loss": 2.0416, "step": 35140 }, { "epoch": 5.9748427672955975, "grad_norm": 20.66874122619629, "learning_rate": 2.3375262054507338e-05, "loss": 2.0477, "step": 35150 }, { "epoch": 5.976542580316165, "grad_norm": 19.071508407592773, "learning_rate": 2.3372429032806392e-05, "loss": 1.8058, "step": 35160 }, { "epoch": 5.978242393336733, "grad_norm": 18.088804244995117, "learning_rate": 2.3369596011105445e-05, "loss": 1.8238, "step": 35170 }, { "epoch": 5.9799422063573004, "grad_norm": 18.02963638305664, "learning_rate": 2.33667629894045e-05, "loss": 2.0035, "step": 35180 }, { "epoch": 5.981642019377868, "grad_norm": 24.019268035888672, "learning_rate": 2.3363929967703552e-05, "loss": 1.8407, "step": 35190 }, { "epoch": 5.983341832398436, "grad_norm": 17.50583839416504, "learning_rate": 2.336109694600261e-05, "loss": 2.0179, "step": 35200 }, { "epoch": 5.985041645419004, "grad_norm": 22.455554962158203, "learning_rate": 2.335826392430166e-05, "loss": 1.9177, "step": 35210 }, { "epoch": 5.986741458439572, "grad_norm": 16.112356185913086, "learning_rate": 2.3355430902600713e-05, "loss": 1.8388, "step": 35220 }, { "epoch": 5.98844127146014, "grad_norm": 12.78359317779541, "learning_rate": 2.335259788089977e-05, "loss": 2.0639, "step": 35230 }, { "epoch": 5.990141084480707, "grad_norm": 13.938620567321777, "learning_rate": 2.3349764859198823e-05, "loss": 2.1888, "step": 35240 }, { "epoch": 5.991840897501275, "grad_norm": 15.417807579040527, "learning_rate": 2.3346931837497873e-05, "loss": 1.9393, "step": 35250 }, { "epoch": 5.993540710521843, "grad_norm": 13.40491771697998, "learning_rate": 2.334409881579693e-05, "loss": 1.9171, "step": 35260 }, { "epoch": 5.99524052354241, "grad_norm": 12.676349639892578, "learning_rate": 2.3341265794095984e-05, "loss": 2.1187, "step": 35270 }, { "epoch": 5.996940336562978, "grad_norm": 14.688410758972168, "learning_rate": 2.3338432772395037e-05, "loss": 1.822, "step": 35280 }, { "epoch": 5.9986401495835455, "grad_norm": 56.26052474975586, "learning_rate": 2.333559975069409e-05, "loss": 1.8924, "step": 35290 }, { "epoch": 6.0, "eval_cer": 1.0, "eval_loss": 2.447620153427124, "eval_runtime": 1963.8659, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.24, "step": 35298 }, { "epoch": 6.000339962604113, "grad_norm": 13.914128303527832, "learning_rate": 2.3332766728993144e-05, "loss": 1.7917, "step": 35300 }, { "epoch": 6.002039775624682, "grad_norm": 13.013748168945312, "learning_rate": 2.3329933707292198e-05, "loss": 1.9161, "step": 35310 }, { "epoch": 6.003739588645249, "grad_norm": 13.687849044799805, "learning_rate": 2.3327100685591255e-05, "loss": 1.5343, "step": 35320 }, { "epoch": 6.005439401665817, "grad_norm": 16.205533981323242, "learning_rate": 2.3324267663890305e-05, "loss": 1.899, "step": 35330 }, { "epoch": 6.007139214686385, "grad_norm": 15.927934646606445, "learning_rate": 2.332143464218936e-05, "loss": 1.721, "step": 35340 }, { "epoch": 6.008839027706952, "grad_norm": 11.97663402557373, "learning_rate": 2.3318601620488415e-05, "loss": 1.7219, "step": 35350 }, { "epoch": 6.01053884072752, "grad_norm": 10.923454284667969, "learning_rate": 2.3315768598787466e-05, "loss": 1.8712, "step": 35360 }, { "epoch": 6.012238653748088, "grad_norm": 19.572744369506836, "learning_rate": 2.331293557708652e-05, "loss": 1.829, "step": 35370 }, { "epoch": 6.013938466768655, "grad_norm": 12.88201904296875, "learning_rate": 2.3310102555385576e-05, "loss": 1.825, "step": 35380 }, { "epoch": 6.015638279789223, "grad_norm": 16.599124908447266, "learning_rate": 2.330726953368463e-05, "loss": 1.7699, "step": 35390 }, { "epoch": 6.017338092809791, "grad_norm": 12.647037506103516, "learning_rate": 2.330443651198368e-05, "loss": 1.6866, "step": 35400 }, { "epoch": 6.019037905830358, "grad_norm": 15.124777793884277, "learning_rate": 2.3301603490282737e-05, "loss": 1.9443, "step": 35410 }, { "epoch": 6.020737718850927, "grad_norm": 13.272448539733887, "learning_rate": 2.329877046858179e-05, "loss": 1.7735, "step": 35420 }, { "epoch": 6.022437531871494, "grad_norm": 14.484920501708984, "learning_rate": 2.3295937446880844e-05, "loss": 1.9758, "step": 35430 }, { "epoch": 6.024137344892062, "grad_norm": 11.53305435180664, "learning_rate": 2.3293104425179897e-05, "loss": 1.9677, "step": 35440 }, { "epoch": 6.02583715791263, "grad_norm": 14.304597854614258, "learning_rate": 2.329027140347895e-05, "loss": 1.8006, "step": 35450 }, { "epoch": 6.027536970933197, "grad_norm": 15.221000671386719, "learning_rate": 2.3287438381778004e-05, "loss": 1.8084, "step": 35460 }, { "epoch": 6.029236783953765, "grad_norm": 12.856539726257324, "learning_rate": 2.328460536007706e-05, "loss": 1.8637, "step": 35470 }, { "epoch": 6.030936596974333, "grad_norm": 16.755271911621094, "learning_rate": 2.328177233837611e-05, "loss": 1.9102, "step": 35480 }, { "epoch": 6.0326364099949, "grad_norm": 13.237199783325195, "learning_rate": 2.3278939316675165e-05, "loss": 1.9375, "step": 35490 }, { "epoch": 6.034336223015468, "grad_norm": 11.8540678024292, "learning_rate": 2.327610629497422e-05, "loss": 1.8693, "step": 35500 }, { "epoch": 6.036036036036036, "grad_norm": 10.507890701293945, "learning_rate": 2.3273273273273272e-05, "loss": 1.982, "step": 35510 }, { "epoch": 6.037735849056604, "grad_norm": 17.75238800048828, "learning_rate": 2.3270440251572325e-05, "loss": 1.8652, "step": 35520 }, { "epoch": 6.039435662077172, "grad_norm": 19.372825622558594, "learning_rate": 2.3267607229871382e-05, "loss": 1.9686, "step": 35530 }, { "epoch": 6.0411354750977395, "grad_norm": 16.959003448486328, "learning_rate": 2.3264774208170436e-05, "loss": 1.9398, "step": 35540 }, { "epoch": 6.042835288118307, "grad_norm": 10.95129680633545, "learning_rate": 2.3261941186469486e-05, "loss": 1.9661, "step": 35550 }, { "epoch": 6.044535101138875, "grad_norm": 18.493776321411133, "learning_rate": 2.3259108164768543e-05, "loss": 1.8353, "step": 35560 }, { "epoch": 6.0462349141594425, "grad_norm": 10.522631645202637, "learning_rate": 2.3256275143067596e-05, "loss": 1.9956, "step": 35570 }, { "epoch": 6.04793472718001, "grad_norm": 19.714826583862305, "learning_rate": 2.325344212136665e-05, "loss": 1.7913, "step": 35580 }, { "epoch": 6.049634540200578, "grad_norm": 13.466034889221191, "learning_rate": 2.3250609099665703e-05, "loss": 1.7829, "step": 35590 }, { "epoch": 6.051334353221145, "grad_norm": 11.727365493774414, "learning_rate": 2.3247776077964757e-05, "loss": 1.7515, "step": 35600 }, { "epoch": 6.053034166241713, "grad_norm": 13.632180213928223, "learning_rate": 2.324494305626381e-05, "loss": 1.6915, "step": 35610 }, { "epoch": 6.054733979262281, "grad_norm": 16.334386825561523, "learning_rate": 2.3242110034562867e-05, "loss": 1.915, "step": 35620 }, { "epoch": 6.056433792282849, "grad_norm": 15.279976844787598, "learning_rate": 2.3239277012861917e-05, "loss": 1.8437, "step": 35630 }, { "epoch": 6.058133605303417, "grad_norm": 12.24308967590332, "learning_rate": 2.3236443991160974e-05, "loss": 2.0903, "step": 35640 }, { "epoch": 6.059833418323985, "grad_norm": 10.36490249633789, "learning_rate": 2.3233610969460028e-05, "loss": 1.9659, "step": 35650 }, { "epoch": 6.061533231344552, "grad_norm": 11.236129760742188, "learning_rate": 2.323077794775908e-05, "loss": 1.8638, "step": 35660 }, { "epoch": 6.06323304436512, "grad_norm": 12.743512153625488, "learning_rate": 2.3227944926058135e-05, "loss": 1.6469, "step": 35670 }, { "epoch": 6.0649328573856875, "grad_norm": 12.436391830444336, "learning_rate": 2.322511190435719e-05, "loss": 1.829, "step": 35680 }, { "epoch": 6.066632670406255, "grad_norm": 14.38556957244873, "learning_rate": 2.3222278882656242e-05, "loss": 2.0622, "step": 35690 }, { "epoch": 6.068332483426823, "grad_norm": 18.101076126098633, "learning_rate": 2.3219445860955296e-05, "loss": 2.0278, "step": 35700 }, { "epoch": 6.0700322964473905, "grad_norm": 13.482914924621582, "learning_rate": 2.321661283925435e-05, "loss": 1.8036, "step": 35710 }, { "epoch": 6.071732109467958, "grad_norm": 14.211080551147461, "learning_rate": 2.3213779817553403e-05, "loss": 1.8452, "step": 35720 }, { "epoch": 6.073431922488526, "grad_norm": 24.981002807617188, "learning_rate": 2.321094679585246e-05, "loss": 1.7519, "step": 35730 }, { "epoch": 6.075131735509094, "grad_norm": 19.86993408203125, "learning_rate": 2.320811377415151e-05, "loss": 1.6773, "step": 35740 }, { "epoch": 6.076831548529662, "grad_norm": 17.016742706298828, "learning_rate": 2.3205280752450563e-05, "loss": 1.7493, "step": 35750 }, { "epoch": 6.07853136155023, "grad_norm": 11.87149429321289, "learning_rate": 2.320244773074962e-05, "loss": 1.6705, "step": 35760 }, { "epoch": 6.080231174570797, "grad_norm": 15.724288940429688, "learning_rate": 2.3199614709048674e-05, "loss": 1.8651, "step": 35770 }, { "epoch": 6.081930987591365, "grad_norm": 13.51591968536377, "learning_rate": 2.3196781687347724e-05, "loss": 1.6248, "step": 35780 }, { "epoch": 6.083630800611933, "grad_norm": 12.887273788452148, "learning_rate": 2.319394866564678e-05, "loss": 1.8515, "step": 35790 }, { "epoch": 6.0853306136325, "grad_norm": 11.347210884094238, "learning_rate": 2.3191115643945834e-05, "loss": 1.7591, "step": 35800 }, { "epoch": 6.087030426653068, "grad_norm": 14.941389083862305, "learning_rate": 2.3188282622244888e-05, "loss": 1.6803, "step": 35810 }, { "epoch": 6.088730239673636, "grad_norm": 13.340997695922852, "learning_rate": 2.318544960054394e-05, "loss": 1.8957, "step": 35820 }, { "epoch": 6.090430052694203, "grad_norm": 10.380125999450684, "learning_rate": 2.3182616578842995e-05, "loss": 1.7641, "step": 35830 }, { "epoch": 6.092129865714772, "grad_norm": 14.673713684082031, "learning_rate": 2.3179783557142048e-05, "loss": 1.9609, "step": 35840 }, { "epoch": 6.093829678735339, "grad_norm": 19.45250701904297, "learning_rate": 2.3176950535441102e-05, "loss": 1.8402, "step": 35850 }, { "epoch": 6.095529491755907, "grad_norm": 21.476146697998047, "learning_rate": 2.3174117513740155e-05, "loss": 1.7998, "step": 35860 }, { "epoch": 6.097229304776475, "grad_norm": 14.61700439453125, "learning_rate": 2.317128449203921e-05, "loss": 1.9905, "step": 35870 }, { "epoch": 6.098929117797042, "grad_norm": 12.4898099899292, "learning_rate": 2.3168451470338266e-05, "loss": 1.8701, "step": 35880 }, { "epoch": 6.10062893081761, "grad_norm": 23.62188720703125, "learning_rate": 2.3165618448637316e-05, "loss": 1.6313, "step": 35890 }, { "epoch": 6.102328743838178, "grad_norm": 12.920005798339844, "learning_rate": 2.316278542693637e-05, "loss": 1.9405, "step": 35900 }, { "epoch": 6.104028556858745, "grad_norm": 12.509289741516113, "learning_rate": 2.3159952405235426e-05, "loss": 1.7867, "step": 35910 }, { "epoch": 6.105728369879313, "grad_norm": 11.605814933776855, "learning_rate": 2.315711938353448e-05, "loss": 1.9915, "step": 35920 }, { "epoch": 6.107428182899881, "grad_norm": 19.2382869720459, "learning_rate": 2.315428636183353e-05, "loss": 1.8634, "step": 35930 }, { "epoch": 6.109127995920449, "grad_norm": 16.369503021240234, "learning_rate": 2.3151453340132587e-05, "loss": 1.867, "step": 35940 }, { "epoch": 6.110827808941017, "grad_norm": 12.9837064743042, "learning_rate": 2.314862031843164e-05, "loss": 1.7813, "step": 35950 }, { "epoch": 6.1125276219615845, "grad_norm": 13.989629745483398, "learning_rate": 2.3145787296730694e-05, "loss": 1.8645, "step": 35960 }, { "epoch": 6.114227434982152, "grad_norm": 12.424355506896973, "learning_rate": 2.3142954275029747e-05, "loss": 1.8545, "step": 35970 }, { "epoch": 6.11592724800272, "grad_norm": 13.184296607971191, "learning_rate": 2.31401212533288e-05, "loss": 2.1037, "step": 35980 }, { "epoch": 6.1176270610232875, "grad_norm": 16.490238189697266, "learning_rate": 2.3137288231627854e-05, "loss": 1.7535, "step": 35990 }, { "epoch": 6.119326874043855, "grad_norm": 9.967511177062988, "learning_rate": 2.313445520992691e-05, "loss": 1.998, "step": 36000 }, { "epoch": 6.121026687064423, "grad_norm": 18.450605392456055, "learning_rate": 2.313162218822596e-05, "loss": 1.9658, "step": 36010 }, { "epoch": 6.12272650008499, "grad_norm": 16.433624267578125, "learning_rate": 2.3128789166525015e-05, "loss": 1.9175, "step": 36020 }, { "epoch": 6.124426313105558, "grad_norm": 14.486717224121094, "learning_rate": 2.3125956144824072e-05, "loss": 1.7718, "step": 36030 }, { "epoch": 6.126126126126126, "grad_norm": 11.801834106445312, "learning_rate": 2.3123123123123122e-05, "loss": 1.6992, "step": 36040 }, { "epoch": 6.127825939146694, "grad_norm": 13.659153938293457, "learning_rate": 2.3120290101422176e-05, "loss": 1.8193, "step": 36050 }, { "epoch": 6.129525752167262, "grad_norm": 11.372575759887695, "learning_rate": 2.3117457079721232e-05, "loss": 1.6392, "step": 36060 }, { "epoch": 6.13122556518783, "grad_norm": 13.216059684753418, "learning_rate": 2.3114624058020286e-05, "loss": 1.56, "step": 36070 }, { "epoch": 6.132925378208397, "grad_norm": 17.390092849731445, "learning_rate": 2.3111791036319336e-05, "loss": 1.5383, "step": 36080 }, { "epoch": 6.134625191228965, "grad_norm": 9.524346351623535, "learning_rate": 2.3108958014618393e-05, "loss": 1.8384, "step": 36090 }, { "epoch": 6.1363250042495325, "grad_norm": 17.067493438720703, "learning_rate": 2.3106124992917447e-05, "loss": 1.7058, "step": 36100 }, { "epoch": 6.1380248172701, "grad_norm": 16.743988037109375, "learning_rate": 2.31032919712165e-05, "loss": 1.808, "step": 36110 }, { "epoch": 6.139724630290668, "grad_norm": 16.081974029541016, "learning_rate": 2.3100458949515554e-05, "loss": 1.7501, "step": 36120 }, { "epoch": 6.1414244433112355, "grad_norm": 10.675458908081055, "learning_rate": 2.3097625927814607e-05, "loss": 1.8721, "step": 36130 }, { "epoch": 6.143124256331803, "grad_norm": 16.578845977783203, "learning_rate": 2.309479290611366e-05, "loss": 1.716, "step": 36140 }, { "epoch": 6.144824069352371, "grad_norm": 12.774831771850586, "learning_rate": 2.3091959884412718e-05, "loss": 1.8047, "step": 36150 }, { "epoch": 6.146523882372939, "grad_norm": 18.815988540649414, "learning_rate": 2.3089126862711768e-05, "loss": 1.7713, "step": 36160 }, { "epoch": 6.148223695393507, "grad_norm": 16.542339324951172, "learning_rate": 2.308629384101082e-05, "loss": 1.6704, "step": 36170 }, { "epoch": 6.149923508414075, "grad_norm": 12.665533065795898, "learning_rate": 2.3083460819309878e-05, "loss": 1.9697, "step": 36180 }, { "epoch": 6.151623321434642, "grad_norm": 13.133753776550293, "learning_rate": 2.3080627797608928e-05, "loss": 1.7059, "step": 36190 }, { "epoch": 6.15332313445521, "grad_norm": 16.086589813232422, "learning_rate": 2.3077794775907982e-05, "loss": 2.0023, "step": 36200 }, { "epoch": 6.155022947475778, "grad_norm": 18.2018985748291, "learning_rate": 2.307496175420704e-05, "loss": 2.042, "step": 36210 }, { "epoch": 6.156722760496345, "grad_norm": 19.68405532836914, "learning_rate": 2.3072128732506092e-05, "loss": 1.7669, "step": 36220 }, { "epoch": 6.158422573516913, "grad_norm": 17.74659538269043, "learning_rate": 2.3069295710805142e-05, "loss": 1.9472, "step": 36230 }, { "epoch": 6.160122386537481, "grad_norm": 15.78065013885498, "learning_rate": 2.30664626891042e-05, "loss": 1.791, "step": 36240 }, { "epoch": 6.161822199558048, "grad_norm": 16.216716766357422, "learning_rate": 2.3063629667403253e-05, "loss": 1.8076, "step": 36250 }, { "epoch": 6.163522012578617, "grad_norm": 13.901571273803711, "learning_rate": 2.3060796645702306e-05, "loss": 1.8711, "step": 36260 }, { "epoch": 6.165221825599184, "grad_norm": 23.17330551147461, "learning_rate": 2.305796362400136e-05, "loss": 1.6171, "step": 36270 }, { "epoch": 6.166921638619752, "grad_norm": 12.25304889678955, "learning_rate": 2.3055130602300413e-05, "loss": 2.0772, "step": 36280 }, { "epoch": 6.16862145164032, "grad_norm": 19.409120559692383, "learning_rate": 2.3052297580599467e-05, "loss": 1.6734, "step": 36290 }, { "epoch": 6.170321264660887, "grad_norm": 16.331762313842773, "learning_rate": 2.3049464558898524e-05, "loss": 1.7498, "step": 36300 }, { "epoch": 6.172021077681455, "grad_norm": 19.79487419128418, "learning_rate": 2.3046631537197574e-05, "loss": 1.5876, "step": 36310 }, { "epoch": 6.173720890702023, "grad_norm": 15.02978229522705, "learning_rate": 2.3043798515496627e-05, "loss": 1.8492, "step": 36320 }, { "epoch": 6.17542070372259, "grad_norm": 20.80516815185547, "learning_rate": 2.3040965493795684e-05, "loss": 1.9536, "step": 36330 }, { "epoch": 6.177120516743158, "grad_norm": 14.53509521484375, "learning_rate": 2.3038132472094738e-05, "loss": 1.8032, "step": 36340 }, { "epoch": 6.178820329763726, "grad_norm": 14.725790023803711, "learning_rate": 2.303529945039379e-05, "loss": 1.7298, "step": 36350 }, { "epoch": 6.180520142784294, "grad_norm": 16.50689125061035, "learning_rate": 2.3032466428692845e-05, "loss": 1.8899, "step": 36360 }, { "epoch": 6.182219955804862, "grad_norm": 16.294137954711914, "learning_rate": 2.30296334069919e-05, "loss": 1.711, "step": 36370 }, { "epoch": 6.1839197688254295, "grad_norm": 13.41368579864502, "learning_rate": 2.3026800385290952e-05, "loss": 1.9063, "step": 36380 }, { "epoch": 6.185619581845997, "grad_norm": 14.201903343200684, "learning_rate": 2.3023967363590005e-05, "loss": 1.7966, "step": 36390 }, { "epoch": 6.187319394866565, "grad_norm": 14.10412311553955, "learning_rate": 2.302113434188906e-05, "loss": 1.7664, "step": 36400 }, { "epoch": 6.1890192078871324, "grad_norm": 14.01546573638916, "learning_rate": 2.3018301320188116e-05, "loss": 1.9142, "step": 36410 }, { "epoch": 6.1907190209077, "grad_norm": 18.721290588378906, "learning_rate": 2.3015468298487166e-05, "loss": 1.6127, "step": 36420 }, { "epoch": 6.192418833928268, "grad_norm": 13.949487686157227, "learning_rate": 2.301263527678622e-05, "loss": 1.9415, "step": 36430 }, { "epoch": 6.194118646948835, "grad_norm": 19.758317947387695, "learning_rate": 2.3009802255085276e-05, "loss": 1.87, "step": 36440 }, { "epoch": 6.195818459969403, "grad_norm": 16.891735076904297, "learning_rate": 2.300696923338433e-05, "loss": 1.9274, "step": 36450 }, { "epoch": 6.197518272989971, "grad_norm": 24.233232498168945, "learning_rate": 2.300413621168338e-05, "loss": 1.6177, "step": 36460 }, { "epoch": 6.199218086010539, "grad_norm": 12.211142539978027, "learning_rate": 2.3001303189982437e-05, "loss": 1.7425, "step": 36470 }, { "epoch": 6.200917899031107, "grad_norm": 13.452350616455078, "learning_rate": 2.299847016828149e-05, "loss": 1.664, "step": 36480 }, { "epoch": 6.202617712051675, "grad_norm": 13.430413246154785, "learning_rate": 2.2995637146580544e-05, "loss": 1.8675, "step": 36490 }, { "epoch": 6.204317525072242, "grad_norm": 17.275720596313477, "learning_rate": 2.2992804124879598e-05, "loss": 1.7401, "step": 36500 }, { "epoch": 6.20601733809281, "grad_norm": 14.112566947937012, "learning_rate": 2.298997110317865e-05, "loss": 1.9266, "step": 36510 }, { "epoch": 6.2077171511133775, "grad_norm": 14.993905067443848, "learning_rate": 2.2987138081477705e-05, "loss": 1.8104, "step": 36520 }, { "epoch": 6.209416964133945, "grad_norm": 11.074893951416016, "learning_rate": 2.298430505977676e-05, "loss": 1.683, "step": 36530 }, { "epoch": 6.211116777154513, "grad_norm": 14.403799057006836, "learning_rate": 2.298147203807581e-05, "loss": 1.931, "step": 36540 }, { "epoch": 6.2128165901750805, "grad_norm": 18.906723022460938, "learning_rate": 2.2978639016374865e-05, "loss": 1.7822, "step": 36550 }, { "epoch": 6.214516403195648, "grad_norm": 22.67253303527832, "learning_rate": 2.2975805994673922e-05, "loss": 1.9062, "step": 36560 }, { "epoch": 6.216216216216216, "grad_norm": 19.249204635620117, "learning_rate": 2.2972972972972972e-05, "loss": 1.9022, "step": 36570 }, { "epoch": 6.217916029236784, "grad_norm": 14.2219877243042, "learning_rate": 2.2970139951272026e-05, "loss": 1.6692, "step": 36580 }, { "epoch": 6.219615842257352, "grad_norm": 10.634769439697266, "learning_rate": 2.2967306929571083e-05, "loss": 1.7733, "step": 36590 }, { "epoch": 6.22131565527792, "grad_norm": 13.05622673034668, "learning_rate": 2.2964473907870136e-05, "loss": 1.8489, "step": 36600 }, { "epoch": 6.223015468298487, "grad_norm": 13.299354553222656, "learning_rate": 2.2961640886169186e-05, "loss": 2.0049, "step": 36610 }, { "epoch": 6.224715281319055, "grad_norm": 15.729256629943848, "learning_rate": 2.2958807864468243e-05, "loss": 1.6679, "step": 36620 }, { "epoch": 6.226415094339623, "grad_norm": 12.363957405090332, "learning_rate": 2.2955974842767297e-05, "loss": 1.8741, "step": 36630 }, { "epoch": 6.22811490736019, "grad_norm": 23.265304565429688, "learning_rate": 2.295314182106635e-05, "loss": 1.5945, "step": 36640 }, { "epoch": 6.229814720380758, "grad_norm": 15.980298042297363, "learning_rate": 2.2950308799365404e-05, "loss": 1.8473, "step": 36650 }, { "epoch": 6.2315145334013256, "grad_norm": 18.795576095581055, "learning_rate": 2.2947475777664457e-05, "loss": 1.771, "step": 36660 }, { "epoch": 6.233214346421893, "grad_norm": 12.657903671264648, "learning_rate": 2.294464275596351e-05, "loss": 1.9076, "step": 36670 }, { "epoch": 6.234914159442462, "grad_norm": 22.49336814880371, "learning_rate": 2.2941809734262568e-05, "loss": 1.7619, "step": 36680 }, { "epoch": 6.236613972463029, "grad_norm": 12.354216575622559, "learning_rate": 2.2938976712561618e-05, "loss": 1.6468, "step": 36690 }, { "epoch": 6.238313785483597, "grad_norm": 15.507490158081055, "learning_rate": 2.293614369086067e-05, "loss": 1.9522, "step": 36700 }, { "epoch": 6.240013598504165, "grad_norm": 11.133432388305664, "learning_rate": 2.293331066915973e-05, "loss": 1.8325, "step": 36710 }, { "epoch": 6.241713411524732, "grad_norm": 11.51002025604248, "learning_rate": 2.293047764745878e-05, "loss": 1.7847, "step": 36720 }, { "epoch": 6.2434132245453, "grad_norm": 12.455099105834961, "learning_rate": 2.2927644625757832e-05, "loss": 1.8529, "step": 36730 }, { "epoch": 6.245113037565868, "grad_norm": 11.88995361328125, "learning_rate": 2.292481160405689e-05, "loss": 1.8909, "step": 36740 }, { "epoch": 6.246812850586435, "grad_norm": 21.725183486938477, "learning_rate": 2.2921978582355942e-05, "loss": 1.6469, "step": 36750 }, { "epoch": 6.248512663607003, "grad_norm": 14.230973243713379, "learning_rate": 2.2919145560654993e-05, "loss": 1.7994, "step": 36760 }, { "epoch": 6.250212476627571, "grad_norm": 21.91415786743164, "learning_rate": 2.291631253895405e-05, "loss": 1.8861, "step": 36770 }, { "epoch": 6.251912289648139, "grad_norm": 28.167726516723633, "learning_rate": 2.2913479517253103e-05, "loss": 1.7677, "step": 36780 }, { "epoch": 6.253612102668707, "grad_norm": 13.767829895019531, "learning_rate": 2.2910646495552156e-05, "loss": 1.8503, "step": 36790 }, { "epoch": 6.2553119156892745, "grad_norm": 15.238048553466797, "learning_rate": 2.290781347385121e-05, "loss": 1.9586, "step": 36800 }, { "epoch": 6.257011728709842, "grad_norm": 19.376117706298828, "learning_rate": 2.2904980452150264e-05, "loss": 2.0404, "step": 36810 }, { "epoch": 6.25871154173041, "grad_norm": 17.657705307006836, "learning_rate": 2.2902147430449317e-05, "loss": 1.6423, "step": 36820 }, { "epoch": 6.260411354750977, "grad_norm": 25.942890167236328, "learning_rate": 2.2899314408748374e-05, "loss": 1.8789, "step": 36830 }, { "epoch": 6.262111167771545, "grad_norm": 19.065664291381836, "learning_rate": 2.2896481387047424e-05, "loss": 1.7683, "step": 36840 }, { "epoch": 6.263810980792113, "grad_norm": 13.344552040100098, "learning_rate": 2.2893648365346478e-05, "loss": 1.8392, "step": 36850 }, { "epoch": 6.26551079381268, "grad_norm": 16.621623992919922, "learning_rate": 2.2890815343645535e-05, "loss": 1.8526, "step": 36860 }, { "epoch": 6.267210606833248, "grad_norm": 13.336443901062012, "learning_rate": 2.2887982321944588e-05, "loss": 1.8055, "step": 36870 }, { "epoch": 6.268910419853816, "grad_norm": 11.487140655517578, "learning_rate": 2.2885149300243638e-05, "loss": 1.8017, "step": 36880 }, { "epoch": 6.270610232874384, "grad_norm": 15.478418350219727, "learning_rate": 2.2882316278542695e-05, "loss": 1.6828, "step": 36890 }, { "epoch": 6.272310045894952, "grad_norm": 10.265013694763184, "learning_rate": 2.287948325684175e-05, "loss": 1.9097, "step": 36900 }, { "epoch": 6.2740098589155195, "grad_norm": 17.656230926513672, "learning_rate": 2.28766502351408e-05, "loss": 1.7972, "step": 36910 }, { "epoch": 6.275709671936087, "grad_norm": 13.031747817993164, "learning_rate": 2.2873817213439856e-05, "loss": 1.6693, "step": 36920 }, { "epoch": 6.277409484956655, "grad_norm": 10.411726951599121, "learning_rate": 2.287098419173891e-05, "loss": 2.0316, "step": 36930 }, { "epoch": 6.2791092979772225, "grad_norm": 15.277167320251465, "learning_rate": 2.2868151170037963e-05, "loss": 1.915, "step": 36940 }, { "epoch": 6.28080911099779, "grad_norm": 22.870216369628906, "learning_rate": 2.2865318148337016e-05, "loss": 1.8393, "step": 36950 }, { "epoch": 6.282508924018358, "grad_norm": 13.466568946838379, "learning_rate": 2.286248512663607e-05, "loss": 2.0516, "step": 36960 }, { "epoch": 6.2842087370389255, "grad_norm": 12.683758735656738, "learning_rate": 2.2859652104935123e-05, "loss": 1.8853, "step": 36970 }, { "epoch": 6.285908550059493, "grad_norm": 12.724760055541992, "learning_rate": 2.285681908323418e-05, "loss": 1.9316, "step": 36980 }, { "epoch": 6.287608363080061, "grad_norm": 15.327051162719727, "learning_rate": 2.285398606153323e-05, "loss": 1.7866, "step": 36990 }, { "epoch": 6.289308176100629, "grad_norm": 15.195698738098145, "learning_rate": 2.2851153039832284e-05, "loss": 1.8574, "step": 37000 }, { "epoch": 6.291007989121197, "grad_norm": 18.630661010742188, "learning_rate": 2.284832001813134e-05, "loss": 1.8546, "step": 37010 }, { "epoch": 6.292707802141765, "grad_norm": 13.139487266540527, "learning_rate": 2.2845486996430394e-05, "loss": 1.8707, "step": 37020 }, { "epoch": 6.294407615162332, "grad_norm": 12.87179183959961, "learning_rate": 2.2842653974729444e-05, "loss": 1.7967, "step": 37030 }, { "epoch": 6.2961074281829, "grad_norm": 20.667631149291992, "learning_rate": 2.28398209530285e-05, "loss": 1.9109, "step": 37040 }, { "epoch": 6.297807241203468, "grad_norm": 18.40496253967285, "learning_rate": 2.2836987931327555e-05, "loss": 1.7963, "step": 37050 }, { "epoch": 6.299507054224035, "grad_norm": 15.51547622680664, "learning_rate": 2.283415490962661e-05, "loss": 1.7862, "step": 37060 }, { "epoch": 6.301206867244603, "grad_norm": 15.108768463134766, "learning_rate": 2.2831321887925662e-05, "loss": 1.6784, "step": 37070 }, { "epoch": 6.3029066802651705, "grad_norm": 18.5584659576416, "learning_rate": 2.2828488866224715e-05, "loss": 1.7086, "step": 37080 }, { "epoch": 6.304606493285738, "grad_norm": 12.196829795837402, "learning_rate": 2.2825655844523772e-05, "loss": 1.8374, "step": 37090 }, { "epoch": 6.306306306306306, "grad_norm": 21.221961975097656, "learning_rate": 2.2822822822822822e-05, "loss": 1.6205, "step": 37100 }, { "epoch": 6.308006119326874, "grad_norm": 20.812414169311523, "learning_rate": 2.2819989801121876e-05, "loss": 1.7991, "step": 37110 }, { "epoch": 6.309705932347442, "grad_norm": 14.354166984558105, "learning_rate": 2.2817156779420933e-05, "loss": 1.7894, "step": 37120 }, { "epoch": 6.31140574536801, "grad_norm": 17.986215591430664, "learning_rate": 2.2814323757719986e-05, "loss": 1.7559, "step": 37130 }, { "epoch": 6.313105558388577, "grad_norm": 17.485300064086914, "learning_rate": 2.2811490736019037e-05, "loss": 1.9668, "step": 37140 }, { "epoch": 6.314805371409145, "grad_norm": 12.778443336486816, "learning_rate": 2.2808657714318093e-05, "loss": 1.9517, "step": 37150 }, { "epoch": 6.316505184429713, "grad_norm": 13.866868019104004, "learning_rate": 2.2805824692617147e-05, "loss": 1.7717, "step": 37160 }, { "epoch": 6.31820499745028, "grad_norm": 11.127803802490234, "learning_rate": 2.28029916709162e-05, "loss": 2.0258, "step": 37170 }, { "epoch": 6.319904810470848, "grad_norm": 18.197038650512695, "learning_rate": 2.2800158649215254e-05, "loss": 1.7535, "step": 37180 }, { "epoch": 6.321604623491416, "grad_norm": 11.311237335205078, "learning_rate": 2.2797325627514308e-05, "loss": 1.9789, "step": 37190 }, { "epoch": 6.323304436511984, "grad_norm": 14.928686141967773, "learning_rate": 2.279449260581336e-05, "loss": 1.8128, "step": 37200 }, { "epoch": 6.325004249532552, "grad_norm": 13.719966888427734, "learning_rate": 2.2791659584112418e-05, "loss": 1.746, "step": 37210 }, { "epoch": 6.3267040625531195, "grad_norm": 14.305061340332031, "learning_rate": 2.2788826562411468e-05, "loss": 1.8624, "step": 37220 }, { "epoch": 6.328403875573687, "grad_norm": 15.21878719329834, "learning_rate": 2.278599354071052e-05, "loss": 1.6432, "step": 37230 }, { "epoch": 6.330103688594255, "grad_norm": 15.550573348999023, "learning_rate": 2.278316051900958e-05, "loss": 1.4939, "step": 37240 }, { "epoch": 6.331803501614822, "grad_norm": 13.866235733032227, "learning_rate": 2.278032749730863e-05, "loss": 1.693, "step": 37250 }, { "epoch": 6.33350331463539, "grad_norm": 16.630842208862305, "learning_rate": 2.2777494475607682e-05, "loss": 1.4335, "step": 37260 }, { "epoch": 6.335203127655958, "grad_norm": 17.597675323486328, "learning_rate": 2.277466145390674e-05, "loss": 1.6215, "step": 37270 }, { "epoch": 6.336902940676525, "grad_norm": 18.052444458007812, "learning_rate": 2.2771828432205793e-05, "loss": 1.9336, "step": 37280 }, { "epoch": 6.338602753697093, "grad_norm": 13.854071617126465, "learning_rate": 2.2768995410504843e-05, "loss": 1.9944, "step": 37290 }, { "epoch": 6.340302566717661, "grad_norm": 12.68319034576416, "learning_rate": 2.27661623888039e-05, "loss": 1.7377, "step": 37300 }, { "epoch": 6.342002379738229, "grad_norm": 23.995651245117188, "learning_rate": 2.2763329367102953e-05, "loss": 1.6281, "step": 37310 }, { "epoch": 6.343702192758797, "grad_norm": 17.440284729003906, "learning_rate": 2.2760496345402007e-05, "loss": 1.8146, "step": 37320 }, { "epoch": 6.3454020057793645, "grad_norm": 10.469415664672852, "learning_rate": 2.275766332370106e-05, "loss": 2.0008, "step": 37330 }, { "epoch": 6.347101818799932, "grad_norm": 10.806683540344238, "learning_rate": 2.2754830302000114e-05, "loss": 1.4833, "step": 37340 }, { "epoch": 6.3488016318205, "grad_norm": 13.431480407714844, "learning_rate": 2.2751997280299167e-05, "loss": 1.744, "step": 37350 }, { "epoch": 6.3505014448410675, "grad_norm": 19.09899139404297, "learning_rate": 2.2749164258598224e-05, "loss": 1.7717, "step": 37360 }, { "epoch": 6.352201257861635, "grad_norm": 13.052685737609863, "learning_rate": 2.2746331236897274e-05, "loss": 1.9412, "step": 37370 }, { "epoch": 6.353901070882203, "grad_norm": 12.930635452270508, "learning_rate": 2.2743498215196328e-05, "loss": 1.9206, "step": 37380 }, { "epoch": 6.3556008839027704, "grad_norm": 17.20347023010254, "learning_rate": 2.2740665193495385e-05, "loss": 1.6071, "step": 37390 }, { "epoch": 6.357300696923338, "grad_norm": 12.040891647338867, "learning_rate": 2.2737832171794438e-05, "loss": 1.8088, "step": 37400 }, { "epoch": 6.359000509943906, "grad_norm": 12.496943473815918, "learning_rate": 2.273499915009349e-05, "loss": 1.7105, "step": 37410 }, { "epoch": 6.360700322964474, "grad_norm": 14.751151084899902, "learning_rate": 2.2732166128392545e-05, "loss": 1.9095, "step": 37420 }, { "epoch": 6.362400135985042, "grad_norm": 12.758460998535156, "learning_rate": 2.27293331066916e-05, "loss": 1.623, "step": 37430 }, { "epoch": 6.36409994900561, "grad_norm": 13.849419593811035, "learning_rate": 2.272650008499065e-05, "loss": 1.8818, "step": 37440 }, { "epoch": 6.365799762026177, "grad_norm": 14.684795379638672, "learning_rate": 2.2723667063289706e-05, "loss": 1.8722, "step": 37450 }, { "epoch": 6.367499575046745, "grad_norm": 19.107995986938477, "learning_rate": 2.272083404158876e-05, "loss": 1.6378, "step": 37460 }, { "epoch": 6.369199388067313, "grad_norm": 15.876176834106445, "learning_rate": 2.2718001019887813e-05, "loss": 2.2099, "step": 37470 }, { "epoch": 6.37089920108788, "grad_norm": 20.057096481323242, "learning_rate": 2.2715167998186866e-05, "loss": 1.5679, "step": 37480 }, { "epoch": 6.372599014108448, "grad_norm": 15.74729061126709, "learning_rate": 2.271233497648592e-05, "loss": 1.7602, "step": 37490 }, { "epoch": 6.3742988271290155, "grad_norm": 11.449872970581055, "learning_rate": 2.2709501954784973e-05, "loss": 1.8575, "step": 37500 }, { "epoch": 6.375998640149583, "grad_norm": 13.389914512634277, "learning_rate": 2.270666893308403e-05, "loss": 1.7882, "step": 37510 }, { "epoch": 6.377698453170151, "grad_norm": 17.74294662475586, "learning_rate": 2.270383591138308e-05, "loss": 1.8214, "step": 37520 }, { "epoch": 6.379398266190719, "grad_norm": 18.63683319091797, "learning_rate": 2.2701002889682134e-05, "loss": 1.6891, "step": 37530 }, { "epoch": 6.381098079211287, "grad_norm": 18.297489166259766, "learning_rate": 2.269816986798119e-05, "loss": 1.8822, "step": 37540 }, { "epoch": 6.382797892231855, "grad_norm": 13.325675010681152, "learning_rate": 2.2695336846280244e-05, "loss": 1.8146, "step": 37550 }, { "epoch": 6.384497705252422, "grad_norm": 14.815123558044434, "learning_rate": 2.2692503824579295e-05, "loss": 2.0431, "step": 37560 }, { "epoch": 6.38619751827299, "grad_norm": 14.772012710571289, "learning_rate": 2.268967080287835e-05, "loss": 1.8949, "step": 37570 }, { "epoch": 6.387897331293558, "grad_norm": 13.02975082397461, "learning_rate": 2.2686837781177405e-05, "loss": 1.9969, "step": 37580 }, { "epoch": 6.389597144314125, "grad_norm": 9.30424690246582, "learning_rate": 2.2684004759476455e-05, "loss": 2.0314, "step": 37590 }, { "epoch": 6.391296957334693, "grad_norm": 14.362452507019043, "learning_rate": 2.2681171737775512e-05, "loss": 1.8819, "step": 37600 }, { "epoch": 6.392996770355261, "grad_norm": 11.720630645751953, "learning_rate": 2.2678338716074566e-05, "loss": 1.7998, "step": 37610 }, { "epoch": 6.394696583375828, "grad_norm": 11.577475547790527, "learning_rate": 2.267550569437362e-05, "loss": 1.7691, "step": 37620 }, { "epoch": 6.396396396396397, "grad_norm": 17.54344940185547, "learning_rate": 2.2672672672672673e-05, "loss": 1.9336, "step": 37630 }, { "epoch": 6.398096209416964, "grad_norm": 17.23659324645996, "learning_rate": 2.2669839650971726e-05, "loss": 1.6256, "step": 37640 }, { "epoch": 6.399796022437532, "grad_norm": 12.481876373291016, "learning_rate": 2.266700662927078e-05, "loss": 1.6861, "step": 37650 }, { "epoch": 6.4014958354581, "grad_norm": 17.463016510009766, "learning_rate": 2.2664173607569837e-05, "loss": 1.6946, "step": 37660 }, { "epoch": 6.403195648478667, "grad_norm": 10.329825401306152, "learning_rate": 2.2661340585868887e-05, "loss": 1.7104, "step": 37670 }, { "epoch": 6.404895461499235, "grad_norm": 15.21828842163086, "learning_rate": 2.265850756416794e-05, "loss": 1.9024, "step": 37680 }, { "epoch": 6.406595274519803, "grad_norm": 13.839629173278809, "learning_rate": 2.2655674542466997e-05, "loss": 1.7003, "step": 37690 }, { "epoch": 6.40829508754037, "grad_norm": 16.284788131713867, "learning_rate": 2.265284152076605e-05, "loss": 1.9147, "step": 37700 }, { "epoch": 6.409994900560938, "grad_norm": 12.855202674865723, "learning_rate": 2.26500084990651e-05, "loss": 2.094, "step": 37710 }, { "epoch": 6.411694713581506, "grad_norm": 11.609037399291992, "learning_rate": 2.2647175477364158e-05, "loss": 1.7133, "step": 37720 }, { "epoch": 6.413394526602074, "grad_norm": 12.96723747253418, "learning_rate": 2.264434245566321e-05, "loss": 1.6455, "step": 37730 }, { "epoch": 6.415094339622642, "grad_norm": 20.704193115234375, "learning_rate": 2.2641509433962265e-05, "loss": 1.8605, "step": 37740 }, { "epoch": 6.4167941526432095, "grad_norm": 12.83484935760498, "learning_rate": 2.263867641226132e-05, "loss": 1.8906, "step": 37750 }, { "epoch": 6.418493965663777, "grad_norm": 11.916254997253418, "learning_rate": 2.2635843390560372e-05, "loss": 1.9289, "step": 37760 }, { "epoch": 6.420193778684345, "grad_norm": 20.95885467529297, "learning_rate": 2.2633010368859425e-05, "loss": 1.9072, "step": 37770 }, { "epoch": 6.4218935917049125, "grad_norm": 13.552680015563965, "learning_rate": 2.263017734715848e-05, "loss": 1.6983, "step": 37780 }, { "epoch": 6.42359340472548, "grad_norm": 21.28020668029785, "learning_rate": 2.2627344325457532e-05, "loss": 1.8785, "step": 37790 }, { "epoch": 6.425293217746048, "grad_norm": 11.037884712219238, "learning_rate": 2.2624511303756586e-05, "loss": 1.8005, "step": 37800 }, { "epoch": 6.426993030766615, "grad_norm": 21.686182022094727, "learning_rate": 2.2621678282055643e-05, "loss": 1.6806, "step": 37810 }, { "epoch": 6.428692843787183, "grad_norm": 24.44826316833496, "learning_rate": 2.2618845260354693e-05, "loss": 2.0628, "step": 37820 }, { "epoch": 6.430392656807751, "grad_norm": 11.354048728942871, "learning_rate": 2.261601223865375e-05, "loss": 1.5454, "step": 37830 }, { "epoch": 6.432092469828319, "grad_norm": 15.387992858886719, "learning_rate": 2.2613179216952803e-05, "loss": 1.8122, "step": 37840 }, { "epoch": 6.433792282848887, "grad_norm": 17.787172317504883, "learning_rate": 2.2610346195251857e-05, "loss": 1.8914, "step": 37850 }, { "epoch": 6.435492095869455, "grad_norm": 10.090105056762695, "learning_rate": 2.260751317355091e-05, "loss": 1.8183, "step": 37860 }, { "epoch": 6.437191908890022, "grad_norm": 14.66614055633545, "learning_rate": 2.2604680151849964e-05, "loss": 1.8449, "step": 37870 }, { "epoch": 6.43889172191059, "grad_norm": 20.46605110168457, "learning_rate": 2.2601847130149017e-05, "loss": 1.826, "step": 37880 }, { "epoch": 6.4405915349311575, "grad_norm": 20.775991439819336, "learning_rate": 2.2599014108448074e-05, "loss": 2.214, "step": 37890 }, { "epoch": 6.442291347951725, "grad_norm": 17.901866912841797, "learning_rate": 2.2596181086747125e-05, "loss": 1.64, "step": 37900 }, { "epoch": 6.443991160972293, "grad_norm": 11.132393836975098, "learning_rate": 2.2593348065046178e-05, "loss": 1.8341, "step": 37910 }, { "epoch": 6.4456909739928605, "grad_norm": 20.76275062561035, "learning_rate": 2.2590515043345235e-05, "loss": 1.7376, "step": 37920 }, { "epoch": 6.447390787013428, "grad_norm": 19.283721923828125, "learning_rate": 2.2587682021644285e-05, "loss": 1.862, "step": 37930 }, { "epoch": 6.449090600033996, "grad_norm": 15.756714820861816, "learning_rate": 2.258484899994334e-05, "loss": 1.6883, "step": 37940 }, { "epoch": 6.450790413054564, "grad_norm": 15.807934761047363, "learning_rate": 2.2582015978242396e-05, "loss": 1.7276, "step": 37950 }, { "epoch": 6.452490226075132, "grad_norm": 12.207472801208496, "learning_rate": 2.257918295654145e-05, "loss": 1.7831, "step": 37960 }, { "epoch": 6.4541900390957, "grad_norm": 18.973634719848633, "learning_rate": 2.25763499348405e-05, "loss": 1.7922, "step": 37970 }, { "epoch": 6.455889852116267, "grad_norm": 18.72721290588379, "learning_rate": 2.2573516913139556e-05, "loss": 1.6221, "step": 37980 }, { "epoch": 6.457589665136835, "grad_norm": 12.797273635864258, "learning_rate": 2.257068389143861e-05, "loss": 1.7, "step": 37990 }, { "epoch": 6.459289478157403, "grad_norm": 13.456064224243164, "learning_rate": 2.2567850869737663e-05, "loss": 1.5955, "step": 38000 }, { "epoch": 6.46098929117797, "grad_norm": 15.098454475402832, "learning_rate": 2.2565017848036717e-05, "loss": 1.7703, "step": 38010 }, { "epoch": 6.462689104198538, "grad_norm": 12.486918449401855, "learning_rate": 2.256218482633577e-05, "loss": 1.928, "step": 38020 }, { "epoch": 6.464388917219106, "grad_norm": 11.164590835571289, "learning_rate": 2.2559351804634824e-05, "loss": 1.6264, "step": 38030 }, { "epoch": 6.466088730239673, "grad_norm": 15.464624404907227, "learning_rate": 2.255651878293388e-05, "loss": 1.8195, "step": 38040 }, { "epoch": 6.467788543260242, "grad_norm": 13.453564643859863, "learning_rate": 2.255368576123293e-05, "loss": 1.5018, "step": 38050 }, { "epoch": 6.469488356280809, "grad_norm": 15.392379760742188, "learning_rate": 2.2550852739531984e-05, "loss": 1.6863, "step": 38060 }, { "epoch": 6.471188169301377, "grad_norm": 16.624008178710938, "learning_rate": 2.254801971783104e-05, "loss": 1.8561, "step": 38070 }, { "epoch": 6.472887982321945, "grad_norm": 13.872031211853027, "learning_rate": 2.2545186696130095e-05, "loss": 1.7951, "step": 38080 }, { "epoch": 6.474587795342512, "grad_norm": 11.022936820983887, "learning_rate": 2.2542353674429145e-05, "loss": 1.8451, "step": 38090 }, { "epoch": 6.47628760836308, "grad_norm": 11.710686683654785, "learning_rate": 2.2539520652728202e-05, "loss": 1.899, "step": 38100 }, { "epoch": 6.477987421383648, "grad_norm": 12.690605163574219, "learning_rate": 2.2536687631027255e-05, "loss": 1.7751, "step": 38110 }, { "epoch": 6.479687234404215, "grad_norm": 16.4415340423584, "learning_rate": 2.2533854609326305e-05, "loss": 1.9353, "step": 38120 }, { "epoch": 6.481387047424783, "grad_norm": 29.38494110107422, "learning_rate": 2.2531021587625362e-05, "loss": 1.6505, "step": 38130 }, { "epoch": 6.483086860445351, "grad_norm": 10.494175910949707, "learning_rate": 2.2528188565924416e-05, "loss": 1.9467, "step": 38140 }, { "epoch": 6.484786673465919, "grad_norm": 11.381546974182129, "learning_rate": 2.252535554422347e-05, "loss": 1.8621, "step": 38150 }, { "epoch": 6.486486486486487, "grad_norm": 13.917597770690918, "learning_rate": 2.2522522522522523e-05, "loss": 1.4152, "step": 38160 }, { "epoch": 6.4881862995070545, "grad_norm": 16.778827667236328, "learning_rate": 2.2519689500821576e-05, "loss": 1.6935, "step": 38170 }, { "epoch": 6.489886112527622, "grad_norm": 11.425920486450195, "learning_rate": 2.251685647912063e-05, "loss": 2.0403, "step": 38180 }, { "epoch": 6.49158592554819, "grad_norm": 13.319698333740234, "learning_rate": 2.2514023457419687e-05, "loss": 1.723, "step": 38190 }, { "epoch": 6.4932857385687575, "grad_norm": 14.685376167297363, "learning_rate": 2.2511190435718737e-05, "loss": 1.8999, "step": 38200 }, { "epoch": 6.494985551589325, "grad_norm": 15.83808422088623, "learning_rate": 2.250835741401779e-05, "loss": 1.8351, "step": 38210 }, { "epoch": 6.496685364609893, "grad_norm": 14.62051010131836, "learning_rate": 2.2505524392316847e-05, "loss": 2.0382, "step": 38220 }, { "epoch": 6.49838517763046, "grad_norm": 20.3931884765625, "learning_rate": 2.25026913706159e-05, "loss": 1.8677, "step": 38230 }, { "epoch": 6.500084990651028, "grad_norm": 14.202239036560059, "learning_rate": 2.249985834891495e-05, "loss": 2.0491, "step": 38240 }, { "epoch": 6.501784803671596, "grad_norm": 16.88774299621582, "learning_rate": 2.2497025327214008e-05, "loss": 1.7203, "step": 38250 }, { "epoch": 6.503484616692164, "grad_norm": 11.798806190490723, "learning_rate": 2.249419230551306e-05, "loss": 2.0089, "step": 38260 }, { "epoch": 6.505184429712732, "grad_norm": 25.81189727783203, "learning_rate": 2.249135928381211e-05, "loss": 1.9177, "step": 38270 }, { "epoch": 6.5068842427333, "grad_norm": 16.67625617980957, "learning_rate": 2.248852626211117e-05, "loss": 1.8616, "step": 38280 }, { "epoch": 6.508584055753867, "grad_norm": 13.00487995147705, "learning_rate": 2.2485693240410222e-05, "loss": 1.8555, "step": 38290 }, { "epoch": 6.510283868774435, "grad_norm": 10.442505836486816, "learning_rate": 2.2482860218709276e-05, "loss": 2.1096, "step": 38300 }, { "epoch": 6.5119836817950025, "grad_norm": 14.338397026062012, "learning_rate": 2.248002719700833e-05, "loss": 1.6844, "step": 38310 }, { "epoch": 6.51368349481557, "grad_norm": 12.078507423400879, "learning_rate": 2.2477194175307383e-05, "loss": 1.7946, "step": 38320 }, { "epoch": 6.515383307836138, "grad_norm": 16.85171890258789, "learning_rate": 2.2474361153606436e-05, "loss": 1.5633, "step": 38330 }, { "epoch": 6.5170831208567055, "grad_norm": 13.340333938598633, "learning_rate": 2.2471528131905493e-05, "loss": 1.8828, "step": 38340 }, { "epoch": 6.518782933877273, "grad_norm": 19.814903259277344, "learning_rate": 2.2468695110204543e-05, "loss": 1.7972, "step": 38350 }, { "epoch": 6.520482746897841, "grad_norm": 12.41966724395752, "learning_rate": 2.2465862088503597e-05, "loss": 1.6848, "step": 38360 }, { "epoch": 6.522182559918409, "grad_norm": 21.906469345092773, "learning_rate": 2.2463029066802654e-05, "loss": 1.6244, "step": 38370 }, { "epoch": 6.523882372938977, "grad_norm": 15.743451118469238, "learning_rate": 2.2460196045101707e-05, "loss": 1.873, "step": 38380 }, { "epoch": 6.525582185959545, "grad_norm": 15.142967224121094, "learning_rate": 2.2457363023400757e-05, "loss": 2.2176, "step": 38390 }, { "epoch": 6.527281998980112, "grad_norm": 18.496522903442383, "learning_rate": 2.2454530001699814e-05, "loss": 1.6722, "step": 38400 }, { "epoch": 6.52898181200068, "grad_norm": 10.644270896911621, "learning_rate": 2.2451696979998868e-05, "loss": 1.8589, "step": 38410 }, { "epoch": 6.530681625021248, "grad_norm": 15.538422584533691, "learning_rate": 2.244886395829792e-05, "loss": 1.7877, "step": 38420 }, { "epoch": 6.532381438041815, "grad_norm": 17.686439514160156, "learning_rate": 2.2446030936596975e-05, "loss": 1.7919, "step": 38430 }, { "epoch": 6.534081251062383, "grad_norm": 19.152658462524414, "learning_rate": 2.2443197914896028e-05, "loss": 1.5368, "step": 38440 }, { "epoch": 6.535781064082951, "grad_norm": 16.102216720581055, "learning_rate": 2.2440364893195082e-05, "loss": 1.7519, "step": 38450 }, { "epoch": 6.537480877103519, "grad_norm": 16.374696731567383, "learning_rate": 2.2437531871494135e-05, "loss": 1.9367, "step": 38460 }, { "epoch": 6.539180690124086, "grad_norm": 16.573686599731445, "learning_rate": 2.243469884979319e-05, "loss": 1.739, "step": 38470 }, { "epoch": 6.540880503144654, "grad_norm": 12.121818542480469, "learning_rate": 2.2431865828092242e-05, "loss": 1.8733, "step": 38480 }, { "epoch": 6.542580316165222, "grad_norm": 12.75258731842041, "learning_rate": 2.24290328063913e-05, "loss": 1.8198, "step": 38490 }, { "epoch": 6.54428012918579, "grad_norm": 12.168744087219238, "learning_rate": 2.242619978469035e-05, "loss": 1.8004, "step": 38500 }, { "epoch": 6.545979942206357, "grad_norm": 13.84501838684082, "learning_rate": 2.2423366762989403e-05, "loss": 1.7745, "step": 38510 }, { "epoch": 6.547679755226925, "grad_norm": 14.10037899017334, "learning_rate": 2.242053374128846e-05, "loss": 1.9558, "step": 38520 }, { "epoch": 6.549379568247493, "grad_norm": 12.32919979095459, "learning_rate": 2.2417700719587513e-05, "loss": 1.8385, "step": 38530 }, { "epoch": 6.55107938126806, "grad_norm": 12.576639175415039, "learning_rate": 2.2414867697886567e-05, "loss": 1.7857, "step": 38540 }, { "epoch": 6.552779194288628, "grad_norm": 11.108550071716309, "learning_rate": 2.241203467618562e-05, "loss": 1.5751, "step": 38550 }, { "epoch": 6.554479007309196, "grad_norm": 13.781158447265625, "learning_rate": 2.2409201654484674e-05, "loss": 1.7679, "step": 38560 }, { "epoch": 6.556178820329764, "grad_norm": 14.25123119354248, "learning_rate": 2.240636863278373e-05, "loss": 1.8381, "step": 38570 }, { "epoch": 6.557878633350332, "grad_norm": 20.42469596862793, "learning_rate": 2.240353561108278e-05, "loss": 1.7591, "step": 38580 }, { "epoch": 6.5595784463708995, "grad_norm": 20.996849060058594, "learning_rate": 2.2400702589381834e-05, "loss": 1.5011, "step": 38590 }, { "epoch": 6.561278259391467, "grad_norm": 13.863409042358398, "learning_rate": 2.239786956768089e-05, "loss": 1.7539, "step": 38600 }, { "epoch": 6.562978072412035, "grad_norm": 16.433263778686523, "learning_rate": 2.2395036545979945e-05, "loss": 1.9151, "step": 38610 }, { "epoch": 6.5646778854326024, "grad_norm": 23.74393653869629, "learning_rate": 2.2392203524278995e-05, "loss": 1.8492, "step": 38620 }, { "epoch": 6.56637769845317, "grad_norm": 17.28091812133789, "learning_rate": 2.2389370502578052e-05, "loss": 1.9522, "step": 38630 }, { "epoch": 6.568077511473738, "grad_norm": 12.920809745788574, "learning_rate": 2.2386537480877105e-05, "loss": 1.8058, "step": 38640 }, { "epoch": 6.569777324494305, "grad_norm": 10.833897590637207, "learning_rate": 2.2383704459176156e-05, "loss": 1.7855, "step": 38650 }, { "epoch": 6.571477137514873, "grad_norm": 15.153417587280273, "learning_rate": 2.2380871437475213e-05, "loss": 1.6635, "step": 38660 }, { "epoch": 6.573176950535441, "grad_norm": 19.374814987182617, "learning_rate": 2.2378038415774266e-05, "loss": 1.8524, "step": 38670 }, { "epoch": 6.574876763556009, "grad_norm": 13.041993141174316, "learning_rate": 2.237520539407332e-05, "loss": 1.7646, "step": 38680 }, { "epoch": 6.576576576576577, "grad_norm": 10.644253730773926, "learning_rate": 2.2372372372372373e-05, "loss": 1.8503, "step": 38690 }, { "epoch": 6.578276389597145, "grad_norm": 10.895718574523926, "learning_rate": 2.2369539350671427e-05, "loss": 1.9639, "step": 38700 }, { "epoch": 6.579976202617712, "grad_norm": 11.846012115478516, "learning_rate": 2.236670632897048e-05, "loss": 1.8769, "step": 38710 }, { "epoch": 6.58167601563828, "grad_norm": 20.15083122253418, "learning_rate": 2.2363873307269537e-05, "loss": 1.8502, "step": 38720 }, { "epoch": 6.5833758286588475, "grad_norm": 18.60836410522461, "learning_rate": 2.2361040285568587e-05, "loss": 2.0108, "step": 38730 }, { "epoch": 6.585075641679415, "grad_norm": 15.689933776855469, "learning_rate": 2.235820726386764e-05, "loss": 1.6403, "step": 38740 }, { "epoch": 6.586775454699983, "grad_norm": 10.313735008239746, "learning_rate": 2.2355374242166698e-05, "loss": 1.8117, "step": 38750 }, { "epoch": 6.5884752677205505, "grad_norm": 11.664031982421875, "learning_rate": 2.235254122046575e-05, "loss": 1.8703, "step": 38760 }, { "epoch": 6.590175080741118, "grad_norm": 19.60015869140625, "learning_rate": 2.23497081987648e-05, "loss": 1.6402, "step": 38770 }, { "epoch": 6.591874893761686, "grad_norm": 14.177414894104004, "learning_rate": 2.2346875177063858e-05, "loss": 1.8778, "step": 38780 }, { "epoch": 6.593574706782254, "grad_norm": 11.102758407592773, "learning_rate": 2.234404215536291e-05, "loss": 1.7606, "step": 38790 }, { "epoch": 6.595274519802822, "grad_norm": 15.84093952178955, "learning_rate": 2.2341209133661962e-05, "loss": 1.6908, "step": 38800 }, { "epoch": 6.59697433282339, "grad_norm": 21.62769889831543, "learning_rate": 2.233837611196102e-05, "loss": 1.7991, "step": 38810 }, { "epoch": 6.598674145843957, "grad_norm": 15.591104507446289, "learning_rate": 2.2335543090260072e-05, "loss": 1.6964, "step": 38820 }, { "epoch": 6.600373958864525, "grad_norm": 7.846480369567871, "learning_rate": 2.2332710068559126e-05, "loss": 1.7559, "step": 38830 }, { "epoch": 6.602073771885093, "grad_norm": 16.315448760986328, "learning_rate": 2.232987704685818e-05, "loss": 1.8925, "step": 38840 }, { "epoch": 6.60377358490566, "grad_norm": 12.793950080871582, "learning_rate": 2.2327044025157233e-05, "loss": 1.7733, "step": 38850 }, { "epoch": 6.605473397926228, "grad_norm": 16.15806770324707, "learning_rate": 2.2324211003456286e-05, "loss": 1.8756, "step": 38860 }, { "epoch": 6.6071732109467955, "grad_norm": 23.946779251098633, "learning_rate": 2.2321377981755343e-05, "loss": 1.7387, "step": 38870 }, { "epoch": 6.608873023967363, "grad_norm": 13.470664024353027, "learning_rate": 2.2318544960054393e-05, "loss": 1.8707, "step": 38880 }, { "epoch": 6.610572836987931, "grad_norm": 17.479257583618164, "learning_rate": 2.2315711938353447e-05, "loss": 1.4438, "step": 38890 }, { "epoch": 6.612272650008499, "grad_norm": 15.856487274169922, "learning_rate": 2.2312878916652504e-05, "loss": 1.814, "step": 38900 }, { "epoch": 6.613972463029067, "grad_norm": 10.25173568725586, "learning_rate": 2.2310045894951557e-05, "loss": 1.7033, "step": 38910 }, { "epoch": 6.615672276049635, "grad_norm": 18.258907318115234, "learning_rate": 2.2307212873250607e-05, "loss": 1.5946, "step": 38920 }, { "epoch": 6.617372089070202, "grad_norm": 15.137003898620605, "learning_rate": 2.2304379851549664e-05, "loss": 1.6961, "step": 38930 }, { "epoch": 6.61907190209077, "grad_norm": 12.8327054977417, "learning_rate": 2.2301546829848718e-05, "loss": 1.7922, "step": 38940 }, { "epoch": 6.620771715111338, "grad_norm": 12.718996047973633, "learning_rate": 2.229871380814777e-05, "loss": 1.9184, "step": 38950 }, { "epoch": 6.622471528131905, "grad_norm": 8.391510963439941, "learning_rate": 2.2295880786446825e-05, "loss": 1.9165, "step": 38960 }, { "epoch": 6.624171341152473, "grad_norm": 24.380393981933594, "learning_rate": 2.229304776474588e-05, "loss": 1.8294, "step": 38970 }, { "epoch": 6.625871154173041, "grad_norm": 13.497225761413574, "learning_rate": 2.2290214743044932e-05, "loss": 1.8158, "step": 38980 }, { "epoch": 6.627570967193609, "grad_norm": 14.481985092163086, "learning_rate": 2.2287381721343986e-05, "loss": 1.7175, "step": 38990 }, { "epoch": 6.629270780214177, "grad_norm": 10.495716094970703, "learning_rate": 2.228454869964304e-05, "loss": 1.6043, "step": 39000 }, { "epoch": 6.6309705932347445, "grad_norm": 11.84445858001709, "learning_rate": 2.2281715677942093e-05, "loss": 1.7789, "step": 39010 }, { "epoch": 6.632670406255312, "grad_norm": 19.42310905456543, "learning_rate": 2.227888265624115e-05, "loss": 1.8745, "step": 39020 }, { "epoch": 6.63437021927588, "grad_norm": 17.89912986755371, "learning_rate": 2.22760496345402e-05, "loss": 1.6535, "step": 39030 }, { "epoch": 6.636070032296447, "grad_norm": 14.97494888305664, "learning_rate": 2.2273216612839253e-05, "loss": 1.7977, "step": 39040 }, { "epoch": 6.637769845317015, "grad_norm": 12.32768726348877, "learning_rate": 2.227038359113831e-05, "loss": 2.0569, "step": 39050 }, { "epoch": 6.639469658337583, "grad_norm": 13.260787963867188, "learning_rate": 2.2267550569437364e-05, "loss": 1.6238, "step": 39060 }, { "epoch": 6.64116947135815, "grad_norm": 19.01959991455078, "learning_rate": 2.2264717547736414e-05, "loss": 1.8337, "step": 39070 }, { "epoch": 6.642869284378718, "grad_norm": 14.259637832641602, "learning_rate": 2.226188452603547e-05, "loss": 1.9273, "step": 39080 }, { "epoch": 6.644569097399286, "grad_norm": 13.214277267456055, "learning_rate": 2.2259051504334524e-05, "loss": 1.847, "step": 39090 }, { "epoch": 6.646268910419854, "grad_norm": 13.833882331848145, "learning_rate": 2.2256218482633578e-05, "loss": 1.6466, "step": 39100 }, { "epoch": 6.647968723440422, "grad_norm": 15.861743927001953, "learning_rate": 2.225338546093263e-05, "loss": 2.0425, "step": 39110 }, { "epoch": 6.6496685364609895, "grad_norm": 13.330206871032715, "learning_rate": 2.2250552439231685e-05, "loss": 1.7331, "step": 39120 }, { "epoch": 6.651368349481557, "grad_norm": 13.851170539855957, "learning_rate": 2.2247719417530738e-05, "loss": 1.6662, "step": 39130 }, { "epoch": 6.653068162502125, "grad_norm": 15.824163436889648, "learning_rate": 2.2244886395829792e-05, "loss": 1.7101, "step": 39140 }, { "epoch": 6.6547679755226925, "grad_norm": 15.561247825622559, "learning_rate": 2.2242053374128845e-05, "loss": 1.9478, "step": 39150 }, { "epoch": 6.65646778854326, "grad_norm": 14.827356338500977, "learning_rate": 2.22392203524279e-05, "loss": 1.6903, "step": 39160 }, { "epoch": 6.658167601563828, "grad_norm": 14.052197456359863, "learning_rate": 2.2236387330726956e-05, "loss": 1.6983, "step": 39170 }, { "epoch": 6.6598674145843955, "grad_norm": 13.16513729095459, "learning_rate": 2.2233554309026006e-05, "loss": 2.0015, "step": 39180 }, { "epoch": 6.661567227604963, "grad_norm": 15.406061172485352, "learning_rate": 2.223072128732506e-05, "loss": 1.7673, "step": 39190 }, { "epoch": 6.663267040625531, "grad_norm": 16.382272720336914, "learning_rate": 2.2227888265624116e-05, "loss": 1.8663, "step": 39200 }, { "epoch": 6.664966853646099, "grad_norm": 14.158053398132324, "learning_rate": 2.222505524392317e-05, "loss": 1.847, "step": 39210 }, { "epoch": 6.666666666666667, "grad_norm": 12.668237686157227, "learning_rate": 2.222222222222222e-05, "loss": 1.9631, "step": 39220 }, { "epoch": 6.668366479687235, "grad_norm": 14.537093162536621, "learning_rate": 2.2219389200521277e-05, "loss": 1.7078, "step": 39230 }, { "epoch": 6.670066292707802, "grad_norm": 15.074851989746094, "learning_rate": 2.221655617882033e-05, "loss": 1.8896, "step": 39240 }, { "epoch": 6.67176610572837, "grad_norm": 14.578266143798828, "learning_rate": 2.2213723157119384e-05, "loss": 2.0119, "step": 39250 }, { "epoch": 6.673465918748938, "grad_norm": 14.927539825439453, "learning_rate": 2.2210890135418437e-05, "loss": 1.81, "step": 39260 }, { "epoch": 6.675165731769505, "grad_norm": 13.043336868286133, "learning_rate": 2.220805711371749e-05, "loss": 1.9332, "step": 39270 }, { "epoch": 6.676865544790073, "grad_norm": 13.314399719238281, "learning_rate": 2.2205224092016548e-05, "loss": 1.7816, "step": 39280 }, { "epoch": 6.6785653578106405, "grad_norm": 20.47813606262207, "learning_rate": 2.22023910703156e-05, "loss": 1.7, "step": 39290 }, { "epoch": 6.680265170831208, "grad_norm": 14.026104927062988, "learning_rate": 2.219955804861465e-05, "loss": 1.7934, "step": 39300 }, { "epoch": 6.681964983851776, "grad_norm": 12.806169509887695, "learning_rate": 2.219672502691371e-05, "loss": 1.6141, "step": 39310 }, { "epoch": 6.683664796872344, "grad_norm": 18.17342758178711, "learning_rate": 2.2193892005212762e-05, "loss": 1.9051, "step": 39320 }, { "epoch": 6.685364609892912, "grad_norm": 14.107656478881836, "learning_rate": 2.2191058983511812e-05, "loss": 1.827, "step": 39330 }, { "epoch": 6.68706442291348, "grad_norm": 17.25820541381836, "learning_rate": 2.218822596181087e-05, "loss": 1.8429, "step": 39340 }, { "epoch": 6.688764235934047, "grad_norm": 22.346847534179688, "learning_rate": 2.2185392940109922e-05, "loss": 1.7674, "step": 39350 }, { "epoch": 6.690464048954615, "grad_norm": 11.465150833129883, "learning_rate": 2.2182559918408976e-05, "loss": 1.7632, "step": 39360 }, { "epoch": 6.692163861975183, "grad_norm": 13.24609661102295, "learning_rate": 2.217972689670803e-05, "loss": 1.5851, "step": 39370 }, { "epoch": 6.69386367499575, "grad_norm": 10.383570671081543, "learning_rate": 2.2176893875007083e-05, "loss": 1.8778, "step": 39380 }, { "epoch": 6.695563488016318, "grad_norm": 13.727225303649902, "learning_rate": 2.2174060853306137e-05, "loss": 1.7654, "step": 39390 }, { "epoch": 6.697263301036886, "grad_norm": 11.35421371459961, "learning_rate": 2.2171227831605193e-05, "loss": 1.8744, "step": 39400 }, { "epoch": 6.698963114057454, "grad_norm": 26.017372131347656, "learning_rate": 2.2168394809904244e-05, "loss": 1.7932, "step": 39410 }, { "epoch": 6.700662927078022, "grad_norm": 20.028762817382812, "learning_rate": 2.2165561788203297e-05, "loss": 1.9506, "step": 39420 }, { "epoch": 6.7023627400985895, "grad_norm": 29.378690719604492, "learning_rate": 2.2162728766502354e-05, "loss": 1.9996, "step": 39430 }, { "epoch": 6.704062553119157, "grad_norm": 23.137617111206055, "learning_rate": 2.2159895744801408e-05, "loss": 1.9874, "step": 39440 }, { "epoch": 6.705762366139725, "grad_norm": 23.42072105407715, "learning_rate": 2.2157062723100458e-05, "loss": 1.8913, "step": 39450 }, { "epoch": 6.707462179160292, "grad_norm": 15.364208221435547, "learning_rate": 2.2154229701399515e-05, "loss": 1.8633, "step": 39460 }, { "epoch": 6.70916199218086, "grad_norm": 14.143489837646484, "learning_rate": 2.2151396679698568e-05, "loss": 1.7866, "step": 39470 }, { "epoch": 6.710861805201428, "grad_norm": 20.611818313598633, "learning_rate": 2.2148563657997618e-05, "loss": 1.7433, "step": 39480 }, { "epoch": 6.712561618221995, "grad_norm": 10.386760711669922, "learning_rate": 2.2145730636296675e-05, "loss": 1.8657, "step": 39490 }, { "epoch": 6.714261431242563, "grad_norm": 12.660164833068848, "learning_rate": 2.214289761459573e-05, "loss": 1.8156, "step": 39500 }, { "epoch": 6.715961244263131, "grad_norm": 17.900663375854492, "learning_rate": 2.2140064592894782e-05, "loss": 1.9001, "step": 39510 }, { "epoch": 6.717661057283699, "grad_norm": 18.472089767456055, "learning_rate": 2.2137231571193836e-05, "loss": 1.7075, "step": 39520 }, { "epoch": 6.719360870304267, "grad_norm": 13.716729164123535, "learning_rate": 2.213439854949289e-05, "loss": 1.8444, "step": 39530 }, { "epoch": 6.7210606833248345, "grad_norm": 10.534502029418945, "learning_rate": 2.2131565527791943e-05, "loss": 1.8078, "step": 39540 }, { "epoch": 6.722760496345402, "grad_norm": 9.961870193481445, "learning_rate": 2.2128732506091e-05, "loss": 1.8956, "step": 39550 }, { "epoch": 6.72446030936597, "grad_norm": 12.090106010437012, "learning_rate": 2.212589948439005e-05, "loss": 1.6128, "step": 39560 }, { "epoch": 6.7261601223865375, "grad_norm": 12.841254234313965, "learning_rate": 2.2123066462689103e-05, "loss": 1.82, "step": 39570 }, { "epoch": 6.727859935407105, "grad_norm": 12.926685333251953, "learning_rate": 2.212023344098816e-05, "loss": 1.8034, "step": 39580 }, { "epoch": 6.729559748427673, "grad_norm": 16.179576873779297, "learning_rate": 2.2117400419287214e-05, "loss": 1.9778, "step": 39590 }, { "epoch": 6.7312595614482404, "grad_norm": 18.89673614501953, "learning_rate": 2.2114567397586264e-05, "loss": 1.9207, "step": 39600 }, { "epoch": 6.732959374468808, "grad_norm": 17.161775588989258, "learning_rate": 2.211173437588532e-05, "loss": 1.9262, "step": 39610 }, { "epoch": 6.734659187489376, "grad_norm": 16.74262237548828, "learning_rate": 2.2108901354184374e-05, "loss": 1.8717, "step": 39620 }, { "epoch": 6.736359000509944, "grad_norm": 16.589946746826172, "learning_rate": 2.2106068332483428e-05, "loss": 1.9151, "step": 39630 }, { "epoch": 6.738058813530512, "grad_norm": 15.057445526123047, "learning_rate": 2.210323531078248e-05, "loss": 1.9307, "step": 39640 }, { "epoch": 6.73975862655108, "grad_norm": 12.386497497558594, "learning_rate": 2.2100402289081535e-05, "loss": 1.864, "step": 39650 }, { "epoch": 6.741458439571647, "grad_norm": 14.123252868652344, "learning_rate": 2.209756926738059e-05, "loss": 1.8732, "step": 39660 }, { "epoch": 6.743158252592215, "grad_norm": 16.90985870361328, "learning_rate": 2.2094736245679642e-05, "loss": 1.8807, "step": 39670 }, { "epoch": 6.744858065612783, "grad_norm": 18.170732498168945, "learning_rate": 2.2091903223978695e-05, "loss": 1.9626, "step": 39680 }, { "epoch": 6.74655787863335, "grad_norm": 45.20295333862305, "learning_rate": 2.208907020227775e-05, "loss": 1.6154, "step": 39690 }, { "epoch": 6.748257691653918, "grad_norm": 12.909029006958008, "learning_rate": 2.2086237180576806e-05, "loss": 1.9032, "step": 39700 }, { "epoch": 6.7499575046744855, "grad_norm": 24.080713272094727, "learning_rate": 2.2083404158875856e-05, "loss": 1.6773, "step": 39710 }, { "epoch": 6.751657317695053, "grad_norm": 14.678606033325195, "learning_rate": 2.208057113717491e-05, "loss": 1.678, "step": 39720 }, { "epoch": 6.753357130715621, "grad_norm": 18.092073440551758, "learning_rate": 2.2077738115473966e-05, "loss": 1.7604, "step": 39730 }, { "epoch": 6.755056943736189, "grad_norm": 19.45437240600586, "learning_rate": 2.207490509377302e-05, "loss": 1.8009, "step": 39740 }, { "epoch": 6.756756756756757, "grad_norm": 13.647940635681152, "learning_rate": 2.207207207207207e-05, "loss": 1.8075, "step": 39750 }, { "epoch": 6.758456569777325, "grad_norm": 16.45145034790039, "learning_rate": 2.2069239050371127e-05, "loss": 1.7651, "step": 39760 }, { "epoch": 6.760156382797892, "grad_norm": 25.800214767456055, "learning_rate": 2.206640602867018e-05, "loss": 1.9506, "step": 39770 }, { "epoch": 6.76185619581846, "grad_norm": 19.53189468383789, "learning_rate": 2.2063573006969234e-05, "loss": 1.8273, "step": 39780 }, { "epoch": 6.763556008839028, "grad_norm": 18.602561950683594, "learning_rate": 2.2060739985268288e-05, "loss": 1.8987, "step": 39790 }, { "epoch": 6.765255821859595, "grad_norm": 13.002293586730957, "learning_rate": 2.205790696356734e-05, "loss": 1.8403, "step": 39800 }, { "epoch": 6.766955634880163, "grad_norm": 14.088385581970215, "learning_rate": 2.2055073941866395e-05, "loss": 1.7842, "step": 39810 }, { "epoch": 6.768655447900731, "grad_norm": 16.0533447265625, "learning_rate": 2.205224092016545e-05, "loss": 1.6951, "step": 39820 }, { "epoch": 6.770355260921299, "grad_norm": 17.311697006225586, "learning_rate": 2.20494078984645e-05, "loss": 1.9722, "step": 39830 }, { "epoch": 6.772055073941866, "grad_norm": 13.948801040649414, "learning_rate": 2.2046574876763555e-05, "loss": 1.7406, "step": 39840 }, { "epoch": 6.773754886962434, "grad_norm": 15.959355354309082, "learning_rate": 2.2043741855062612e-05, "loss": 1.8207, "step": 39850 }, { "epoch": 6.775454699983002, "grad_norm": 17.82697105407715, "learning_rate": 2.2040908833361662e-05, "loss": 1.8454, "step": 39860 }, { "epoch": 6.77715451300357, "grad_norm": 17.63559341430664, "learning_rate": 2.2038075811660716e-05, "loss": 1.8431, "step": 39870 }, { "epoch": 6.778854326024137, "grad_norm": 13.916031837463379, "learning_rate": 2.2035242789959773e-05, "loss": 1.7823, "step": 39880 }, { "epoch": 6.780554139044705, "grad_norm": 14.358513832092285, "learning_rate": 2.2032409768258826e-05, "loss": 1.7795, "step": 39890 }, { "epoch": 6.782253952065273, "grad_norm": 16.441747665405273, "learning_rate": 2.2029576746557876e-05, "loss": 1.7698, "step": 39900 }, { "epoch": 6.78395376508584, "grad_norm": 16.6614933013916, "learning_rate": 2.2026743724856933e-05, "loss": 1.7516, "step": 39910 }, { "epoch": 6.785653578106408, "grad_norm": 11.661684036254883, "learning_rate": 2.2023910703155987e-05, "loss": 1.64, "step": 39920 }, { "epoch": 6.787353391126976, "grad_norm": 13.848204612731934, "learning_rate": 2.202107768145504e-05, "loss": 1.5482, "step": 39930 }, { "epoch": 6.789053204147544, "grad_norm": 14.123588562011719, "learning_rate": 2.2018244659754094e-05, "loss": 1.6398, "step": 39940 }, { "epoch": 6.790753017168112, "grad_norm": 20.105316162109375, "learning_rate": 2.2015411638053147e-05, "loss": 1.7088, "step": 39950 }, { "epoch": 6.7924528301886795, "grad_norm": 16.625123977661133, "learning_rate": 2.20125786163522e-05, "loss": 1.8561, "step": 39960 }, { "epoch": 6.794152643209247, "grad_norm": 17.37016487121582, "learning_rate": 2.2009745594651258e-05, "loss": 2.0572, "step": 39970 }, { "epoch": 6.795852456229815, "grad_norm": 21.555801391601562, "learning_rate": 2.2006912572950308e-05, "loss": 1.7297, "step": 39980 }, { "epoch": 6.7975522692503825, "grad_norm": 27.27083396911621, "learning_rate": 2.2004079551249365e-05, "loss": 1.6985, "step": 39990 }, { "epoch": 6.79925208227095, "grad_norm": 14.34648609161377, "learning_rate": 2.200124652954842e-05, "loss": 1.6606, "step": 40000 }, { "epoch": 6.800951895291518, "grad_norm": 32.7663688659668, "learning_rate": 2.199841350784747e-05, "loss": 1.826, "step": 40010 }, { "epoch": 6.802651708312085, "grad_norm": 19.98823356628418, "learning_rate": 2.1995580486146525e-05, "loss": 1.8759, "step": 40020 }, { "epoch": 6.804351521332653, "grad_norm": 16.601240158081055, "learning_rate": 2.199274746444558e-05, "loss": 1.9194, "step": 40030 }, { "epoch": 6.806051334353221, "grad_norm": 13.727110862731934, "learning_rate": 2.1989914442744632e-05, "loss": 1.8833, "step": 40040 }, { "epoch": 6.807751147373789, "grad_norm": 20.813945770263672, "learning_rate": 2.1987081421043686e-05, "loss": 1.7304, "step": 40050 }, { "epoch": 6.809450960394357, "grad_norm": 16.324954986572266, "learning_rate": 2.198424839934274e-05, "loss": 1.9604, "step": 40060 }, { "epoch": 6.811150773414925, "grad_norm": 13.836637496948242, "learning_rate": 2.1981415377641793e-05, "loss": 1.8296, "step": 40070 }, { "epoch": 6.812850586435492, "grad_norm": 16.319473266601562, "learning_rate": 2.197858235594085e-05, "loss": 1.8692, "step": 40080 }, { "epoch": 6.81455039945606, "grad_norm": 13.501866340637207, "learning_rate": 2.19757493342399e-05, "loss": 1.82, "step": 40090 }, { "epoch": 6.8162502124766275, "grad_norm": 17.122175216674805, "learning_rate": 2.1972916312538954e-05, "loss": 1.5765, "step": 40100 }, { "epoch": 6.817950025497195, "grad_norm": 31.89051628112793, "learning_rate": 2.197008329083801e-05, "loss": 1.9613, "step": 40110 }, { "epoch": 6.819649838517763, "grad_norm": 10.631219863891602, "learning_rate": 2.1967250269137064e-05, "loss": 1.8214, "step": 40120 }, { "epoch": 6.8213496515383305, "grad_norm": 13.010945320129395, "learning_rate": 2.1964417247436114e-05, "loss": 1.7612, "step": 40130 }, { "epoch": 6.823049464558898, "grad_norm": 13.744952201843262, "learning_rate": 2.196158422573517e-05, "loss": 1.7097, "step": 40140 }, { "epoch": 6.824749277579466, "grad_norm": 15.812854766845703, "learning_rate": 2.1958751204034225e-05, "loss": 1.6373, "step": 40150 }, { "epoch": 6.826449090600034, "grad_norm": 15.786505699157715, "learning_rate": 2.1955918182333278e-05, "loss": 1.6745, "step": 40160 }, { "epoch": 6.828148903620602, "grad_norm": 20.244739532470703, "learning_rate": 2.195308516063233e-05, "loss": 1.7302, "step": 40170 }, { "epoch": 6.82984871664117, "grad_norm": 15.46937084197998, "learning_rate": 2.1950252138931385e-05, "loss": 2.1441, "step": 40180 }, { "epoch": 6.831548529661737, "grad_norm": 19.126502990722656, "learning_rate": 2.194741911723044e-05, "loss": 1.6145, "step": 40190 }, { "epoch": 6.833248342682305, "grad_norm": 14.432228088378906, "learning_rate": 2.1944586095529492e-05, "loss": 1.7598, "step": 40200 }, { "epoch": 6.834948155702873, "grad_norm": 19.153173446655273, "learning_rate": 2.1941753073828546e-05, "loss": 1.6642, "step": 40210 }, { "epoch": 6.83664796872344, "grad_norm": 19.103364944458008, "learning_rate": 2.19389200521276e-05, "loss": 1.8534, "step": 40220 }, { "epoch": 6.838347781744008, "grad_norm": 10.324997901916504, "learning_rate": 2.1936087030426656e-05, "loss": 2.007, "step": 40230 }, { "epoch": 6.840047594764576, "grad_norm": 18.444440841674805, "learning_rate": 2.1933254008725706e-05, "loss": 1.7146, "step": 40240 }, { "epoch": 6.841747407785144, "grad_norm": 22.923429489135742, "learning_rate": 2.193042098702476e-05, "loss": 1.8261, "step": 40250 }, { "epoch": 6.843447220805711, "grad_norm": 18.258995056152344, "learning_rate": 2.1927587965323817e-05, "loss": 1.7219, "step": 40260 }, { "epoch": 6.845147033826279, "grad_norm": 15.590622901916504, "learning_rate": 2.192475494362287e-05, "loss": 1.7554, "step": 40270 }, { "epoch": 6.846846846846847, "grad_norm": 11.935357093811035, "learning_rate": 2.192192192192192e-05, "loss": 1.745, "step": 40280 }, { "epoch": 6.848546659867415, "grad_norm": 17.120054244995117, "learning_rate": 2.1919088900220977e-05, "loss": 1.7602, "step": 40290 }, { "epoch": 6.850246472887982, "grad_norm": 8.296123504638672, "learning_rate": 2.191625587852003e-05, "loss": 1.7796, "step": 40300 }, { "epoch": 6.85194628590855, "grad_norm": 13.551980018615723, "learning_rate": 2.1913422856819084e-05, "loss": 1.9403, "step": 40310 }, { "epoch": 6.853646098929118, "grad_norm": 13.351761817932129, "learning_rate": 2.1910589835118138e-05, "loss": 1.8502, "step": 40320 }, { "epoch": 6.855345911949685, "grad_norm": 17.06722640991211, "learning_rate": 2.190775681341719e-05, "loss": 1.9365, "step": 40330 }, { "epoch": 6.857045724970253, "grad_norm": 16.506698608398438, "learning_rate": 2.1904923791716245e-05, "loss": 1.8001, "step": 40340 }, { "epoch": 6.858745537990821, "grad_norm": 14.57226276397705, "learning_rate": 2.19020907700153e-05, "loss": 1.9055, "step": 40350 }, { "epoch": 6.860445351011389, "grad_norm": 20.692947387695312, "learning_rate": 2.1899257748314352e-05, "loss": 1.9031, "step": 40360 }, { "epoch": 6.862145164031957, "grad_norm": 16.48172378540039, "learning_rate": 2.1896424726613405e-05, "loss": 1.7152, "step": 40370 }, { "epoch": 6.8638449770525245, "grad_norm": 13.97290325164795, "learning_rate": 2.1893591704912462e-05, "loss": 1.7076, "step": 40380 }, { "epoch": 6.865544790073092, "grad_norm": 16.34535026550293, "learning_rate": 2.1890758683211512e-05, "loss": 2.0131, "step": 40390 }, { "epoch": 6.86724460309366, "grad_norm": 12.211677551269531, "learning_rate": 2.1887925661510566e-05, "loss": 1.7457, "step": 40400 }, { "epoch": 6.8689444161142275, "grad_norm": 17.623796463012695, "learning_rate": 2.1885092639809623e-05, "loss": 1.8474, "step": 40410 }, { "epoch": 6.870644229134795, "grad_norm": 17.91983413696289, "learning_rate": 2.1882259618108676e-05, "loss": 1.693, "step": 40420 }, { "epoch": 6.872344042155363, "grad_norm": 15.050071716308594, "learning_rate": 2.1879426596407727e-05, "loss": 1.7477, "step": 40430 }, { "epoch": 6.87404385517593, "grad_norm": 15.889287948608398, "learning_rate": 2.1876593574706783e-05, "loss": 1.9379, "step": 40440 }, { "epoch": 6.875743668196498, "grad_norm": 15.55554485321045, "learning_rate": 2.1873760553005837e-05, "loss": 1.8044, "step": 40450 }, { "epoch": 6.877443481217066, "grad_norm": 14.69914436340332, "learning_rate": 2.187092753130489e-05, "loss": 1.7718, "step": 40460 }, { "epoch": 6.879143294237634, "grad_norm": 15.490782737731934, "learning_rate": 2.1868094509603944e-05, "loss": 1.7088, "step": 40470 }, { "epoch": 6.880843107258202, "grad_norm": 12.964055061340332, "learning_rate": 2.1865261487902998e-05, "loss": 1.996, "step": 40480 }, { "epoch": 6.88254292027877, "grad_norm": 16.112831115722656, "learning_rate": 2.186242846620205e-05, "loss": 1.8826, "step": 40490 }, { "epoch": 6.884242733299337, "grad_norm": 13.745627403259277, "learning_rate": 2.1859595444501108e-05, "loss": 1.7787, "step": 40500 }, { "epoch": 6.885942546319905, "grad_norm": 18.555599212646484, "learning_rate": 2.1856762422800158e-05, "loss": 1.8241, "step": 40510 }, { "epoch": 6.8876423593404725, "grad_norm": 12.15076732635498, "learning_rate": 2.185392940109921e-05, "loss": 1.6031, "step": 40520 }, { "epoch": 6.88934217236104, "grad_norm": 13.621265411376953, "learning_rate": 2.185109637939827e-05, "loss": 1.708, "step": 40530 }, { "epoch": 6.891041985381608, "grad_norm": 15.177295684814453, "learning_rate": 2.184826335769732e-05, "loss": 1.7397, "step": 40540 }, { "epoch": 6.8927417984021755, "grad_norm": 12.679082870483398, "learning_rate": 2.1845430335996372e-05, "loss": 1.6773, "step": 40550 }, { "epoch": 6.894441611422743, "grad_norm": 8.465753555297852, "learning_rate": 2.184259731429543e-05, "loss": 1.6802, "step": 40560 }, { "epoch": 6.896141424443311, "grad_norm": 20.862220764160156, "learning_rate": 2.1839764292594483e-05, "loss": 1.7934, "step": 40570 }, { "epoch": 6.897841237463879, "grad_norm": 35.82673263549805, "learning_rate": 2.1836931270893533e-05, "loss": 1.9455, "step": 40580 }, { "epoch": 6.899541050484447, "grad_norm": 13.261860847473145, "learning_rate": 2.183409824919259e-05, "loss": 1.7399, "step": 40590 }, { "epoch": 6.901240863505015, "grad_norm": 18.14429473876953, "learning_rate": 2.1831265227491643e-05, "loss": 1.8248, "step": 40600 }, { "epoch": 6.902940676525582, "grad_norm": 11.011076927185059, "learning_rate": 2.1828432205790697e-05, "loss": 1.9677, "step": 40610 }, { "epoch": 6.90464048954615, "grad_norm": 14.984106063842773, "learning_rate": 2.182559918408975e-05, "loss": 1.7791, "step": 40620 }, { "epoch": 6.906340302566718, "grad_norm": 11.890995025634766, "learning_rate": 2.1822766162388804e-05, "loss": 1.7138, "step": 40630 }, { "epoch": 6.908040115587285, "grad_norm": 17.119720458984375, "learning_rate": 2.1819933140687857e-05, "loss": 1.8195, "step": 40640 }, { "epoch": 6.909739928607853, "grad_norm": 28.209739685058594, "learning_rate": 2.1817100118986914e-05, "loss": 1.6744, "step": 40650 }, { "epoch": 6.911439741628421, "grad_norm": 13.859565734863281, "learning_rate": 2.1814267097285964e-05, "loss": 1.8869, "step": 40660 }, { "epoch": 6.913139554648989, "grad_norm": 16.019792556762695, "learning_rate": 2.1811434075585018e-05, "loss": 1.7562, "step": 40670 }, { "epoch": 6.914839367669556, "grad_norm": 27.069047927856445, "learning_rate": 2.1808601053884075e-05, "loss": 2.0459, "step": 40680 }, { "epoch": 6.916539180690124, "grad_norm": 11.177797317504883, "learning_rate": 2.1805768032183125e-05, "loss": 1.9115, "step": 40690 }, { "epoch": 6.918238993710692, "grad_norm": 14.65888786315918, "learning_rate": 2.180293501048218e-05, "loss": 1.6115, "step": 40700 }, { "epoch": 6.91993880673126, "grad_norm": 13.848367691040039, "learning_rate": 2.1800101988781235e-05, "loss": 1.7247, "step": 40710 }, { "epoch": 6.921638619751827, "grad_norm": 13.963351249694824, "learning_rate": 2.179726896708029e-05, "loss": 1.9065, "step": 40720 }, { "epoch": 6.923338432772395, "grad_norm": 14.066054344177246, "learning_rate": 2.1794435945379342e-05, "loss": 1.9091, "step": 40730 }, { "epoch": 6.925038245792963, "grad_norm": 14.576854705810547, "learning_rate": 2.1791602923678396e-05, "loss": 1.6093, "step": 40740 }, { "epoch": 6.92673805881353, "grad_norm": 14.028197288513184, "learning_rate": 2.178876990197745e-05, "loss": 1.8556, "step": 40750 }, { "epoch": 6.928437871834098, "grad_norm": 17.43362808227539, "learning_rate": 2.1785936880276506e-05, "loss": 1.6691, "step": 40760 }, { "epoch": 6.930137684854666, "grad_norm": 10.96235466003418, "learning_rate": 2.1783103858575556e-05, "loss": 2.0003, "step": 40770 }, { "epoch": 6.931837497875234, "grad_norm": 22.20149040222168, "learning_rate": 2.178027083687461e-05, "loss": 1.7153, "step": 40780 }, { "epoch": 6.933537310895802, "grad_norm": 13.847933769226074, "learning_rate": 2.1777437815173667e-05, "loss": 1.8055, "step": 40790 }, { "epoch": 6.9352371239163695, "grad_norm": 19.013559341430664, "learning_rate": 2.177460479347272e-05, "loss": 1.7601, "step": 40800 }, { "epoch": 6.936936936936937, "grad_norm": 12.553760528564453, "learning_rate": 2.177177177177177e-05, "loss": 2.0197, "step": 40810 }, { "epoch": 6.938636749957505, "grad_norm": 12.942500114440918, "learning_rate": 2.1768938750070827e-05, "loss": 1.6243, "step": 40820 }, { "epoch": 6.940336562978072, "grad_norm": 13.954108238220215, "learning_rate": 2.176610572836988e-05, "loss": 1.671, "step": 40830 }, { "epoch": 6.94203637599864, "grad_norm": 12.614940643310547, "learning_rate": 2.1763272706668934e-05, "loss": 1.875, "step": 40840 }, { "epoch": 6.943736189019208, "grad_norm": 9.696233749389648, "learning_rate": 2.1760439684967988e-05, "loss": 1.9206, "step": 40850 }, { "epoch": 6.945436002039775, "grad_norm": 21.164716720581055, "learning_rate": 2.175760666326704e-05, "loss": 1.7952, "step": 40860 }, { "epoch": 6.947135815060343, "grad_norm": 11.969907760620117, "learning_rate": 2.1754773641566095e-05, "loss": 1.8401, "step": 40870 }, { "epoch": 6.948835628080911, "grad_norm": 18.037927627563477, "learning_rate": 2.175194061986515e-05, "loss": 1.8541, "step": 40880 }, { "epoch": 6.950535441101479, "grad_norm": 13.372387886047363, "learning_rate": 2.1749107598164202e-05, "loss": 1.9425, "step": 40890 }, { "epoch": 6.952235254122047, "grad_norm": 21.666513442993164, "learning_rate": 2.1746274576463256e-05, "loss": 1.6179, "step": 40900 }, { "epoch": 6.953935067142615, "grad_norm": 21.843168258666992, "learning_rate": 2.1743441554762313e-05, "loss": 1.5995, "step": 40910 }, { "epoch": 6.955634880163182, "grad_norm": 11.255897521972656, "learning_rate": 2.1740608533061363e-05, "loss": 2.0146, "step": 40920 }, { "epoch": 6.95733469318375, "grad_norm": 12.39660358428955, "learning_rate": 2.1737775511360416e-05, "loss": 1.8229, "step": 40930 }, { "epoch": 6.9590345062043175, "grad_norm": 12.918526649475098, "learning_rate": 2.1734942489659473e-05, "loss": 2.0281, "step": 40940 }, { "epoch": 6.960734319224885, "grad_norm": 18.15119743347168, "learning_rate": 2.1732109467958527e-05, "loss": 1.8669, "step": 40950 }, { "epoch": 6.962434132245453, "grad_norm": 13.520706176757812, "learning_rate": 2.1729276446257577e-05, "loss": 1.8661, "step": 40960 }, { "epoch": 6.9641339452660205, "grad_norm": 22.64904022216797, "learning_rate": 2.1726443424556634e-05, "loss": 1.7765, "step": 40970 }, { "epoch": 6.965833758286588, "grad_norm": 15.96713638305664, "learning_rate": 2.1723610402855687e-05, "loss": 1.8682, "step": 40980 }, { "epoch": 6.967533571307156, "grad_norm": 19.703489303588867, "learning_rate": 2.172077738115474e-05, "loss": 1.7791, "step": 40990 }, { "epoch": 6.969233384327724, "grad_norm": 13.268729209899902, "learning_rate": 2.1717944359453794e-05, "loss": 1.7813, "step": 41000 }, { "epoch": 6.970933197348292, "grad_norm": 21.234525680541992, "learning_rate": 2.1715111337752848e-05, "loss": 1.7378, "step": 41010 }, { "epoch": 6.97263301036886, "grad_norm": 11.1755952835083, "learning_rate": 2.17122783160519e-05, "loss": 1.8154, "step": 41020 }, { "epoch": 6.974332823389427, "grad_norm": 15.32410717010498, "learning_rate": 2.1709445294350958e-05, "loss": 1.7147, "step": 41030 }, { "epoch": 6.976032636409995, "grad_norm": 9.352566719055176, "learning_rate": 2.1706612272650008e-05, "loss": 1.6423, "step": 41040 }, { "epoch": 6.977732449430563, "grad_norm": 12.274124145507812, "learning_rate": 2.1703779250949062e-05, "loss": 1.7848, "step": 41050 }, { "epoch": 6.97943226245113, "grad_norm": 12.104490280151367, "learning_rate": 2.170094622924812e-05, "loss": 1.8604, "step": 41060 }, { "epoch": 6.981132075471698, "grad_norm": 17.189889907836914, "learning_rate": 2.169811320754717e-05, "loss": 2.035, "step": 41070 }, { "epoch": 6.9828318884922655, "grad_norm": 11.55855655670166, "learning_rate": 2.1695280185846222e-05, "loss": 1.7295, "step": 41080 }, { "epoch": 6.984531701512833, "grad_norm": 14.293230056762695, "learning_rate": 2.169244716414528e-05, "loss": 1.7114, "step": 41090 }, { "epoch": 6.986231514533401, "grad_norm": 21.494752883911133, "learning_rate": 2.1689614142444333e-05, "loss": 1.8216, "step": 41100 }, { "epoch": 6.987931327553969, "grad_norm": 15.359042167663574, "learning_rate": 2.1686781120743383e-05, "loss": 1.6573, "step": 41110 }, { "epoch": 6.989631140574537, "grad_norm": 14.179692268371582, "learning_rate": 2.168394809904244e-05, "loss": 1.7024, "step": 41120 }, { "epoch": 6.991330953595105, "grad_norm": 10.411420822143555, "learning_rate": 2.1681115077341493e-05, "loss": 1.8527, "step": 41130 }, { "epoch": 6.993030766615672, "grad_norm": 13.1371431350708, "learning_rate": 2.1678282055640547e-05, "loss": 1.7995, "step": 41140 }, { "epoch": 6.99473057963624, "grad_norm": 18.10720443725586, "learning_rate": 2.16754490339396e-05, "loss": 1.6907, "step": 41150 }, { "epoch": 6.996430392656808, "grad_norm": 16.14596939086914, "learning_rate": 2.1672616012238654e-05, "loss": 1.9473, "step": 41160 }, { "epoch": 6.998130205677375, "grad_norm": 12.08849048614502, "learning_rate": 2.1669782990537707e-05, "loss": 1.6565, "step": 41170 }, { "epoch": 6.999830018697943, "grad_norm": 23.59228515625, "learning_rate": 2.1666949968836764e-05, "loss": 1.7687, "step": 41180 }, { "epoch": 7.0, "eval_cer": 1.0, "eval_loss": 2.4754433631896973, "eval_runtime": 1957.5832, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.241, "step": 41181 }, { "epoch": 7.001529831718511, "grad_norm": 21.026599884033203, "learning_rate": 2.1664116947135815e-05, "loss": 1.6785, "step": 41190 }, { "epoch": 7.003229644739078, "grad_norm": 11.7343111038208, "learning_rate": 2.1661283925434868e-05, "loss": 1.825, "step": 41200 }, { "epoch": 7.004929457759647, "grad_norm": 13.175420761108398, "learning_rate": 2.1658450903733925e-05, "loss": 1.6516, "step": 41210 }, { "epoch": 7.0066292707802145, "grad_norm": 12.614302635192871, "learning_rate": 2.1655617882032975e-05, "loss": 1.7303, "step": 41220 }, { "epoch": 7.008329083800782, "grad_norm": 16.900415420532227, "learning_rate": 2.165278486033203e-05, "loss": 1.5006, "step": 41230 }, { "epoch": 7.01002889682135, "grad_norm": 17.28676986694336, "learning_rate": 2.1649951838631086e-05, "loss": 1.5981, "step": 41240 }, { "epoch": 7.011728709841917, "grad_norm": 12.548722267150879, "learning_rate": 2.164711881693014e-05, "loss": 1.6121, "step": 41250 }, { "epoch": 7.013428522862485, "grad_norm": 17.129438400268555, "learning_rate": 2.164428579522919e-05, "loss": 1.6232, "step": 41260 }, { "epoch": 7.015128335883053, "grad_norm": 17.680025100708008, "learning_rate": 2.1641452773528246e-05, "loss": 1.6319, "step": 41270 }, { "epoch": 7.01682814890362, "grad_norm": 11.517790794372559, "learning_rate": 2.16386197518273e-05, "loss": 1.5411, "step": 41280 }, { "epoch": 7.018527961924188, "grad_norm": 13.18049430847168, "learning_rate": 2.1635786730126353e-05, "loss": 1.8367, "step": 41290 }, { "epoch": 7.020227774944756, "grad_norm": 14.262937545776367, "learning_rate": 2.1632953708425407e-05, "loss": 1.4782, "step": 41300 }, { "epoch": 7.021927587965324, "grad_norm": 25.706544876098633, "learning_rate": 2.163012068672446e-05, "loss": 1.5226, "step": 41310 }, { "epoch": 7.023627400985892, "grad_norm": 14.416077613830566, "learning_rate": 2.1627287665023514e-05, "loss": 1.8475, "step": 41320 }, { "epoch": 7.0253272140064595, "grad_norm": 13.43303394317627, "learning_rate": 2.162445464332257e-05, "loss": 1.6665, "step": 41330 }, { "epoch": 7.027027027027027, "grad_norm": 14.938092231750488, "learning_rate": 2.162162162162162e-05, "loss": 1.8643, "step": 41340 }, { "epoch": 7.028726840047595, "grad_norm": 12.527925491333008, "learning_rate": 2.1618788599920674e-05, "loss": 1.6021, "step": 41350 }, { "epoch": 7.0304266530681625, "grad_norm": 14.949700355529785, "learning_rate": 2.161595557821973e-05, "loss": 1.5441, "step": 41360 }, { "epoch": 7.03212646608873, "grad_norm": 12.532227516174316, "learning_rate": 2.1613122556518785e-05, "loss": 1.5198, "step": 41370 }, { "epoch": 7.033826279109298, "grad_norm": 13.410629272460938, "learning_rate": 2.1610289534817835e-05, "loss": 1.5943, "step": 41380 }, { "epoch": 7.0355260921298655, "grad_norm": 15.998420715332031, "learning_rate": 2.1607456513116892e-05, "loss": 1.5504, "step": 41390 }, { "epoch": 7.037225905150433, "grad_norm": 16.52683448791504, "learning_rate": 2.1604623491415945e-05, "loss": 1.6879, "step": 41400 }, { "epoch": 7.038925718171001, "grad_norm": 12.820547103881836, "learning_rate": 2.1601790469714995e-05, "loss": 1.4175, "step": 41410 }, { "epoch": 7.040625531191569, "grad_norm": 14.921589851379395, "learning_rate": 2.1598957448014052e-05, "loss": 1.5696, "step": 41420 }, { "epoch": 7.042325344212137, "grad_norm": 27.355344772338867, "learning_rate": 2.1596124426313106e-05, "loss": 1.7442, "step": 41430 }, { "epoch": 7.044025157232705, "grad_norm": 20.52952766418457, "learning_rate": 2.159329140461216e-05, "loss": 1.5525, "step": 41440 }, { "epoch": 7.045724970253272, "grad_norm": 13.33159351348877, "learning_rate": 2.1590458382911213e-05, "loss": 1.4024, "step": 41450 }, { "epoch": 7.04742478327384, "grad_norm": 17.582286834716797, "learning_rate": 2.1587625361210266e-05, "loss": 1.4084, "step": 41460 }, { "epoch": 7.049124596294408, "grad_norm": 23.240095138549805, "learning_rate": 2.1584792339509323e-05, "loss": 1.6462, "step": 41470 }, { "epoch": 7.050824409314975, "grad_norm": 17.41910171508789, "learning_rate": 2.1581959317808377e-05, "loss": 1.7747, "step": 41480 }, { "epoch": 7.052524222335543, "grad_norm": 11.477282524108887, "learning_rate": 2.1579126296107427e-05, "loss": 1.7158, "step": 41490 }, { "epoch": 7.0542240353561105, "grad_norm": 20.13143539428711, "learning_rate": 2.1576293274406484e-05, "loss": 1.6141, "step": 41500 }, { "epoch": 7.055923848376678, "grad_norm": 10.94710922241211, "learning_rate": 2.1573460252705537e-05, "loss": 1.6278, "step": 41510 }, { "epoch": 7.057623661397247, "grad_norm": 21.49480628967285, "learning_rate": 2.157062723100459e-05, "loss": 1.4611, "step": 41520 }, { "epoch": 7.059323474417814, "grad_norm": 15.471678733825684, "learning_rate": 2.1567794209303644e-05, "loss": 1.8215, "step": 41530 }, { "epoch": 7.061023287438382, "grad_norm": 12.526866912841797, "learning_rate": 2.1564961187602698e-05, "loss": 1.7563, "step": 41540 }, { "epoch": 7.06272310045895, "grad_norm": 14.710000991821289, "learning_rate": 2.156212816590175e-05, "loss": 1.4838, "step": 41550 }, { "epoch": 7.064422913479517, "grad_norm": 11.79326057434082, "learning_rate": 2.1559295144200805e-05, "loss": 1.7388, "step": 41560 }, { "epoch": 7.066122726500085, "grad_norm": 19.164548873901367, "learning_rate": 2.155646212249986e-05, "loss": 1.8352, "step": 41570 }, { "epoch": 7.067822539520653, "grad_norm": 13.134312629699707, "learning_rate": 2.1553629100798912e-05, "loss": 1.5562, "step": 41580 }, { "epoch": 7.06952235254122, "grad_norm": 16.895557403564453, "learning_rate": 2.155079607909797e-05, "loss": 1.6768, "step": 41590 }, { "epoch": 7.071222165561788, "grad_norm": 16.688270568847656, "learning_rate": 2.154796305739702e-05, "loss": 1.4507, "step": 41600 }, { "epoch": 7.072921978582356, "grad_norm": 16.084674835205078, "learning_rate": 2.1545130035696073e-05, "loss": 1.6029, "step": 41610 }, { "epoch": 7.074621791602923, "grad_norm": 15.232086181640625, "learning_rate": 2.154229701399513e-05, "loss": 1.5898, "step": 41620 }, { "epoch": 7.076321604623492, "grad_norm": 13.792753219604492, "learning_rate": 2.1539463992294183e-05, "loss": 1.4385, "step": 41630 }, { "epoch": 7.0780214176440595, "grad_norm": 10.83409309387207, "learning_rate": 2.1536630970593233e-05, "loss": 1.8355, "step": 41640 }, { "epoch": 7.079721230664627, "grad_norm": 16.760244369506836, "learning_rate": 2.153379794889229e-05, "loss": 1.7711, "step": 41650 }, { "epoch": 7.081421043685195, "grad_norm": 15.591584205627441, "learning_rate": 2.1530964927191344e-05, "loss": 1.6804, "step": 41660 }, { "epoch": 7.083120856705762, "grad_norm": 13.58819580078125, "learning_rate": 2.1528131905490397e-05, "loss": 1.7257, "step": 41670 }, { "epoch": 7.08482066972633, "grad_norm": 11.149852752685547, "learning_rate": 2.152529888378945e-05, "loss": 1.5867, "step": 41680 }, { "epoch": 7.086520482746898, "grad_norm": 15.951730728149414, "learning_rate": 2.1522465862088504e-05, "loss": 1.5572, "step": 41690 }, { "epoch": 7.088220295767465, "grad_norm": 21.8632755279541, "learning_rate": 2.1519632840387558e-05, "loss": 1.526, "step": 41700 }, { "epoch": 7.089920108788033, "grad_norm": 12.22976016998291, "learning_rate": 2.1516799818686615e-05, "loss": 1.4748, "step": 41710 }, { "epoch": 7.091619921808601, "grad_norm": 14.477691650390625, "learning_rate": 2.1513966796985665e-05, "loss": 1.6909, "step": 41720 }, { "epoch": 7.093319734829169, "grad_norm": 19.531282424926758, "learning_rate": 2.1511133775284718e-05, "loss": 1.7213, "step": 41730 }, { "epoch": 7.095019547849737, "grad_norm": 10.687703132629395, "learning_rate": 2.1508300753583775e-05, "loss": 1.7592, "step": 41740 }, { "epoch": 7.0967193608703045, "grad_norm": 15.9103364944458, "learning_rate": 2.1505467731882825e-05, "loss": 1.7118, "step": 41750 }, { "epoch": 7.098419173890872, "grad_norm": 14.037731170654297, "learning_rate": 2.150263471018188e-05, "loss": 1.6283, "step": 41760 }, { "epoch": 7.10011898691144, "grad_norm": 32.381900787353516, "learning_rate": 2.1499801688480936e-05, "loss": 1.3027, "step": 41770 }, { "epoch": 7.1018187999320075, "grad_norm": 18.98016357421875, "learning_rate": 2.149696866677999e-05, "loss": 1.375, "step": 41780 }, { "epoch": 7.103518612952575, "grad_norm": 8.321721076965332, "learning_rate": 2.149413564507904e-05, "loss": 1.6043, "step": 41790 }, { "epoch": 7.105218425973143, "grad_norm": 14.237537384033203, "learning_rate": 2.1491302623378096e-05, "loss": 1.7022, "step": 41800 }, { "epoch": 7.1069182389937104, "grad_norm": 16.36956214904785, "learning_rate": 2.148846960167715e-05, "loss": 1.5404, "step": 41810 }, { "epoch": 7.108618052014278, "grad_norm": 10.245550155639648, "learning_rate": 2.1485636579976203e-05, "loss": 1.8827, "step": 41820 }, { "epoch": 7.110317865034846, "grad_norm": 12.358661651611328, "learning_rate": 2.1482803558275257e-05, "loss": 1.8377, "step": 41830 }, { "epoch": 7.112017678055414, "grad_norm": 10.544513702392578, "learning_rate": 2.147997053657431e-05, "loss": 1.7553, "step": 41840 }, { "epoch": 7.113717491075982, "grad_norm": 30.128860473632812, "learning_rate": 2.1477137514873364e-05, "loss": 1.5969, "step": 41850 }, { "epoch": 7.11541730409655, "grad_norm": 13.881462097167969, "learning_rate": 2.147430449317242e-05, "loss": 1.6175, "step": 41860 }, { "epoch": 7.117117117117117, "grad_norm": 13.433767318725586, "learning_rate": 2.147147147147147e-05, "loss": 1.6854, "step": 41870 }, { "epoch": 7.118816930137685, "grad_norm": 12.535269737243652, "learning_rate": 2.1468638449770524e-05, "loss": 1.6434, "step": 41880 }, { "epoch": 7.120516743158253, "grad_norm": 10.760557174682617, "learning_rate": 2.146580542806958e-05, "loss": 1.633, "step": 41890 }, { "epoch": 7.12221655617882, "grad_norm": 14.463682174682617, "learning_rate": 2.146297240636863e-05, "loss": 1.5756, "step": 41900 }, { "epoch": 7.123916369199388, "grad_norm": 13.464812278747559, "learning_rate": 2.1460139384667685e-05, "loss": 1.8724, "step": 41910 }, { "epoch": 7.1256161822199555, "grad_norm": 29.34613800048828, "learning_rate": 2.1457306362966742e-05, "loss": 1.4663, "step": 41920 }, { "epoch": 7.127315995240523, "grad_norm": 13.358259201049805, "learning_rate": 2.1454473341265795e-05, "loss": 1.85, "step": 41930 }, { "epoch": 7.129015808261091, "grad_norm": 16.871944427490234, "learning_rate": 2.1451640319564846e-05, "loss": 1.7728, "step": 41940 }, { "epoch": 7.130715621281659, "grad_norm": 18.773160934448242, "learning_rate": 2.1448807297863903e-05, "loss": 1.6317, "step": 41950 }, { "epoch": 7.132415434302227, "grad_norm": 20.474775314331055, "learning_rate": 2.1445974276162956e-05, "loss": 1.5171, "step": 41960 }, { "epoch": 7.134115247322795, "grad_norm": 14.1187162399292, "learning_rate": 2.144314125446201e-05, "loss": 1.6436, "step": 41970 }, { "epoch": 7.135815060343362, "grad_norm": 14.49551010131836, "learning_rate": 2.1440308232761063e-05, "loss": 1.5524, "step": 41980 }, { "epoch": 7.13751487336393, "grad_norm": 19.13874053955078, "learning_rate": 2.1437475211060117e-05, "loss": 1.6796, "step": 41990 }, { "epoch": 7.139214686384498, "grad_norm": 15.094208717346191, "learning_rate": 2.143464218935917e-05, "loss": 1.7445, "step": 42000 }, { "epoch": 7.140914499405065, "grad_norm": 24.314693450927734, "learning_rate": 2.1431809167658227e-05, "loss": 1.415, "step": 42010 }, { "epoch": 7.142614312425633, "grad_norm": 17.29979133605957, "learning_rate": 2.1428976145957277e-05, "loss": 1.7824, "step": 42020 }, { "epoch": 7.144314125446201, "grad_norm": 12.071562767028809, "learning_rate": 2.142614312425633e-05, "loss": 1.6025, "step": 42030 }, { "epoch": 7.146013938466768, "grad_norm": 12.689298629760742, "learning_rate": 2.1423310102555388e-05, "loss": 1.7218, "step": 42040 }, { "epoch": 7.147713751487337, "grad_norm": 20.101346969604492, "learning_rate": 2.142047708085444e-05, "loss": 1.8005, "step": 42050 }, { "epoch": 7.149413564507904, "grad_norm": 15.642182350158691, "learning_rate": 2.141764405915349e-05, "loss": 1.5544, "step": 42060 }, { "epoch": 7.151113377528472, "grad_norm": 16.13962173461914, "learning_rate": 2.1414811037452548e-05, "loss": 1.614, "step": 42070 }, { "epoch": 7.15281319054904, "grad_norm": 15.036710739135742, "learning_rate": 2.14119780157516e-05, "loss": 1.6747, "step": 42080 }, { "epoch": 7.154513003569607, "grad_norm": 15.9573392868042, "learning_rate": 2.1409144994050652e-05, "loss": 1.802, "step": 42090 }, { "epoch": 7.156212816590175, "grad_norm": 13.433531761169434, "learning_rate": 2.140631197234971e-05, "loss": 1.6125, "step": 42100 }, { "epoch": 7.157912629610743, "grad_norm": 15.833388328552246, "learning_rate": 2.1403478950648762e-05, "loss": 1.8049, "step": 42110 }, { "epoch": 7.15961244263131, "grad_norm": 13.335165023803711, "learning_rate": 2.1400645928947816e-05, "loss": 1.5003, "step": 42120 }, { "epoch": 7.161312255651878, "grad_norm": 9.981047630310059, "learning_rate": 2.139781290724687e-05, "loss": 1.8379, "step": 42130 }, { "epoch": 7.163012068672446, "grad_norm": 15.466458320617676, "learning_rate": 2.1394979885545923e-05, "loss": 1.6469, "step": 42140 }, { "epoch": 7.164711881693014, "grad_norm": 14.519145965576172, "learning_rate": 2.1392146863844976e-05, "loss": 1.6312, "step": 42150 }, { "epoch": 7.166411694713582, "grad_norm": 13.531012535095215, "learning_rate": 2.1389313842144033e-05, "loss": 1.6879, "step": 42160 }, { "epoch": 7.1681115077341495, "grad_norm": 22.316434860229492, "learning_rate": 2.1386480820443083e-05, "loss": 1.9793, "step": 42170 }, { "epoch": 7.169811320754717, "grad_norm": 23.514862060546875, "learning_rate": 2.138364779874214e-05, "loss": 1.7201, "step": 42180 }, { "epoch": 7.171511133775285, "grad_norm": 13.73726749420166, "learning_rate": 2.1380814777041194e-05, "loss": 1.6189, "step": 42190 }, { "epoch": 7.1732109467958525, "grad_norm": 23.789432525634766, "learning_rate": 2.1377981755340247e-05, "loss": 1.3924, "step": 42200 }, { "epoch": 7.17491075981642, "grad_norm": 9.484853744506836, "learning_rate": 2.13751487336393e-05, "loss": 1.8068, "step": 42210 }, { "epoch": 7.176610572836988, "grad_norm": 9.809715270996094, "learning_rate": 2.1372315711938354e-05, "loss": 1.5918, "step": 42220 }, { "epoch": 7.178310385857555, "grad_norm": 12.364228248596191, "learning_rate": 2.1369482690237408e-05, "loss": 1.7051, "step": 42230 }, { "epoch": 7.180010198878123, "grad_norm": 15.815011024475098, "learning_rate": 2.1366649668536465e-05, "loss": 1.6441, "step": 42240 }, { "epoch": 7.181710011898691, "grad_norm": 13.630472183227539, "learning_rate": 2.1363816646835515e-05, "loss": 1.584, "step": 42250 }, { "epoch": 7.183409824919259, "grad_norm": 12.527220726013184, "learning_rate": 2.136098362513457e-05, "loss": 1.68, "step": 42260 }, { "epoch": 7.185109637939827, "grad_norm": 20.521896362304688, "learning_rate": 2.1358150603433625e-05, "loss": 1.8139, "step": 42270 }, { "epoch": 7.186809450960395, "grad_norm": 18.242347717285156, "learning_rate": 2.1355317581732676e-05, "loss": 1.7292, "step": 42280 }, { "epoch": 7.188509263980962, "grad_norm": 14.156418800354004, "learning_rate": 2.135248456003173e-05, "loss": 1.6989, "step": 42290 }, { "epoch": 7.19020907700153, "grad_norm": 17.483766555786133, "learning_rate": 2.1349651538330786e-05, "loss": 1.544, "step": 42300 }, { "epoch": 7.1919088900220975, "grad_norm": 18.779827117919922, "learning_rate": 2.134681851662984e-05, "loss": 1.6619, "step": 42310 }, { "epoch": 7.193608703042665, "grad_norm": 29.409536361694336, "learning_rate": 2.134398549492889e-05, "loss": 1.5947, "step": 42320 }, { "epoch": 7.195308516063233, "grad_norm": 21.141708374023438, "learning_rate": 2.1341152473227946e-05, "loss": 1.551, "step": 42330 }, { "epoch": 7.1970083290838005, "grad_norm": 14.668641090393066, "learning_rate": 2.1338319451527e-05, "loss": 1.7142, "step": 42340 }, { "epoch": 7.198708142104368, "grad_norm": 14.162991523742676, "learning_rate": 2.1335486429826054e-05, "loss": 1.4385, "step": 42350 }, { "epoch": 7.200407955124936, "grad_norm": 13.744098663330078, "learning_rate": 2.1332653408125107e-05, "loss": 1.3328, "step": 42360 }, { "epoch": 7.202107768145504, "grad_norm": 12.345176696777344, "learning_rate": 2.132982038642416e-05, "loss": 1.8828, "step": 42370 }, { "epoch": 7.203807581166072, "grad_norm": 13.276362419128418, "learning_rate": 2.1326987364723214e-05, "loss": 1.7221, "step": 42380 }, { "epoch": 7.20550739418664, "grad_norm": 14.634676933288574, "learning_rate": 2.132415434302227e-05, "loss": 1.4769, "step": 42390 }, { "epoch": 7.207207207207207, "grad_norm": 13.46336841583252, "learning_rate": 2.132132132132132e-05, "loss": 1.7081, "step": 42400 }, { "epoch": 7.208907020227775, "grad_norm": 13.53248119354248, "learning_rate": 2.1318488299620375e-05, "loss": 1.5306, "step": 42410 }, { "epoch": 7.210606833248343, "grad_norm": 11.71660041809082, "learning_rate": 2.131565527791943e-05, "loss": 1.5013, "step": 42420 }, { "epoch": 7.21230664626891, "grad_norm": 19.678464889526367, "learning_rate": 2.1312822256218482e-05, "loss": 1.5949, "step": 42430 }, { "epoch": 7.214006459289478, "grad_norm": 20.174741744995117, "learning_rate": 2.1309989234517535e-05, "loss": 1.712, "step": 42440 }, { "epoch": 7.215706272310046, "grad_norm": 12.365859031677246, "learning_rate": 2.1307156212816592e-05, "loss": 1.4638, "step": 42450 }, { "epoch": 7.217406085330613, "grad_norm": 21.009254455566406, "learning_rate": 2.1304323191115646e-05, "loss": 1.3994, "step": 42460 }, { "epoch": 7.219105898351182, "grad_norm": 12.532490730285645, "learning_rate": 2.1301490169414696e-05, "loss": 1.6816, "step": 42470 }, { "epoch": 7.220805711371749, "grad_norm": 13.966293334960938, "learning_rate": 2.1298657147713753e-05, "loss": 1.652, "step": 42480 }, { "epoch": 7.222505524392317, "grad_norm": 12.060592651367188, "learning_rate": 2.1295824126012806e-05, "loss": 1.6184, "step": 42490 }, { "epoch": 7.224205337412885, "grad_norm": 15.8040189743042, "learning_rate": 2.129299110431186e-05, "loss": 1.5894, "step": 42500 }, { "epoch": 7.225905150433452, "grad_norm": 12.518697738647461, "learning_rate": 2.1290158082610913e-05, "loss": 1.6364, "step": 42510 }, { "epoch": 7.22760496345402, "grad_norm": 18.845212936401367, "learning_rate": 2.1287325060909967e-05, "loss": 1.4864, "step": 42520 }, { "epoch": 7.229304776474588, "grad_norm": 11.923099517822266, "learning_rate": 2.128449203920902e-05, "loss": 1.5592, "step": 42530 }, { "epoch": 7.231004589495155, "grad_norm": 26.34255027770996, "learning_rate": 2.1281659017508077e-05, "loss": 1.6241, "step": 42540 }, { "epoch": 7.232704402515723, "grad_norm": 13.2662353515625, "learning_rate": 2.1278825995807127e-05, "loss": 1.7597, "step": 42550 }, { "epoch": 7.234404215536291, "grad_norm": 10.625321388244629, "learning_rate": 2.127599297410618e-05, "loss": 1.3424, "step": 42560 }, { "epoch": 7.236104028556859, "grad_norm": 11.888321876525879, "learning_rate": 2.1273159952405238e-05, "loss": 1.5804, "step": 42570 }, { "epoch": 7.237803841577427, "grad_norm": 12.523812294006348, "learning_rate": 2.127032693070429e-05, "loss": 1.5605, "step": 42580 }, { "epoch": 7.2395036545979945, "grad_norm": 15.087888717651367, "learning_rate": 2.126749390900334e-05, "loss": 1.6343, "step": 42590 }, { "epoch": 7.241203467618562, "grad_norm": 16.907756805419922, "learning_rate": 2.12646608873024e-05, "loss": 1.6888, "step": 42600 }, { "epoch": 7.24290328063913, "grad_norm": 16.63336181640625, "learning_rate": 2.1261827865601452e-05, "loss": 1.6699, "step": 42610 }, { "epoch": 7.2446030936596975, "grad_norm": 17.17121124267578, "learning_rate": 2.1258994843900502e-05, "loss": 1.5632, "step": 42620 }, { "epoch": 7.246302906680265, "grad_norm": 14.169830322265625, "learning_rate": 2.125616182219956e-05, "loss": 1.4185, "step": 42630 }, { "epoch": 7.248002719700833, "grad_norm": 12.308494567871094, "learning_rate": 2.1253328800498612e-05, "loss": 1.7154, "step": 42640 }, { "epoch": 7.2497025327214, "grad_norm": 16.652780532836914, "learning_rate": 2.1250495778797666e-05, "loss": 1.5794, "step": 42650 }, { "epoch": 7.251402345741968, "grad_norm": 14.015704154968262, "learning_rate": 2.124766275709672e-05, "loss": 1.6597, "step": 42660 }, { "epoch": 7.253102158762536, "grad_norm": 19.628110885620117, "learning_rate": 2.1244829735395773e-05, "loss": 1.6385, "step": 42670 }, { "epoch": 7.254801971783104, "grad_norm": 17.0112247467041, "learning_rate": 2.1241996713694827e-05, "loss": 1.5579, "step": 42680 }, { "epoch": 7.256501784803672, "grad_norm": 9.76003360748291, "learning_rate": 2.1239163691993883e-05, "loss": 1.877, "step": 42690 }, { "epoch": 7.25820159782424, "grad_norm": 14.63697338104248, "learning_rate": 2.1236330670292934e-05, "loss": 1.6793, "step": 42700 }, { "epoch": 7.259901410844807, "grad_norm": 17.099693298339844, "learning_rate": 2.1233497648591987e-05, "loss": 1.7469, "step": 42710 }, { "epoch": 7.261601223865375, "grad_norm": 19.31508445739746, "learning_rate": 2.1230664626891044e-05, "loss": 1.8076, "step": 42720 }, { "epoch": 7.2633010368859425, "grad_norm": 10.35291862487793, "learning_rate": 2.1227831605190098e-05, "loss": 1.7343, "step": 42730 }, { "epoch": 7.26500084990651, "grad_norm": 14.572660446166992, "learning_rate": 2.1224998583489148e-05, "loss": 1.5733, "step": 42740 }, { "epoch": 7.266700662927078, "grad_norm": 22.885881423950195, "learning_rate": 2.1222165561788205e-05, "loss": 1.7293, "step": 42750 }, { "epoch": 7.2684004759476455, "grad_norm": 13.054100036621094, "learning_rate": 2.1219332540087258e-05, "loss": 1.7385, "step": 42760 }, { "epoch": 7.270100288968213, "grad_norm": 11.008301734924316, "learning_rate": 2.1216499518386308e-05, "loss": 1.4349, "step": 42770 }, { "epoch": 7.271800101988781, "grad_norm": 20.530250549316406, "learning_rate": 2.1213666496685365e-05, "loss": 1.4203, "step": 42780 }, { "epoch": 7.273499915009349, "grad_norm": 15.010034561157227, "learning_rate": 2.121083347498442e-05, "loss": 1.6232, "step": 42790 }, { "epoch": 7.275199728029917, "grad_norm": 17.884109497070312, "learning_rate": 2.1208000453283472e-05, "loss": 1.6735, "step": 42800 }, { "epoch": 7.276899541050485, "grad_norm": 13.020681381225586, "learning_rate": 2.1205167431582526e-05, "loss": 1.6349, "step": 42810 }, { "epoch": 7.278599354071052, "grad_norm": 10.804179191589355, "learning_rate": 2.120233440988158e-05, "loss": 1.5114, "step": 42820 }, { "epoch": 7.28029916709162, "grad_norm": 12.488953590393066, "learning_rate": 2.1199501388180633e-05, "loss": 1.6962, "step": 42830 }, { "epoch": 7.281998980112188, "grad_norm": 26.951980590820312, "learning_rate": 2.119666836647969e-05, "loss": 1.5157, "step": 42840 }, { "epoch": 7.283698793132755, "grad_norm": 12.112492561340332, "learning_rate": 2.119383534477874e-05, "loss": 1.8476, "step": 42850 }, { "epoch": 7.285398606153323, "grad_norm": 14.56634521484375, "learning_rate": 2.1191002323077793e-05, "loss": 1.477, "step": 42860 }, { "epoch": 7.287098419173891, "grad_norm": 18.961772918701172, "learning_rate": 2.118816930137685e-05, "loss": 1.6296, "step": 42870 }, { "epoch": 7.288798232194458, "grad_norm": 15.29734992980957, "learning_rate": 2.1185336279675904e-05, "loss": 1.7279, "step": 42880 }, { "epoch": 7.290498045215027, "grad_norm": 11.45982551574707, "learning_rate": 2.1182503257974954e-05, "loss": 1.7888, "step": 42890 }, { "epoch": 7.292197858235594, "grad_norm": 13.45290756225586, "learning_rate": 2.117967023627401e-05, "loss": 1.4322, "step": 42900 }, { "epoch": 7.293897671256162, "grad_norm": 15.031009674072266, "learning_rate": 2.1176837214573064e-05, "loss": 1.7428, "step": 42910 }, { "epoch": 7.29559748427673, "grad_norm": 18.582752227783203, "learning_rate": 2.1174004192872118e-05, "loss": 1.687, "step": 42920 }, { "epoch": 7.297297297297297, "grad_norm": 14.280660629272461, "learning_rate": 2.117117117117117e-05, "loss": 1.7399, "step": 42930 }, { "epoch": 7.298997110317865, "grad_norm": 18.57752227783203, "learning_rate": 2.1168338149470225e-05, "loss": 1.5693, "step": 42940 }, { "epoch": 7.300696923338433, "grad_norm": 16.548154830932617, "learning_rate": 2.1165505127769282e-05, "loss": 1.4345, "step": 42950 }, { "epoch": 7.302396736359, "grad_norm": 13.961898803710938, "learning_rate": 2.1162672106068332e-05, "loss": 1.7594, "step": 42960 }, { "epoch": 7.304096549379568, "grad_norm": 13.422758102416992, "learning_rate": 2.1159839084367385e-05, "loss": 1.4537, "step": 42970 }, { "epoch": 7.305796362400136, "grad_norm": 16.44378662109375, "learning_rate": 2.1157006062666442e-05, "loss": 1.7408, "step": 42980 }, { "epoch": 7.307496175420704, "grad_norm": 17.6437931060791, "learning_rate": 2.1154173040965496e-05, "loss": 1.611, "step": 42990 }, { "epoch": 7.309195988441272, "grad_norm": 14.317646026611328, "learning_rate": 2.1151340019264546e-05, "loss": 1.6239, "step": 43000 }, { "epoch": 7.3108958014618395, "grad_norm": 12.721348762512207, "learning_rate": 2.1148506997563603e-05, "loss": 1.6169, "step": 43010 }, { "epoch": 7.312595614482407, "grad_norm": 16.45740509033203, "learning_rate": 2.1145673975862656e-05, "loss": 1.5547, "step": 43020 }, { "epoch": 7.314295427502975, "grad_norm": 32.621768951416016, "learning_rate": 2.114284095416171e-05, "loss": 1.7297, "step": 43030 }, { "epoch": 7.315995240523542, "grad_norm": 19.551864624023438, "learning_rate": 2.1140007932460763e-05, "loss": 1.5889, "step": 43040 }, { "epoch": 7.31769505354411, "grad_norm": 15.01638412475586, "learning_rate": 2.1137174910759817e-05, "loss": 1.5733, "step": 43050 }, { "epoch": 7.319394866564678, "grad_norm": 23.069761276245117, "learning_rate": 2.113434188905887e-05, "loss": 1.7193, "step": 43060 }, { "epoch": 7.321094679585245, "grad_norm": 32.64913558959961, "learning_rate": 2.1131508867357927e-05, "loss": 1.4613, "step": 43070 }, { "epoch": 7.322794492605813, "grad_norm": 19.060304641723633, "learning_rate": 2.1128675845656978e-05, "loss": 1.6543, "step": 43080 }, { "epoch": 7.324494305626381, "grad_norm": 17.49575424194336, "learning_rate": 2.112584282395603e-05, "loss": 1.5548, "step": 43090 }, { "epoch": 7.326194118646949, "grad_norm": 14.138091087341309, "learning_rate": 2.1123009802255088e-05, "loss": 1.7628, "step": 43100 }, { "epoch": 7.327893931667517, "grad_norm": 14.089706420898438, "learning_rate": 2.1120176780554138e-05, "loss": 1.5738, "step": 43110 }, { "epoch": 7.3295937446880846, "grad_norm": 21.895387649536133, "learning_rate": 2.111734375885319e-05, "loss": 1.7112, "step": 43120 }, { "epoch": 7.331293557708652, "grad_norm": 17.31975555419922, "learning_rate": 2.111451073715225e-05, "loss": 1.6984, "step": 43130 }, { "epoch": 7.33299337072922, "grad_norm": 11.033025741577148, "learning_rate": 2.1111677715451302e-05, "loss": 1.7725, "step": 43140 }, { "epoch": 7.3346931837497875, "grad_norm": 13.133259773254395, "learning_rate": 2.1108844693750352e-05, "loss": 1.5538, "step": 43150 }, { "epoch": 7.336392996770355, "grad_norm": 12.746463775634766, "learning_rate": 2.110601167204941e-05, "loss": 1.6586, "step": 43160 }, { "epoch": 7.338092809790923, "grad_norm": 14.554314613342285, "learning_rate": 2.1103178650348463e-05, "loss": 1.6687, "step": 43170 }, { "epoch": 7.3397926228114905, "grad_norm": 24.31812858581543, "learning_rate": 2.1100345628647516e-05, "loss": 1.5623, "step": 43180 }, { "epoch": 7.341492435832058, "grad_norm": 18.867656707763672, "learning_rate": 2.109751260694657e-05, "loss": 1.5752, "step": 43190 }, { "epoch": 7.343192248852626, "grad_norm": 10.267888069152832, "learning_rate": 2.1094679585245623e-05, "loss": 1.582, "step": 43200 }, { "epoch": 7.344892061873194, "grad_norm": 17.252960205078125, "learning_rate": 2.1091846563544677e-05, "loss": 1.611, "step": 43210 }, { "epoch": 7.346591874893762, "grad_norm": 14.821025848388672, "learning_rate": 2.1089013541843734e-05, "loss": 1.6812, "step": 43220 }, { "epoch": 7.34829168791433, "grad_norm": 15.912641525268555, "learning_rate": 2.1086180520142784e-05, "loss": 1.522, "step": 43230 }, { "epoch": 7.349991500934897, "grad_norm": 14.163125038146973, "learning_rate": 2.1083347498441837e-05, "loss": 1.7523, "step": 43240 }, { "epoch": 7.351691313955465, "grad_norm": 17.47947120666504, "learning_rate": 2.1080514476740894e-05, "loss": 1.676, "step": 43250 }, { "epoch": 7.353391126976033, "grad_norm": 16.62589454650879, "learning_rate": 2.1077681455039948e-05, "loss": 1.6947, "step": 43260 }, { "epoch": 7.3550909399966, "grad_norm": 12.364176750183105, "learning_rate": 2.1074848433338998e-05, "loss": 1.6181, "step": 43270 }, { "epoch": 7.356790753017168, "grad_norm": 13.891237258911133, "learning_rate": 2.1072015411638055e-05, "loss": 1.6217, "step": 43280 }, { "epoch": 7.3584905660377355, "grad_norm": 16.996288299560547, "learning_rate": 2.106918238993711e-05, "loss": 1.6838, "step": 43290 }, { "epoch": 7.360190379058303, "grad_norm": 11.11865520477295, "learning_rate": 2.106634936823616e-05, "loss": 2.0026, "step": 43300 }, { "epoch": 7.361890192078871, "grad_norm": 18.244966506958008, "learning_rate": 2.1063516346535215e-05, "loss": 1.6121, "step": 43310 }, { "epoch": 7.363590005099439, "grad_norm": 13.874201774597168, "learning_rate": 2.106068332483427e-05, "loss": 1.6045, "step": 43320 }, { "epoch": 7.365289818120007, "grad_norm": 15.349428176879883, "learning_rate": 2.1057850303133322e-05, "loss": 1.4295, "step": 43330 }, { "epoch": 7.366989631140575, "grad_norm": 12.129046440124512, "learning_rate": 2.1055017281432376e-05, "loss": 1.7931, "step": 43340 }, { "epoch": 7.368689444161142, "grad_norm": 14.031295776367188, "learning_rate": 2.105218425973143e-05, "loss": 1.614, "step": 43350 }, { "epoch": 7.37038925718171, "grad_norm": 25.674800872802734, "learning_rate": 2.1049351238030483e-05, "loss": 1.5971, "step": 43360 }, { "epoch": 7.372089070202278, "grad_norm": 11.740612030029297, "learning_rate": 2.104651821632954e-05, "loss": 1.646, "step": 43370 }, { "epoch": 7.373788883222845, "grad_norm": 19.3652286529541, "learning_rate": 2.104368519462859e-05, "loss": 1.3621, "step": 43380 }, { "epoch": 7.375488696243413, "grad_norm": 15.55501937866211, "learning_rate": 2.1040852172927644e-05, "loss": 1.3496, "step": 43390 }, { "epoch": 7.377188509263981, "grad_norm": 13.559833526611328, "learning_rate": 2.10380191512267e-05, "loss": 1.563, "step": 43400 }, { "epoch": 7.378888322284549, "grad_norm": 12.900776863098145, "learning_rate": 2.1035186129525754e-05, "loss": 1.8271, "step": 43410 }, { "epoch": 7.380588135305117, "grad_norm": 16.169483184814453, "learning_rate": 2.1032353107824804e-05, "loss": 1.5793, "step": 43420 }, { "epoch": 7.3822879483256845, "grad_norm": 11.064230918884277, "learning_rate": 2.102952008612386e-05, "loss": 1.6918, "step": 43430 }, { "epoch": 7.383987761346252, "grad_norm": 13.972280502319336, "learning_rate": 2.1026687064422915e-05, "loss": 1.5399, "step": 43440 }, { "epoch": 7.38568757436682, "grad_norm": 16.580310821533203, "learning_rate": 2.1023854042721968e-05, "loss": 1.7726, "step": 43450 }, { "epoch": 7.387387387387387, "grad_norm": 14.416356086730957, "learning_rate": 2.102102102102102e-05, "loss": 1.7995, "step": 43460 }, { "epoch": 7.389087200407955, "grad_norm": 22.476070404052734, "learning_rate": 2.1018187999320075e-05, "loss": 1.9412, "step": 43470 }, { "epoch": 7.390787013428523, "grad_norm": 12.837530136108398, "learning_rate": 2.101535497761913e-05, "loss": 1.5763, "step": 43480 }, { "epoch": 7.39248682644909, "grad_norm": 11.760686874389648, "learning_rate": 2.1012521955918182e-05, "loss": 1.7218, "step": 43490 }, { "epoch": 7.394186639469658, "grad_norm": 13.303491592407227, "learning_rate": 2.1009688934217236e-05, "loss": 1.6905, "step": 43500 }, { "epoch": 7.395886452490226, "grad_norm": 11.878945350646973, "learning_rate": 2.100685591251629e-05, "loss": 1.7716, "step": 43510 }, { "epoch": 7.397586265510794, "grad_norm": 10.238083839416504, "learning_rate": 2.1004022890815346e-05, "loss": 1.6506, "step": 43520 }, { "epoch": 7.399286078531362, "grad_norm": 23.240406036376953, "learning_rate": 2.1001189869114396e-05, "loss": 1.5866, "step": 43530 }, { "epoch": 7.4009858915519295, "grad_norm": 14.150433540344238, "learning_rate": 2.099835684741345e-05, "loss": 1.6439, "step": 43540 }, { "epoch": 7.402685704572497, "grad_norm": 13.71342945098877, "learning_rate": 2.0995523825712507e-05, "loss": 1.6528, "step": 43550 }, { "epoch": 7.404385517593065, "grad_norm": 15.259117126464844, "learning_rate": 2.099269080401156e-05, "loss": 1.5297, "step": 43560 }, { "epoch": 7.4060853306136325, "grad_norm": 17.800973892211914, "learning_rate": 2.098985778231061e-05, "loss": 1.5854, "step": 43570 }, { "epoch": 7.4077851436342, "grad_norm": 16.321901321411133, "learning_rate": 2.0987024760609667e-05, "loss": 1.8089, "step": 43580 }, { "epoch": 7.409484956654768, "grad_norm": 15.156267166137695, "learning_rate": 2.098419173890872e-05, "loss": 1.5588, "step": 43590 }, { "epoch": 7.4111847696753355, "grad_norm": 12.166963577270508, "learning_rate": 2.0981358717207774e-05, "loss": 1.7431, "step": 43600 }, { "epoch": 7.412884582695903, "grad_norm": 10.561677932739258, "learning_rate": 2.0978525695506828e-05, "loss": 1.7054, "step": 43610 }, { "epoch": 7.414584395716471, "grad_norm": 15.23112964630127, "learning_rate": 2.097569267380588e-05, "loss": 1.5714, "step": 43620 }, { "epoch": 7.416284208737039, "grad_norm": 11.724963188171387, "learning_rate": 2.0972859652104935e-05, "loss": 1.5208, "step": 43630 }, { "epoch": 7.417984021757607, "grad_norm": 14.982566833496094, "learning_rate": 2.097002663040399e-05, "loss": 1.77, "step": 43640 }, { "epoch": 7.419683834778175, "grad_norm": 14.182234764099121, "learning_rate": 2.0967193608703042e-05, "loss": 1.4822, "step": 43650 }, { "epoch": 7.421383647798742, "grad_norm": 18.09058380126953, "learning_rate": 2.09643605870021e-05, "loss": 1.5194, "step": 43660 }, { "epoch": 7.42308346081931, "grad_norm": 18.615633010864258, "learning_rate": 2.0961527565301152e-05, "loss": 1.7204, "step": 43670 }, { "epoch": 7.424783273839878, "grad_norm": 19.309322357177734, "learning_rate": 2.0958694543600202e-05, "loss": 1.5749, "step": 43680 }, { "epoch": 7.426483086860445, "grad_norm": 13.991707801818848, "learning_rate": 2.095586152189926e-05, "loss": 1.8775, "step": 43690 }, { "epoch": 7.428182899881013, "grad_norm": 17.856706619262695, "learning_rate": 2.0953028500198313e-05, "loss": 1.6466, "step": 43700 }, { "epoch": 7.4298827129015805, "grad_norm": 17.819583892822266, "learning_rate": 2.0950195478497366e-05, "loss": 1.4458, "step": 43710 }, { "epoch": 7.431582525922148, "grad_norm": 12.75303840637207, "learning_rate": 2.094736245679642e-05, "loss": 1.7174, "step": 43720 }, { "epoch": 7.433282338942716, "grad_norm": 9.972938537597656, "learning_rate": 2.0944529435095473e-05, "loss": 1.6847, "step": 43730 }, { "epoch": 7.434982151963284, "grad_norm": 16.459030151367188, "learning_rate": 2.0941696413394527e-05, "loss": 1.4212, "step": 43740 }, { "epoch": 7.436681964983852, "grad_norm": 52.40408706665039, "learning_rate": 2.0938863391693584e-05, "loss": 1.6329, "step": 43750 }, { "epoch": 7.43838177800442, "grad_norm": 17.052820205688477, "learning_rate": 2.0936030369992634e-05, "loss": 1.8225, "step": 43760 }, { "epoch": 7.440081591024987, "grad_norm": 15.010054588317871, "learning_rate": 2.0933197348291688e-05, "loss": 1.6321, "step": 43770 }, { "epoch": 7.441781404045555, "grad_norm": 8.191797256469727, "learning_rate": 2.0930364326590744e-05, "loss": 1.689, "step": 43780 }, { "epoch": 7.443481217066123, "grad_norm": 14.820839881896973, "learning_rate": 2.0927531304889798e-05, "loss": 1.4088, "step": 43790 }, { "epoch": 7.44518103008669, "grad_norm": 16.204437255859375, "learning_rate": 2.0924698283188848e-05, "loss": 1.7965, "step": 43800 }, { "epoch": 7.446880843107258, "grad_norm": 17.5737247467041, "learning_rate": 2.0921865261487905e-05, "loss": 1.5397, "step": 43810 }, { "epoch": 7.448580656127826, "grad_norm": 9.365333557128906, "learning_rate": 2.091903223978696e-05, "loss": 1.7797, "step": 43820 }, { "epoch": 7.450280469148394, "grad_norm": 13.005634307861328, "learning_rate": 2.091619921808601e-05, "loss": 1.8305, "step": 43830 }, { "epoch": 7.451980282168962, "grad_norm": 19.24053955078125, "learning_rate": 2.0913366196385066e-05, "loss": 1.6802, "step": 43840 }, { "epoch": 7.4536800951895295, "grad_norm": 16.627885818481445, "learning_rate": 2.091053317468412e-05, "loss": 1.5603, "step": 43850 }, { "epoch": 7.455379908210097, "grad_norm": 12.856938362121582, "learning_rate": 2.0907700152983173e-05, "loss": 1.7898, "step": 43860 }, { "epoch": 7.457079721230665, "grad_norm": 16.30610466003418, "learning_rate": 2.0904867131282226e-05, "loss": 1.6224, "step": 43870 }, { "epoch": 7.458779534251232, "grad_norm": 11.336835861206055, "learning_rate": 2.090203410958128e-05, "loss": 1.6383, "step": 43880 }, { "epoch": 7.4604793472718, "grad_norm": 13.180130004882812, "learning_rate": 2.0899201087880333e-05, "loss": 1.6958, "step": 43890 }, { "epoch": 7.462179160292368, "grad_norm": 11.529035568237305, "learning_rate": 2.089636806617939e-05, "loss": 1.6383, "step": 43900 }, { "epoch": 7.463878973312935, "grad_norm": 12.792647361755371, "learning_rate": 2.089353504447844e-05, "loss": 1.6304, "step": 43910 }, { "epoch": 7.465578786333503, "grad_norm": 16.241539001464844, "learning_rate": 2.0890702022777494e-05, "loss": 1.3359, "step": 43920 }, { "epoch": 7.467278599354071, "grad_norm": 15.570693969726562, "learning_rate": 2.088786900107655e-05, "loss": 1.5113, "step": 43930 }, { "epoch": 7.468978412374639, "grad_norm": 13.878300666809082, "learning_rate": 2.0885035979375604e-05, "loss": 1.5933, "step": 43940 }, { "epoch": 7.470678225395207, "grad_norm": 13.140159606933594, "learning_rate": 2.0882202957674654e-05, "loss": 1.6597, "step": 43950 }, { "epoch": 7.4723780384157745, "grad_norm": 14.157088279724121, "learning_rate": 2.087936993597371e-05, "loss": 1.515, "step": 43960 }, { "epoch": 7.474077851436342, "grad_norm": 18.65081024169922, "learning_rate": 2.0876536914272765e-05, "loss": 1.7372, "step": 43970 }, { "epoch": 7.47577766445691, "grad_norm": 28.270597457885742, "learning_rate": 2.0873703892571815e-05, "loss": 1.6922, "step": 43980 }, { "epoch": 7.4774774774774775, "grad_norm": 16.21110725402832, "learning_rate": 2.0870870870870872e-05, "loss": 1.5319, "step": 43990 }, { "epoch": 7.479177290498045, "grad_norm": 16.663232803344727, "learning_rate": 2.0868037849169925e-05, "loss": 1.6852, "step": 44000 }, { "epoch": 7.480877103518613, "grad_norm": 17.716289520263672, "learning_rate": 2.086520482746898e-05, "loss": 1.6674, "step": 44010 }, { "epoch": 7.4825769165391804, "grad_norm": 31.182674407958984, "learning_rate": 2.0862371805768032e-05, "loss": 1.4695, "step": 44020 }, { "epoch": 7.484276729559748, "grad_norm": 11.572696685791016, "learning_rate": 2.0859538784067086e-05, "loss": 1.4379, "step": 44030 }, { "epoch": 7.485976542580316, "grad_norm": 17.465084075927734, "learning_rate": 2.085670576236614e-05, "loss": 1.4615, "step": 44040 }, { "epoch": 7.487676355600884, "grad_norm": 17.1124210357666, "learning_rate": 2.0853872740665196e-05, "loss": 1.5149, "step": 44050 }, { "epoch": 7.489376168621452, "grad_norm": 14.792805671691895, "learning_rate": 2.0851039718964246e-05, "loss": 1.713, "step": 44060 }, { "epoch": 7.49107598164202, "grad_norm": 14.359855651855469, "learning_rate": 2.08482066972633e-05, "loss": 1.673, "step": 44070 }, { "epoch": 7.492775794662587, "grad_norm": 22.769163131713867, "learning_rate": 2.0845373675562357e-05, "loss": 1.9627, "step": 44080 }, { "epoch": 7.494475607683155, "grad_norm": 13.757356643676758, "learning_rate": 2.084254065386141e-05, "loss": 1.6227, "step": 44090 }, { "epoch": 7.496175420703723, "grad_norm": 12.149821281433105, "learning_rate": 2.083970763216046e-05, "loss": 1.7061, "step": 44100 }, { "epoch": 7.49787523372429, "grad_norm": 11.741154670715332, "learning_rate": 2.0836874610459517e-05, "loss": 1.4157, "step": 44110 }, { "epoch": 7.499575046744858, "grad_norm": 14.511570930480957, "learning_rate": 2.083404158875857e-05, "loss": 1.6191, "step": 44120 }, { "epoch": 7.5012748597654255, "grad_norm": 12.29056453704834, "learning_rate": 2.0831208567057624e-05, "loss": 1.8787, "step": 44130 }, { "epoch": 7.502974672785993, "grad_norm": 13.659476280212402, "learning_rate": 2.0828375545356678e-05, "loss": 1.7019, "step": 44140 }, { "epoch": 7.504674485806561, "grad_norm": 13.058775901794434, "learning_rate": 2.082554252365573e-05, "loss": 1.6482, "step": 44150 }, { "epoch": 7.506374298827129, "grad_norm": 12.735987663269043, "learning_rate": 2.0822709501954785e-05, "loss": 1.6192, "step": 44160 }, { "epoch": 7.508074111847697, "grad_norm": 10.732285499572754, "learning_rate": 2.081987648025384e-05, "loss": 1.7866, "step": 44170 }, { "epoch": 7.509773924868265, "grad_norm": 14.220600128173828, "learning_rate": 2.0817043458552892e-05, "loss": 1.6396, "step": 44180 }, { "epoch": 7.511473737888832, "grad_norm": 14.765325546264648, "learning_rate": 2.0814210436851946e-05, "loss": 1.6764, "step": 44190 }, { "epoch": 7.5131735509094, "grad_norm": 13.182655334472656, "learning_rate": 2.0811377415151003e-05, "loss": 1.7638, "step": 44200 }, { "epoch": 7.514873363929968, "grad_norm": 11.181289672851562, "learning_rate": 2.0808544393450053e-05, "loss": 1.5398, "step": 44210 }, { "epoch": 7.516573176950535, "grad_norm": 11.532358169555664, "learning_rate": 2.0805711371749106e-05, "loss": 1.7718, "step": 44220 }, { "epoch": 7.518272989971103, "grad_norm": 13.977749824523926, "learning_rate": 2.0802878350048163e-05, "loss": 1.4655, "step": 44230 }, { "epoch": 7.519972802991671, "grad_norm": 17.167831420898438, "learning_rate": 2.0800045328347217e-05, "loss": 1.7085, "step": 44240 }, { "epoch": 7.521672616012239, "grad_norm": 20.779634475708008, "learning_rate": 2.0797212306646267e-05, "loss": 1.5319, "step": 44250 }, { "epoch": 7.523372429032806, "grad_norm": 15.025511741638184, "learning_rate": 2.0794379284945324e-05, "loss": 1.7551, "step": 44260 }, { "epoch": 7.525072242053374, "grad_norm": 22.2257080078125, "learning_rate": 2.0791546263244377e-05, "loss": 1.4603, "step": 44270 }, { "epoch": 7.526772055073942, "grad_norm": 12.661194801330566, "learning_rate": 2.078871324154343e-05, "loss": 1.6676, "step": 44280 }, { "epoch": 7.52847186809451, "grad_norm": 10.790369987487793, "learning_rate": 2.0785880219842484e-05, "loss": 1.5257, "step": 44290 }, { "epoch": 7.530171681115077, "grad_norm": 13.763663291931152, "learning_rate": 2.0783047198141538e-05, "loss": 1.6443, "step": 44300 }, { "epoch": 7.531871494135645, "grad_norm": 15.64745044708252, "learning_rate": 2.078021417644059e-05, "loss": 1.581, "step": 44310 }, { "epoch": 7.533571307156213, "grad_norm": 12.274993896484375, "learning_rate": 2.0777381154739645e-05, "loss": 1.6541, "step": 44320 }, { "epoch": 7.53527112017678, "grad_norm": 23.035741806030273, "learning_rate": 2.0774548133038698e-05, "loss": 1.6061, "step": 44330 }, { "epoch": 7.536970933197348, "grad_norm": 14.625794410705566, "learning_rate": 2.0771715111337752e-05, "loss": 1.6437, "step": 44340 }, { "epoch": 7.538670746217916, "grad_norm": 17.64766502380371, "learning_rate": 2.076888208963681e-05, "loss": 1.516, "step": 44350 }, { "epoch": 7.540370559238484, "grad_norm": 17.160717010498047, "learning_rate": 2.076604906793586e-05, "loss": 1.7436, "step": 44360 }, { "epoch": 7.542070372259052, "grad_norm": 13.070586204528809, "learning_rate": 2.0763216046234916e-05, "loss": 1.6936, "step": 44370 }, { "epoch": 7.5437701852796195, "grad_norm": 18.22646713256836, "learning_rate": 2.076038302453397e-05, "loss": 1.7368, "step": 44380 }, { "epoch": 7.545469998300187, "grad_norm": 16.479904174804688, "learning_rate": 2.0757550002833023e-05, "loss": 1.5302, "step": 44390 }, { "epoch": 7.547169811320755, "grad_norm": 14.918062210083008, "learning_rate": 2.0754716981132076e-05, "loss": 1.7069, "step": 44400 }, { "epoch": 7.5488696243413225, "grad_norm": 12.375064849853516, "learning_rate": 2.075188395943113e-05, "loss": 1.6309, "step": 44410 }, { "epoch": 7.55056943736189, "grad_norm": 18.570478439331055, "learning_rate": 2.0749050937730183e-05, "loss": 1.5514, "step": 44420 }, { "epoch": 7.552269250382458, "grad_norm": 28.425355911254883, "learning_rate": 2.074621791602924e-05, "loss": 1.683, "step": 44430 }, { "epoch": 7.553969063403025, "grad_norm": 17.370746612548828, "learning_rate": 2.074338489432829e-05, "loss": 1.5308, "step": 44440 }, { "epoch": 7.555668876423593, "grad_norm": 16.378334045410156, "learning_rate": 2.0740551872627344e-05, "loss": 1.4664, "step": 44450 }, { "epoch": 7.557368689444161, "grad_norm": 11.848971366882324, "learning_rate": 2.07377188509264e-05, "loss": 1.8046, "step": 44460 }, { "epoch": 7.559068502464729, "grad_norm": 13.305365562438965, "learning_rate": 2.0734885829225454e-05, "loss": 1.5223, "step": 44470 }, { "epoch": 7.560768315485297, "grad_norm": 29.360605239868164, "learning_rate": 2.0732052807524505e-05, "loss": 1.7326, "step": 44480 }, { "epoch": 7.562468128505865, "grad_norm": 14.219125747680664, "learning_rate": 2.072921978582356e-05, "loss": 1.5171, "step": 44490 }, { "epoch": 7.564167941526432, "grad_norm": 18.381933212280273, "learning_rate": 2.0726386764122615e-05, "loss": 1.4234, "step": 44500 }, { "epoch": 7.565867754547, "grad_norm": 12.51939868927002, "learning_rate": 2.0723553742421665e-05, "loss": 1.6134, "step": 44510 }, { "epoch": 7.5675675675675675, "grad_norm": 14.960911750793457, "learning_rate": 2.0720720720720722e-05, "loss": 1.8532, "step": 44520 }, { "epoch": 7.569267380588135, "grad_norm": 11.837510108947754, "learning_rate": 2.0717887699019776e-05, "loss": 1.8122, "step": 44530 }, { "epoch": 7.570967193608703, "grad_norm": 16.470449447631836, "learning_rate": 2.071505467731883e-05, "loss": 1.9252, "step": 44540 }, { "epoch": 7.5726670066292705, "grad_norm": 20.725521087646484, "learning_rate": 2.0712221655617883e-05, "loss": 1.5704, "step": 44550 }, { "epoch": 7.574366819649838, "grad_norm": 11.611315727233887, "learning_rate": 2.0709388633916936e-05, "loss": 1.7573, "step": 44560 }, { "epoch": 7.576066632670406, "grad_norm": 13.289590835571289, "learning_rate": 2.070655561221599e-05, "loss": 1.9799, "step": 44570 }, { "epoch": 7.577766445690974, "grad_norm": 21.861268997192383, "learning_rate": 2.0703722590515047e-05, "loss": 1.6982, "step": 44580 }, { "epoch": 7.579466258711542, "grad_norm": 13.401601791381836, "learning_rate": 2.0700889568814097e-05, "loss": 1.7437, "step": 44590 }, { "epoch": 7.58116607173211, "grad_norm": 13.490230560302734, "learning_rate": 2.069805654711315e-05, "loss": 1.6173, "step": 44600 }, { "epoch": 7.582865884752677, "grad_norm": 12.497056007385254, "learning_rate": 2.0695223525412207e-05, "loss": 1.5954, "step": 44610 }, { "epoch": 7.584565697773245, "grad_norm": 13.388496398925781, "learning_rate": 2.069239050371126e-05, "loss": 1.6306, "step": 44620 }, { "epoch": 7.586265510793813, "grad_norm": 13.457444190979004, "learning_rate": 2.068955748201031e-05, "loss": 1.4921, "step": 44630 }, { "epoch": 7.58796532381438, "grad_norm": 18.38181495666504, "learning_rate": 2.0686724460309368e-05, "loss": 1.7086, "step": 44640 }, { "epoch": 7.589665136834948, "grad_norm": 11.941003799438477, "learning_rate": 2.068389143860842e-05, "loss": 1.8624, "step": 44650 }, { "epoch": 7.591364949855516, "grad_norm": 16.189929962158203, "learning_rate": 2.0681058416907475e-05, "loss": 1.5215, "step": 44660 }, { "epoch": 7.593064762876084, "grad_norm": 15.232178688049316, "learning_rate": 2.0678225395206528e-05, "loss": 1.7863, "step": 44670 }, { "epoch": 7.594764575896651, "grad_norm": 13.051770210266113, "learning_rate": 2.0675392373505582e-05, "loss": 1.6488, "step": 44680 }, { "epoch": 7.596464388917219, "grad_norm": 12.765039443969727, "learning_rate": 2.0672559351804635e-05, "loss": 1.6403, "step": 44690 }, { "epoch": 7.598164201937787, "grad_norm": 12.685999870300293, "learning_rate": 2.066972633010369e-05, "loss": 1.5495, "step": 44700 }, { "epoch": 7.599864014958355, "grad_norm": 26.1155948638916, "learning_rate": 2.0666893308402742e-05, "loss": 1.3854, "step": 44710 }, { "epoch": 7.601563827978922, "grad_norm": 14.137091636657715, "learning_rate": 2.0664060286701796e-05, "loss": 1.6489, "step": 44720 }, { "epoch": 7.60326364099949, "grad_norm": 15.866375923156738, "learning_rate": 2.0661227265000853e-05, "loss": 1.4917, "step": 44730 }, { "epoch": 7.604963454020058, "grad_norm": 14.005044937133789, "learning_rate": 2.0658394243299903e-05, "loss": 1.4272, "step": 44740 }, { "epoch": 7.606663267040625, "grad_norm": 19.3802490234375, "learning_rate": 2.0655561221598956e-05, "loss": 1.7401, "step": 44750 }, { "epoch": 7.608363080061193, "grad_norm": 17.320659637451172, "learning_rate": 2.0652728199898013e-05, "loss": 1.8166, "step": 44760 }, { "epoch": 7.610062893081761, "grad_norm": 12.342357635498047, "learning_rate": 2.0649895178197067e-05, "loss": 1.7349, "step": 44770 }, { "epoch": 7.611762706102329, "grad_norm": 20.077280044555664, "learning_rate": 2.0647062156496117e-05, "loss": 1.8593, "step": 44780 }, { "epoch": 7.613462519122897, "grad_norm": 16.866926193237305, "learning_rate": 2.0644229134795174e-05, "loss": 1.6337, "step": 44790 }, { "epoch": 7.6151623321434645, "grad_norm": 31.246965408325195, "learning_rate": 2.0641396113094227e-05, "loss": 1.7321, "step": 44800 }, { "epoch": 7.616862145164032, "grad_norm": 21.73634147644043, "learning_rate": 2.063856309139328e-05, "loss": 1.7225, "step": 44810 }, { "epoch": 7.6185619581846, "grad_norm": 13.881326675415039, "learning_rate": 2.0635730069692334e-05, "loss": 1.7283, "step": 44820 }, { "epoch": 7.6202617712051675, "grad_norm": 18.52943992614746, "learning_rate": 2.0632897047991388e-05, "loss": 1.66, "step": 44830 }, { "epoch": 7.621961584225735, "grad_norm": 20.319549560546875, "learning_rate": 2.063006402629044e-05, "loss": 1.8403, "step": 44840 }, { "epoch": 7.623661397246303, "grad_norm": 16.53774070739746, "learning_rate": 2.0627231004589495e-05, "loss": 1.6502, "step": 44850 }, { "epoch": 7.62536121026687, "grad_norm": 16.24591827392578, "learning_rate": 2.062439798288855e-05, "loss": 1.6546, "step": 44860 }, { "epoch": 7.627061023287438, "grad_norm": 23.0468692779541, "learning_rate": 2.0621564961187602e-05, "loss": 1.7941, "step": 44870 }, { "epoch": 7.628760836308006, "grad_norm": 12.711905479431152, "learning_rate": 2.061873193948666e-05, "loss": 1.6936, "step": 44880 }, { "epoch": 7.630460649328574, "grad_norm": 17.639549255371094, "learning_rate": 2.061589891778571e-05, "loss": 1.6834, "step": 44890 }, { "epoch": 7.632160462349142, "grad_norm": 24.478261947631836, "learning_rate": 2.0613065896084763e-05, "loss": 1.742, "step": 44900 }, { "epoch": 7.63386027536971, "grad_norm": 19.930662155151367, "learning_rate": 2.061023287438382e-05, "loss": 1.4882, "step": 44910 }, { "epoch": 7.635560088390277, "grad_norm": 14.333263397216797, "learning_rate": 2.0607399852682873e-05, "loss": 1.6804, "step": 44920 }, { "epoch": 7.637259901410845, "grad_norm": 10.16380786895752, "learning_rate": 2.0604566830981923e-05, "loss": 1.5708, "step": 44930 }, { "epoch": 7.6389597144314125, "grad_norm": 12.514114379882812, "learning_rate": 2.060173380928098e-05, "loss": 1.5549, "step": 44940 }, { "epoch": 7.64065952745198, "grad_norm": 18.161699295043945, "learning_rate": 2.0598900787580034e-05, "loss": 1.4778, "step": 44950 }, { "epoch": 7.642359340472548, "grad_norm": 13.492716789245605, "learning_rate": 2.0596067765879087e-05, "loss": 1.6674, "step": 44960 }, { "epoch": 7.6440591534931155, "grad_norm": 23.3574161529541, "learning_rate": 2.059323474417814e-05, "loss": 1.5919, "step": 44970 }, { "epoch": 7.645758966513683, "grad_norm": 11.22929573059082, "learning_rate": 2.0590401722477194e-05, "loss": 1.8277, "step": 44980 }, { "epoch": 7.647458779534251, "grad_norm": 19.529346466064453, "learning_rate": 2.0587568700776248e-05, "loss": 1.7183, "step": 44990 }, { "epoch": 7.649158592554819, "grad_norm": 12.696344375610352, "learning_rate": 2.0584735679075305e-05, "loss": 1.6211, "step": 45000 }, { "epoch": 7.650858405575387, "grad_norm": 17.492881774902344, "learning_rate": 2.0581902657374355e-05, "loss": 1.5825, "step": 45010 }, { "epoch": 7.652558218595955, "grad_norm": 11.036242485046387, "learning_rate": 2.0579069635673408e-05, "loss": 1.6849, "step": 45020 }, { "epoch": 7.654258031616522, "grad_norm": 15.15809154510498, "learning_rate": 2.0576236613972465e-05, "loss": 1.5818, "step": 45030 }, { "epoch": 7.65595784463709, "grad_norm": 19.761051177978516, "learning_rate": 2.0573403592271515e-05, "loss": 1.5274, "step": 45040 }, { "epoch": 7.657657657657658, "grad_norm": 19.48860740661621, "learning_rate": 2.057057057057057e-05, "loss": 1.7114, "step": 45050 }, { "epoch": 7.659357470678225, "grad_norm": 9.710772514343262, "learning_rate": 2.0567737548869626e-05, "loss": 1.7864, "step": 45060 }, { "epoch": 7.661057283698793, "grad_norm": 17.395206451416016, "learning_rate": 2.056490452716868e-05, "loss": 1.6718, "step": 45070 }, { "epoch": 7.662757096719361, "grad_norm": 13.473532676696777, "learning_rate": 2.056207150546773e-05, "loss": 1.4028, "step": 45080 }, { "epoch": 7.664456909739929, "grad_norm": 11.915247917175293, "learning_rate": 2.0559238483766786e-05, "loss": 1.7745, "step": 45090 }, { "epoch": 7.666156722760496, "grad_norm": 13.230196952819824, "learning_rate": 2.055640546206584e-05, "loss": 1.7933, "step": 45100 }, { "epoch": 7.667856535781064, "grad_norm": 14.99953556060791, "learning_rate": 2.0553572440364897e-05, "loss": 1.5091, "step": 45110 }, { "epoch": 7.669556348801632, "grad_norm": 18.69377899169922, "learning_rate": 2.0550739418663947e-05, "loss": 1.6208, "step": 45120 }, { "epoch": 7.6712561618222, "grad_norm": 20.07754898071289, "learning_rate": 2.0547906396963e-05, "loss": 1.5204, "step": 45130 }, { "epoch": 7.672955974842767, "grad_norm": 12.881636619567871, "learning_rate": 2.0545073375262057e-05, "loss": 1.7382, "step": 45140 }, { "epoch": 7.674655787863335, "grad_norm": 13.178875923156738, "learning_rate": 2.054224035356111e-05, "loss": 1.6094, "step": 45150 }, { "epoch": 7.676355600883903, "grad_norm": 13.834095001220703, "learning_rate": 2.053940733186016e-05, "loss": 1.7821, "step": 45160 }, { "epoch": 7.67805541390447, "grad_norm": 12.930194854736328, "learning_rate": 2.0536574310159218e-05, "loss": 1.6449, "step": 45170 }, { "epoch": 7.679755226925038, "grad_norm": 12.225324630737305, "learning_rate": 2.053374128845827e-05, "loss": 1.6661, "step": 45180 }, { "epoch": 7.681455039945606, "grad_norm": 13.103157043457031, "learning_rate": 2.053090826675732e-05, "loss": 1.6684, "step": 45190 }, { "epoch": 7.683154852966174, "grad_norm": 16.482566833496094, "learning_rate": 2.052807524505638e-05, "loss": 1.6981, "step": 45200 }, { "epoch": 7.684854665986742, "grad_norm": 18.5834903717041, "learning_rate": 2.0525242223355432e-05, "loss": 1.6837, "step": 45210 }, { "epoch": 7.6865544790073095, "grad_norm": 17.682355880737305, "learning_rate": 2.0522409201654485e-05, "loss": 1.6295, "step": 45220 }, { "epoch": 7.688254292027877, "grad_norm": 13.238829612731934, "learning_rate": 2.051957617995354e-05, "loss": 1.5412, "step": 45230 }, { "epoch": 7.689954105048445, "grad_norm": 15.791421890258789, "learning_rate": 2.0516743158252593e-05, "loss": 1.6565, "step": 45240 }, { "epoch": 7.691653918069012, "grad_norm": 11.738368034362793, "learning_rate": 2.0513910136551646e-05, "loss": 1.7762, "step": 45250 }, { "epoch": 7.69335373108958, "grad_norm": 15.186776161193848, "learning_rate": 2.0511077114850703e-05, "loss": 1.585, "step": 45260 }, { "epoch": 7.695053544110148, "grad_norm": 12.333303451538086, "learning_rate": 2.0508244093149753e-05, "loss": 1.4601, "step": 45270 }, { "epoch": 7.696753357130715, "grad_norm": 13.374676704406738, "learning_rate": 2.0505411071448807e-05, "loss": 1.6319, "step": 45280 }, { "epoch": 7.698453170151283, "grad_norm": 13.245141983032227, "learning_rate": 2.0502578049747863e-05, "loss": 1.822, "step": 45290 }, { "epoch": 7.700152983171851, "grad_norm": 17.459943771362305, "learning_rate": 2.0499745028046917e-05, "loss": 1.8124, "step": 45300 }, { "epoch": 7.701852796192419, "grad_norm": 14.964795112609863, "learning_rate": 2.0496912006345967e-05, "loss": 1.7441, "step": 45310 }, { "epoch": 7.703552609212987, "grad_norm": 13.208563804626465, "learning_rate": 2.0494078984645024e-05, "loss": 1.6055, "step": 45320 }, { "epoch": 7.7052524222335546, "grad_norm": 17.41577911376953, "learning_rate": 2.0491245962944078e-05, "loss": 1.5946, "step": 45330 }, { "epoch": 7.706952235254122, "grad_norm": 21.584514617919922, "learning_rate": 2.048841294124313e-05, "loss": 1.4579, "step": 45340 }, { "epoch": 7.70865204827469, "grad_norm": 16.49098777770996, "learning_rate": 2.0485579919542185e-05, "loss": 1.5612, "step": 45350 }, { "epoch": 7.7103518612952575, "grad_norm": 16.53464698791504, "learning_rate": 2.0482746897841238e-05, "loss": 1.3754, "step": 45360 }, { "epoch": 7.712051674315825, "grad_norm": 18.55403709411621, "learning_rate": 2.047991387614029e-05, "loss": 1.4832, "step": 45370 }, { "epoch": 7.713751487336393, "grad_norm": 13.534860610961914, "learning_rate": 2.0477080854439345e-05, "loss": 1.6715, "step": 45380 }, { "epoch": 7.7154513003569605, "grad_norm": 15.623556137084961, "learning_rate": 2.04742478327384e-05, "loss": 1.6201, "step": 45390 }, { "epoch": 7.717151113377528, "grad_norm": 17.80730628967285, "learning_rate": 2.0471414811037452e-05, "loss": 1.5873, "step": 45400 }, { "epoch": 7.718850926398096, "grad_norm": 16.54896354675293, "learning_rate": 2.046858178933651e-05, "loss": 1.7128, "step": 45410 }, { "epoch": 7.720550739418664, "grad_norm": 17.821931838989258, "learning_rate": 2.046574876763556e-05, "loss": 1.6788, "step": 45420 }, { "epoch": 7.722250552439232, "grad_norm": 20.589208602905273, "learning_rate": 2.0462915745934613e-05, "loss": 1.532, "step": 45430 }, { "epoch": 7.7239503654598, "grad_norm": 17.33382797241211, "learning_rate": 2.046008272423367e-05, "loss": 1.3251, "step": 45440 }, { "epoch": 7.725650178480367, "grad_norm": 15.241890907287598, "learning_rate": 2.0457249702532723e-05, "loss": 1.4632, "step": 45450 }, { "epoch": 7.727349991500935, "grad_norm": 13.976974487304688, "learning_rate": 2.0454416680831773e-05, "loss": 1.7842, "step": 45460 }, { "epoch": 7.729049804521503, "grad_norm": 15.191699981689453, "learning_rate": 2.045158365913083e-05, "loss": 1.5064, "step": 45470 }, { "epoch": 7.73074961754207, "grad_norm": 12.891672134399414, "learning_rate": 2.0448750637429884e-05, "loss": 1.6327, "step": 45480 }, { "epoch": 7.732449430562638, "grad_norm": 12.281730651855469, "learning_rate": 2.0445917615728937e-05, "loss": 1.6424, "step": 45490 }, { "epoch": 7.7341492435832055, "grad_norm": 15.895419120788574, "learning_rate": 2.044308459402799e-05, "loss": 1.6208, "step": 45500 }, { "epoch": 7.735849056603773, "grad_norm": 14.09974479675293, "learning_rate": 2.0440251572327044e-05, "loss": 1.6753, "step": 45510 }, { "epoch": 7.737548869624341, "grad_norm": 14.550470352172852, "learning_rate": 2.0437418550626098e-05, "loss": 1.4973, "step": 45520 }, { "epoch": 7.739248682644909, "grad_norm": 15.491938591003418, "learning_rate": 2.0434585528925155e-05, "loss": 1.7792, "step": 45530 }, { "epoch": 7.740948495665477, "grad_norm": 19.4895076751709, "learning_rate": 2.0431752507224205e-05, "loss": 1.5581, "step": 45540 }, { "epoch": 7.742648308686045, "grad_norm": 15.048927307128906, "learning_rate": 2.042891948552326e-05, "loss": 1.6885, "step": 45550 }, { "epoch": 7.744348121706612, "grad_norm": 10.690398216247559, "learning_rate": 2.0426086463822315e-05, "loss": 1.7014, "step": 45560 }, { "epoch": 7.74604793472718, "grad_norm": 11.265619277954102, "learning_rate": 2.0423253442121365e-05, "loss": 1.7443, "step": 45570 }, { "epoch": 7.747747747747748, "grad_norm": 15.155975341796875, "learning_rate": 2.042042042042042e-05, "loss": 1.5368, "step": 45580 }, { "epoch": 7.749447560768315, "grad_norm": 15.777804374694824, "learning_rate": 2.0417587398719476e-05, "loss": 1.4849, "step": 45590 }, { "epoch": 7.751147373788883, "grad_norm": 15.414321899414062, "learning_rate": 2.041475437701853e-05, "loss": 1.3845, "step": 45600 }, { "epoch": 7.752847186809451, "grad_norm": 15.669923782348633, "learning_rate": 2.041192135531758e-05, "loss": 1.6947, "step": 45610 }, { "epoch": 7.754546999830019, "grad_norm": 13.70506477355957, "learning_rate": 2.0409088333616636e-05, "loss": 1.6765, "step": 45620 }, { "epoch": 7.756246812850587, "grad_norm": 16.96025848388672, "learning_rate": 2.040625531191569e-05, "loss": 1.501, "step": 45630 }, { "epoch": 7.7579466258711545, "grad_norm": 12.379813194274902, "learning_rate": 2.0403422290214744e-05, "loss": 1.6104, "step": 45640 }, { "epoch": 7.759646438891722, "grad_norm": 13.811040878295898, "learning_rate": 2.0400589268513797e-05, "loss": 1.6856, "step": 45650 }, { "epoch": 7.76134625191229, "grad_norm": 14.365350723266602, "learning_rate": 2.039775624681285e-05, "loss": 1.4661, "step": 45660 }, { "epoch": 7.763046064932857, "grad_norm": 13.691903114318848, "learning_rate": 2.0394923225111904e-05, "loss": 1.7073, "step": 45670 }, { "epoch": 7.764745877953425, "grad_norm": 32.72737121582031, "learning_rate": 2.039209020341096e-05, "loss": 1.4898, "step": 45680 }, { "epoch": 7.766445690973993, "grad_norm": 11.582450866699219, "learning_rate": 2.038925718171001e-05, "loss": 1.6882, "step": 45690 }, { "epoch": 7.76814550399456, "grad_norm": 11.25244426727295, "learning_rate": 2.0386424160009065e-05, "loss": 1.6613, "step": 45700 }, { "epoch": 7.769845317015128, "grad_norm": 15.612195014953613, "learning_rate": 2.038359113830812e-05, "loss": 1.7513, "step": 45710 }, { "epoch": 7.771545130035696, "grad_norm": 15.23444652557373, "learning_rate": 2.0380758116607172e-05, "loss": 1.5826, "step": 45720 }, { "epoch": 7.773244943056264, "grad_norm": 10.548694610595703, "learning_rate": 2.0377925094906225e-05, "loss": 1.3881, "step": 45730 }, { "epoch": 7.774944756076832, "grad_norm": 17.723451614379883, "learning_rate": 2.0375092073205282e-05, "loss": 1.5526, "step": 45740 }, { "epoch": 7.7766445690973995, "grad_norm": 13.288897514343262, "learning_rate": 2.0372259051504336e-05, "loss": 1.7307, "step": 45750 }, { "epoch": 7.778344382117967, "grad_norm": 13.8507080078125, "learning_rate": 2.0369426029803386e-05, "loss": 1.3546, "step": 45760 }, { "epoch": 7.780044195138535, "grad_norm": 19.167903900146484, "learning_rate": 2.0366593008102443e-05, "loss": 1.5084, "step": 45770 }, { "epoch": 7.7817440081591025, "grad_norm": 13.844181060791016, "learning_rate": 2.0363759986401496e-05, "loss": 1.7755, "step": 45780 }, { "epoch": 7.78344382117967, "grad_norm": 15.099093437194824, "learning_rate": 2.036092696470055e-05, "loss": 1.5203, "step": 45790 }, { "epoch": 7.785143634200238, "grad_norm": 18.46085548400879, "learning_rate": 2.0358093942999603e-05, "loss": 1.7333, "step": 45800 }, { "epoch": 7.7868434472208055, "grad_norm": 12.806768417358398, "learning_rate": 2.0355260921298657e-05, "loss": 1.7877, "step": 45810 }, { "epoch": 7.788543260241373, "grad_norm": 13.061568260192871, "learning_rate": 2.035242789959771e-05, "loss": 1.6149, "step": 45820 }, { "epoch": 7.790243073261941, "grad_norm": 15.900388717651367, "learning_rate": 2.0349594877896767e-05, "loss": 1.4671, "step": 45830 }, { "epoch": 7.791942886282509, "grad_norm": 10.091837882995605, "learning_rate": 2.0346761856195817e-05, "loss": 1.6881, "step": 45840 }, { "epoch": 7.793642699303077, "grad_norm": 12.601543426513672, "learning_rate": 2.0343928834494874e-05, "loss": 1.9339, "step": 45850 }, { "epoch": 7.795342512323645, "grad_norm": 15.194778442382812, "learning_rate": 2.0341095812793928e-05, "loss": 1.7229, "step": 45860 }, { "epoch": 7.797042325344212, "grad_norm": 16.255651473999023, "learning_rate": 2.033826279109298e-05, "loss": 1.4938, "step": 45870 }, { "epoch": 7.79874213836478, "grad_norm": 13.117627143859863, "learning_rate": 2.0335429769392035e-05, "loss": 1.6343, "step": 45880 }, { "epoch": 7.800441951385348, "grad_norm": 17.28510093688965, "learning_rate": 2.033259674769109e-05, "loss": 1.4333, "step": 45890 }, { "epoch": 7.802141764405915, "grad_norm": 14.058059692382812, "learning_rate": 2.0329763725990142e-05, "loss": 1.4821, "step": 45900 }, { "epoch": 7.803841577426483, "grad_norm": 18.1672306060791, "learning_rate": 2.0326930704289195e-05, "loss": 1.6629, "step": 45910 }, { "epoch": 7.8055413904470505, "grad_norm": 16.42350196838379, "learning_rate": 2.032409768258825e-05, "loss": 1.8573, "step": 45920 }, { "epoch": 7.807241203467618, "grad_norm": 15.437012672424316, "learning_rate": 2.0321264660887302e-05, "loss": 1.6321, "step": 45930 }, { "epoch": 7.808941016488186, "grad_norm": 13.689114570617676, "learning_rate": 2.031843163918636e-05, "loss": 1.6773, "step": 45940 }, { "epoch": 7.810640829508754, "grad_norm": 16.298913955688477, "learning_rate": 2.031559861748541e-05, "loss": 1.4962, "step": 45950 }, { "epoch": 7.812340642529322, "grad_norm": 11.484477043151855, "learning_rate": 2.0312765595784463e-05, "loss": 1.5171, "step": 45960 }, { "epoch": 7.81404045554989, "grad_norm": 13.244976043701172, "learning_rate": 2.030993257408352e-05, "loss": 1.6971, "step": 45970 }, { "epoch": 7.815740268570457, "grad_norm": 13.898792266845703, "learning_rate": 2.0307099552382573e-05, "loss": 1.661, "step": 45980 }, { "epoch": 7.817440081591025, "grad_norm": 12.55581283569336, "learning_rate": 2.0304266530681624e-05, "loss": 1.7036, "step": 45990 }, { "epoch": 7.819139894611593, "grad_norm": 26.67789649963379, "learning_rate": 2.030143350898068e-05, "loss": 1.6474, "step": 46000 }, { "epoch": 7.82083970763216, "grad_norm": 14.67484188079834, "learning_rate": 2.0298600487279734e-05, "loss": 1.6924, "step": 46010 }, { "epoch": 7.822539520652728, "grad_norm": 12.201498985290527, "learning_rate": 2.0295767465578788e-05, "loss": 1.5321, "step": 46020 }, { "epoch": 7.824239333673296, "grad_norm": 10.271016120910645, "learning_rate": 2.029293444387784e-05, "loss": 1.3401, "step": 46030 }, { "epoch": 7.825939146693864, "grad_norm": 12.93957233428955, "learning_rate": 2.0290101422176895e-05, "loss": 1.7106, "step": 46040 }, { "epoch": 7.827638959714432, "grad_norm": 22.054349899291992, "learning_rate": 2.0287268400475948e-05, "loss": 1.5419, "step": 46050 }, { "epoch": 7.8293387727349995, "grad_norm": 15.015115737915039, "learning_rate": 2.0284435378775e-05, "loss": 1.7911, "step": 46060 }, { "epoch": 7.831038585755567, "grad_norm": 12.395306587219238, "learning_rate": 2.0281602357074055e-05, "loss": 1.7958, "step": 46070 }, { "epoch": 7.832738398776135, "grad_norm": 15.743638038635254, "learning_rate": 2.027876933537311e-05, "loss": 1.5495, "step": 46080 }, { "epoch": 7.834438211796702, "grad_norm": 15.093864440917969, "learning_rate": 2.0275936313672166e-05, "loss": 1.7701, "step": 46090 }, { "epoch": 7.83613802481727, "grad_norm": 18.077543258666992, "learning_rate": 2.0273103291971216e-05, "loss": 1.7253, "step": 46100 }, { "epoch": 7.837837837837838, "grad_norm": 12.310922622680664, "learning_rate": 2.027027027027027e-05, "loss": 1.6091, "step": 46110 }, { "epoch": 7.839537650858405, "grad_norm": 18.452878952026367, "learning_rate": 2.0267437248569326e-05, "loss": 1.6062, "step": 46120 }, { "epoch": 7.841237463878973, "grad_norm": 111.17326354980469, "learning_rate": 2.026460422686838e-05, "loss": 1.7102, "step": 46130 }, { "epoch": 7.842937276899541, "grad_norm": 18.246519088745117, "learning_rate": 2.026177120516743e-05, "loss": 1.519, "step": 46140 }, { "epoch": 7.844637089920109, "grad_norm": 13.618546485900879, "learning_rate": 2.0258938183466487e-05, "loss": 1.5866, "step": 46150 }, { "epoch": 7.846336902940677, "grad_norm": 16.30621337890625, "learning_rate": 2.025610516176554e-05, "loss": 1.4869, "step": 46160 }, { "epoch": 7.8480367159612445, "grad_norm": 22.52690315246582, "learning_rate": 2.0253272140064594e-05, "loss": 1.6664, "step": 46170 }, { "epoch": 7.849736528981812, "grad_norm": 16.2188777923584, "learning_rate": 2.0250439118363647e-05, "loss": 1.6445, "step": 46180 }, { "epoch": 7.85143634200238, "grad_norm": 12.461923599243164, "learning_rate": 2.02476060966627e-05, "loss": 1.6292, "step": 46190 }, { "epoch": 7.8531361550229475, "grad_norm": 15.625273704528809, "learning_rate": 2.0244773074961754e-05, "loss": 1.5976, "step": 46200 }, { "epoch": 7.854835968043515, "grad_norm": 14.610669136047363, "learning_rate": 2.024194005326081e-05, "loss": 1.5947, "step": 46210 }, { "epoch": 7.856535781064083, "grad_norm": 13.872315406799316, "learning_rate": 2.023910703155986e-05, "loss": 1.4708, "step": 46220 }, { "epoch": 7.85823559408465, "grad_norm": 16.151050567626953, "learning_rate": 2.0236274009858915e-05, "loss": 1.5742, "step": 46230 }, { "epoch": 7.859935407105218, "grad_norm": 15.174132347106934, "learning_rate": 2.0233440988157972e-05, "loss": 1.5178, "step": 46240 }, { "epoch": 7.861635220125786, "grad_norm": 21.4987850189209, "learning_rate": 2.0230607966457022e-05, "loss": 1.5705, "step": 46250 }, { "epoch": 7.863335033146354, "grad_norm": 13.93229866027832, "learning_rate": 2.0227774944756075e-05, "loss": 1.6127, "step": 46260 }, { "epoch": 7.865034846166922, "grad_norm": 10.180944442749023, "learning_rate": 2.0224941923055132e-05, "loss": 1.5774, "step": 46270 }, { "epoch": 7.86673465918749, "grad_norm": 16.356006622314453, "learning_rate": 2.0222108901354186e-05, "loss": 1.364, "step": 46280 }, { "epoch": 7.868434472208057, "grad_norm": 12.11681079864502, "learning_rate": 2.0219275879653236e-05, "loss": 1.6924, "step": 46290 }, { "epoch": 7.870134285228625, "grad_norm": 12.931962966918945, "learning_rate": 2.0216442857952293e-05, "loss": 1.6583, "step": 46300 }, { "epoch": 7.871834098249193, "grad_norm": 14.057150840759277, "learning_rate": 2.0213609836251346e-05, "loss": 1.5516, "step": 46310 }, { "epoch": 7.87353391126976, "grad_norm": 13.883777618408203, "learning_rate": 2.02107768145504e-05, "loss": 1.5505, "step": 46320 }, { "epoch": 7.875233724290328, "grad_norm": 14.329371452331543, "learning_rate": 2.0207943792849453e-05, "loss": 1.7323, "step": 46330 }, { "epoch": 7.8769335373108955, "grad_norm": 22.24783706665039, "learning_rate": 2.0205110771148507e-05, "loss": 1.6954, "step": 46340 }, { "epoch": 7.878633350331463, "grad_norm": 13.46994686126709, "learning_rate": 2.020227774944756e-05, "loss": 1.503, "step": 46350 }, { "epoch": 7.880333163352031, "grad_norm": 14.664113998413086, "learning_rate": 2.0199444727746617e-05, "loss": 1.8756, "step": 46360 }, { "epoch": 7.882032976372599, "grad_norm": 13.963496208190918, "learning_rate": 2.0196611706045668e-05, "loss": 1.6208, "step": 46370 }, { "epoch": 7.883732789393167, "grad_norm": 15.466348648071289, "learning_rate": 2.019377868434472e-05, "loss": 1.7882, "step": 46380 }, { "epoch": 7.885432602413735, "grad_norm": 9.511576652526855, "learning_rate": 2.0190945662643778e-05, "loss": 1.9109, "step": 46390 }, { "epoch": 7.887132415434302, "grad_norm": 11.932540893554688, "learning_rate": 2.0188112640942828e-05, "loss": 1.6999, "step": 46400 }, { "epoch": 7.88883222845487, "grad_norm": 19.50860023498535, "learning_rate": 2.018527961924188e-05, "loss": 1.4438, "step": 46410 }, { "epoch": 7.890532041475438, "grad_norm": 12.982909202575684, "learning_rate": 2.018244659754094e-05, "loss": 1.752, "step": 46420 }, { "epoch": 7.892231854496005, "grad_norm": 15.486723899841309, "learning_rate": 2.0179613575839992e-05, "loss": 1.5257, "step": 46430 }, { "epoch": 7.893931667516573, "grad_norm": 12.604508399963379, "learning_rate": 2.0176780554139042e-05, "loss": 1.3609, "step": 46440 }, { "epoch": 7.895631480537141, "grad_norm": 16.546283721923828, "learning_rate": 2.01739475324381e-05, "loss": 1.6675, "step": 46450 }, { "epoch": 7.897331293557709, "grad_norm": 15.352052688598633, "learning_rate": 2.0171114510737153e-05, "loss": 1.847, "step": 46460 }, { "epoch": 7.899031106578276, "grad_norm": 15.049092292785645, "learning_rate": 2.0168281489036206e-05, "loss": 1.7328, "step": 46470 }, { "epoch": 7.900730919598844, "grad_norm": 15.465317726135254, "learning_rate": 2.016544846733526e-05, "loss": 1.3858, "step": 46480 }, { "epoch": 7.902430732619412, "grad_norm": 14.771841049194336, "learning_rate": 2.0162615445634313e-05, "loss": 1.5001, "step": 46490 }, { "epoch": 7.90413054563998, "grad_norm": 24.68517303466797, "learning_rate": 2.0159782423933367e-05, "loss": 1.4615, "step": 46500 }, { "epoch": 7.905830358660547, "grad_norm": 16.690736770629883, "learning_rate": 2.0156949402232424e-05, "loss": 1.5988, "step": 46510 }, { "epoch": 7.907530171681115, "grad_norm": 18.940004348754883, "learning_rate": 2.0154116380531474e-05, "loss": 1.6612, "step": 46520 }, { "epoch": 7.909229984701683, "grad_norm": 14.647299766540527, "learning_rate": 2.0151283358830527e-05, "loss": 1.534, "step": 46530 }, { "epoch": 7.91092979772225, "grad_norm": 15.160614013671875, "learning_rate": 2.0148450337129584e-05, "loss": 1.7612, "step": 46540 }, { "epoch": 7.912629610742818, "grad_norm": 17.306386947631836, "learning_rate": 2.0145617315428638e-05, "loss": 1.6569, "step": 46550 }, { "epoch": 7.914329423763386, "grad_norm": 16.116287231445312, "learning_rate": 2.014278429372769e-05, "loss": 1.6596, "step": 46560 }, { "epoch": 7.916029236783954, "grad_norm": 13.811527252197266, "learning_rate": 2.0139951272026745e-05, "loss": 1.7878, "step": 46570 }, { "epoch": 7.917729049804522, "grad_norm": 18.05400276184082, "learning_rate": 2.0137118250325798e-05, "loss": 1.8113, "step": 46580 }, { "epoch": 7.9194288628250895, "grad_norm": 12.102497100830078, "learning_rate": 2.0134285228624852e-05, "loss": 1.6457, "step": 46590 }, { "epoch": 7.921128675845657, "grad_norm": 13.013218879699707, "learning_rate": 2.0131452206923905e-05, "loss": 1.6977, "step": 46600 }, { "epoch": 7.922828488866225, "grad_norm": 16.55135154724121, "learning_rate": 2.012861918522296e-05, "loss": 1.6159, "step": 46610 }, { "epoch": 7.9245283018867925, "grad_norm": 17.67045021057129, "learning_rate": 2.0125786163522016e-05, "loss": 1.5271, "step": 46620 }, { "epoch": 7.92622811490736, "grad_norm": 18.96387481689453, "learning_rate": 2.0122953141821066e-05, "loss": 1.9801, "step": 46630 }, { "epoch": 7.927927927927928, "grad_norm": 18.966533660888672, "learning_rate": 2.012012012012012e-05, "loss": 1.6591, "step": 46640 }, { "epoch": 7.929627740948495, "grad_norm": 18.92184066772461, "learning_rate": 2.0117287098419176e-05, "loss": 1.8224, "step": 46650 }, { "epoch": 7.931327553969063, "grad_norm": 23.34796905517578, "learning_rate": 2.011445407671823e-05, "loss": 1.6975, "step": 46660 }, { "epoch": 7.933027366989631, "grad_norm": 21.223308563232422, "learning_rate": 2.011162105501728e-05, "loss": 1.6469, "step": 46670 }, { "epoch": 7.934727180010199, "grad_norm": 16.60779571533203, "learning_rate": 2.0108788033316337e-05, "loss": 1.6774, "step": 46680 }, { "epoch": 7.936426993030767, "grad_norm": 19.558862686157227, "learning_rate": 2.010595501161539e-05, "loss": 1.8054, "step": 46690 }, { "epoch": 7.938126806051335, "grad_norm": 13.860349655151367, "learning_rate": 2.0103121989914444e-05, "loss": 1.8104, "step": 46700 }, { "epoch": 7.939826619071902, "grad_norm": 14.956341743469238, "learning_rate": 2.0100288968213497e-05, "loss": 1.5601, "step": 46710 }, { "epoch": 7.94152643209247, "grad_norm": 14.693888664245605, "learning_rate": 2.009745594651255e-05, "loss": 1.6948, "step": 46720 }, { "epoch": 7.9432262451130375, "grad_norm": 11.293800354003906, "learning_rate": 2.0094622924811605e-05, "loss": 1.6395, "step": 46730 }, { "epoch": 7.944926058133605, "grad_norm": 16.2112979888916, "learning_rate": 2.009178990311066e-05, "loss": 1.5423, "step": 46740 }, { "epoch": 7.946625871154173, "grad_norm": 16.335424423217773, "learning_rate": 2.008895688140971e-05, "loss": 1.5667, "step": 46750 }, { "epoch": 7.9483256841747405, "grad_norm": 10.91880989074707, "learning_rate": 2.0086123859708765e-05, "loss": 1.8939, "step": 46760 }, { "epoch": 7.950025497195308, "grad_norm": 14.452629089355469, "learning_rate": 2.0083290838007822e-05, "loss": 1.7191, "step": 46770 }, { "epoch": 7.951725310215876, "grad_norm": 16.807823181152344, "learning_rate": 2.0080457816306872e-05, "loss": 1.6173, "step": 46780 }, { "epoch": 7.953425123236444, "grad_norm": 13.255596160888672, "learning_rate": 2.0077624794605926e-05, "loss": 1.5444, "step": 46790 }, { "epoch": 7.955124936257012, "grad_norm": 18.34597396850586, "learning_rate": 2.0074791772904983e-05, "loss": 1.4525, "step": 46800 }, { "epoch": 7.95682474927758, "grad_norm": 10.542242050170898, "learning_rate": 2.0071958751204036e-05, "loss": 1.7073, "step": 46810 }, { "epoch": 7.958524562298147, "grad_norm": 24.174558639526367, "learning_rate": 2.0069125729503086e-05, "loss": 1.6519, "step": 46820 }, { "epoch": 7.960224375318715, "grad_norm": 17.601009368896484, "learning_rate": 2.0066292707802143e-05, "loss": 1.5775, "step": 46830 }, { "epoch": 7.961924188339283, "grad_norm": 15.319402694702148, "learning_rate": 2.0063459686101197e-05, "loss": 1.7049, "step": 46840 }, { "epoch": 7.96362400135985, "grad_norm": 12.584393501281738, "learning_rate": 2.006062666440025e-05, "loss": 1.6986, "step": 46850 }, { "epoch": 7.965323814380418, "grad_norm": 13.590289115905762, "learning_rate": 2.0057793642699304e-05, "loss": 1.5798, "step": 46860 }, { "epoch": 7.967023627400986, "grad_norm": 14.01019287109375, "learning_rate": 2.0054960620998357e-05, "loss": 1.6691, "step": 46870 }, { "epoch": 7.968723440421554, "grad_norm": 12.134774208068848, "learning_rate": 2.005212759929741e-05, "loss": 1.6754, "step": 46880 }, { "epoch": 7.970423253442121, "grad_norm": 15.585359573364258, "learning_rate": 2.0049294577596468e-05, "loss": 1.5484, "step": 46890 }, { "epoch": 7.972123066462689, "grad_norm": 11.23525619506836, "learning_rate": 2.0046461555895518e-05, "loss": 1.5444, "step": 46900 }, { "epoch": 7.973822879483257, "grad_norm": 16.20392417907715, "learning_rate": 2.004362853419457e-05, "loss": 1.6852, "step": 46910 }, { "epoch": 7.975522692503825, "grad_norm": 14.296004295349121, "learning_rate": 2.0040795512493628e-05, "loss": 1.6508, "step": 46920 }, { "epoch": 7.977222505524392, "grad_norm": 18.03374481201172, "learning_rate": 2.003796249079268e-05, "loss": 1.7394, "step": 46930 }, { "epoch": 7.97892231854496, "grad_norm": 22.739641189575195, "learning_rate": 2.0035129469091732e-05, "loss": 1.6078, "step": 46940 }, { "epoch": 7.980622131565528, "grad_norm": 18.460386276245117, "learning_rate": 2.003229644739079e-05, "loss": 1.5648, "step": 46950 }, { "epoch": 7.982321944586095, "grad_norm": 17.485628128051758, "learning_rate": 2.0029463425689842e-05, "loss": 1.7104, "step": 46960 }, { "epoch": 7.984021757606663, "grad_norm": 17.260683059692383, "learning_rate": 2.0026630403988892e-05, "loss": 1.5055, "step": 46970 }, { "epoch": 7.985721570627231, "grad_norm": 16.15277671813965, "learning_rate": 2.002379738228795e-05, "loss": 1.7348, "step": 46980 }, { "epoch": 7.987421383647799, "grad_norm": 17.022357940673828, "learning_rate": 2.0020964360587003e-05, "loss": 1.7545, "step": 46990 }, { "epoch": 7.989121196668367, "grad_norm": 14.895740509033203, "learning_rate": 2.0018131338886056e-05, "loss": 1.5359, "step": 47000 }, { "epoch": 7.9908210096889345, "grad_norm": 17.985057830810547, "learning_rate": 2.001529831718511e-05, "loss": 1.7867, "step": 47010 }, { "epoch": 7.992520822709502, "grad_norm": 22.65934181213379, "learning_rate": 2.0012465295484163e-05, "loss": 1.7556, "step": 47020 }, { "epoch": 7.99422063573007, "grad_norm": 16.732315063476562, "learning_rate": 2.0009632273783217e-05, "loss": 1.5694, "step": 47030 }, { "epoch": 7.9959204487506375, "grad_norm": 16.38052749633789, "learning_rate": 2.0006799252082274e-05, "loss": 1.6095, "step": 47040 }, { "epoch": 7.997620261771205, "grad_norm": 14.68635368347168, "learning_rate": 2.0003966230381324e-05, "loss": 1.7391, "step": 47050 }, { "epoch": 7.999320074791773, "grad_norm": 12.368148803710938, "learning_rate": 2.0001133208680378e-05, "loss": 1.5841, "step": 47060 }, { "epoch": 8.0, "eval_cer": 1.0, "eval_loss": 2.5345845222473145, "eval_runtime": 1959.1701, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.24, "step": 47064 }, { "epoch": 8.00101988781234, "grad_norm": 13.894981384277344, "learning_rate": 1.9998300186979434e-05, "loss": 1.5464, "step": 47070 }, { "epoch": 8.002719700832909, "grad_norm": 16.730175018310547, "learning_rate": 1.9995467165278488e-05, "loss": 1.6038, "step": 47080 }, { "epoch": 8.004419513853476, "grad_norm": 25.457395553588867, "learning_rate": 1.9992634143577538e-05, "loss": 1.295, "step": 47090 }, { "epoch": 8.006119326874044, "grad_norm": 8.759279251098633, "learning_rate": 1.9989801121876595e-05, "loss": 1.3128, "step": 47100 }, { "epoch": 8.007819139894611, "grad_norm": 16.90046501159668, "learning_rate": 1.998696810017565e-05, "loss": 1.277, "step": 47110 }, { "epoch": 8.00951895291518, "grad_norm": 14.800543785095215, "learning_rate": 1.99841350784747e-05, "loss": 1.2826, "step": 47120 }, { "epoch": 8.011218765935746, "grad_norm": 17.427894592285156, "learning_rate": 1.9981302056773756e-05, "loss": 1.5749, "step": 47130 }, { "epoch": 8.012918578956315, "grad_norm": 19.098934173583984, "learning_rate": 1.997846903507281e-05, "loss": 1.5376, "step": 47140 }, { "epoch": 8.014618391976883, "grad_norm": 14.585938453674316, "learning_rate": 1.9975636013371863e-05, "loss": 1.4614, "step": 47150 }, { "epoch": 8.01631820499745, "grad_norm": 17.4891357421875, "learning_rate": 1.9972802991670916e-05, "loss": 1.2665, "step": 47160 }, { "epoch": 8.018018018018019, "grad_norm": 20.499717712402344, "learning_rate": 1.996996996996997e-05, "loss": 1.3785, "step": 47170 }, { "epoch": 8.019717831038585, "grad_norm": 14.13591480255127, "learning_rate": 1.9967136948269023e-05, "loss": 1.3327, "step": 47180 }, { "epoch": 8.021417644059154, "grad_norm": 19.251371383666992, "learning_rate": 1.996430392656808e-05, "loss": 1.2777, "step": 47190 }, { "epoch": 8.02311745707972, "grad_norm": 11.01679801940918, "learning_rate": 1.996147090486713e-05, "loss": 1.5636, "step": 47200 }, { "epoch": 8.02481727010029, "grad_norm": 10.534008979797363, "learning_rate": 1.9958637883166184e-05, "loss": 1.4187, "step": 47210 }, { "epoch": 8.026517083120856, "grad_norm": 14.590827941894531, "learning_rate": 1.995580486146524e-05, "loss": 1.5753, "step": 47220 }, { "epoch": 8.028216896141425, "grad_norm": 17.6296329498291, "learning_rate": 1.9952971839764294e-05, "loss": 1.5194, "step": 47230 }, { "epoch": 8.029916709161991, "grad_norm": 12.082611083984375, "learning_rate": 1.9950138818063344e-05, "loss": 1.4768, "step": 47240 }, { "epoch": 8.03161652218256, "grad_norm": 15.43915843963623, "learning_rate": 1.99473057963624e-05, "loss": 1.5714, "step": 47250 }, { "epoch": 8.033316335203128, "grad_norm": 14.110713005065918, "learning_rate": 1.9944472774661455e-05, "loss": 1.6429, "step": 47260 }, { "epoch": 8.035016148223695, "grad_norm": 14.583035469055176, "learning_rate": 1.9941639752960508e-05, "loss": 1.3053, "step": 47270 }, { "epoch": 8.036715961244264, "grad_norm": 15.854816436767578, "learning_rate": 1.9938806731259562e-05, "loss": 1.681, "step": 47280 }, { "epoch": 8.03841577426483, "grad_norm": 17.87114143371582, "learning_rate": 1.9935973709558615e-05, "loss": 1.4415, "step": 47290 }, { "epoch": 8.040115587285399, "grad_norm": 16.516809463500977, "learning_rate": 1.9933140687857672e-05, "loss": 1.4755, "step": 47300 }, { "epoch": 8.041815400305966, "grad_norm": 11.60367488861084, "learning_rate": 1.9930307666156722e-05, "loss": 1.3532, "step": 47310 }, { "epoch": 8.043515213326534, "grad_norm": 12.025951385498047, "learning_rate": 1.9927474644455776e-05, "loss": 1.1864, "step": 47320 }, { "epoch": 8.045215026347101, "grad_norm": 14.112098693847656, "learning_rate": 1.9924641622754833e-05, "loss": 1.6237, "step": 47330 }, { "epoch": 8.04691483936767, "grad_norm": 14.577850341796875, "learning_rate": 1.9921808601053886e-05, "loss": 1.4742, "step": 47340 }, { "epoch": 8.048614652388236, "grad_norm": 13.51563835144043, "learning_rate": 1.9918975579352936e-05, "loss": 1.5198, "step": 47350 }, { "epoch": 8.050314465408805, "grad_norm": 11.26042652130127, "learning_rate": 1.9916142557651993e-05, "loss": 1.5135, "step": 47360 }, { "epoch": 8.052014278429374, "grad_norm": 14.76728630065918, "learning_rate": 1.9913309535951047e-05, "loss": 1.3901, "step": 47370 }, { "epoch": 8.05371409144994, "grad_norm": 16.586376190185547, "learning_rate": 1.99104765142501e-05, "loss": 1.4205, "step": 47380 }, { "epoch": 8.055413904470509, "grad_norm": 13.852712631225586, "learning_rate": 1.9907643492549154e-05, "loss": 1.4906, "step": 47390 }, { "epoch": 8.057113717491076, "grad_norm": 13.532713890075684, "learning_rate": 1.9904810470848207e-05, "loss": 1.45, "step": 47400 }, { "epoch": 8.058813530511644, "grad_norm": 17.253034591674805, "learning_rate": 1.990197744914726e-05, "loss": 1.471, "step": 47410 }, { "epoch": 8.060513343532211, "grad_norm": 14.473877906799316, "learning_rate": 1.9899144427446318e-05, "loss": 1.5516, "step": 47420 }, { "epoch": 8.06221315655278, "grad_norm": 16.963449478149414, "learning_rate": 1.9896311405745368e-05, "loss": 1.4765, "step": 47430 }, { "epoch": 8.063912969573346, "grad_norm": 17.0103759765625, "learning_rate": 1.989347838404442e-05, "loss": 1.5172, "step": 47440 }, { "epoch": 8.065612782593915, "grad_norm": 13.657273292541504, "learning_rate": 1.989064536234348e-05, "loss": 1.6825, "step": 47450 }, { "epoch": 8.067312595614482, "grad_norm": 18.53358268737793, "learning_rate": 1.988781234064253e-05, "loss": 1.5014, "step": 47460 }, { "epoch": 8.06901240863505, "grad_norm": 12.728409767150879, "learning_rate": 1.9884979318941582e-05, "loss": 1.4592, "step": 47470 }, { "epoch": 8.070712221655619, "grad_norm": 14.49951457977295, "learning_rate": 1.988214629724064e-05, "loss": 1.3558, "step": 47480 }, { "epoch": 8.072412034676185, "grad_norm": 13.688368797302246, "learning_rate": 1.9879313275539693e-05, "loss": 1.413, "step": 47490 }, { "epoch": 8.074111847696754, "grad_norm": 11.493253707885742, "learning_rate": 1.9876480253838743e-05, "loss": 1.5091, "step": 47500 }, { "epoch": 8.07581166071732, "grad_norm": 11.957959175109863, "learning_rate": 1.98736472321378e-05, "loss": 1.5565, "step": 47510 }, { "epoch": 8.07751147373789, "grad_norm": 13.344049453735352, "learning_rate": 1.9870814210436853e-05, "loss": 1.5012, "step": 47520 }, { "epoch": 8.079211286758456, "grad_norm": 21.198896408081055, "learning_rate": 1.9867981188735907e-05, "loss": 1.4441, "step": 47530 }, { "epoch": 8.080911099779025, "grad_norm": 10.87236499786377, "learning_rate": 1.986514816703496e-05, "loss": 1.4625, "step": 47540 }, { "epoch": 8.082610912799591, "grad_norm": 15.861610412597656, "learning_rate": 1.9862315145334014e-05, "loss": 1.4503, "step": 47550 }, { "epoch": 8.08431072582016, "grad_norm": 13.963499069213867, "learning_rate": 1.9859482123633067e-05, "loss": 1.5945, "step": 47560 }, { "epoch": 8.086010538840728, "grad_norm": 19.93811798095703, "learning_rate": 1.9856649101932124e-05, "loss": 1.2523, "step": 47570 }, { "epoch": 8.087710351861295, "grad_norm": 13.52685260772705, "learning_rate": 1.9853816080231174e-05, "loss": 1.5774, "step": 47580 }, { "epoch": 8.089410164881864, "grad_norm": 14.672311782836914, "learning_rate": 1.9850983058530228e-05, "loss": 1.5409, "step": 47590 }, { "epoch": 8.09110997790243, "grad_norm": 16.494577407836914, "learning_rate": 1.9848150036829285e-05, "loss": 1.5395, "step": 47600 }, { "epoch": 8.092809790922999, "grad_norm": 13.7894926071167, "learning_rate": 1.9845317015128335e-05, "loss": 1.686, "step": 47610 }, { "epoch": 8.094509603943566, "grad_norm": 20.33984375, "learning_rate": 1.9842483993427388e-05, "loss": 1.3435, "step": 47620 }, { "epoch": 8.096209416964134, "grad_norm": 15.688507080078125, "learning_rate": 1.9839650971726445e-05, "loss": 1.4796, "step": 47630 }, { "epoch": 8.097909229984701, "grad_norm": 15.253721237182617, "learning_rate": 1.98368179500255e-05, "loss": 1.6932, "step": 47640 }, { "epoch": 8.09960904300527, "grad_norm": 21.06658172607422, "learning_rate": 1.983398492832455e-05, "loss": 1.4991, "step": 47650 }, { "epoch": 8.101308856025836, "grad_norm": 15.4439697265625, "learning_rate": 1.9831151906623606e-05, "loss": 1.5309, "step": 47660 }, { "epoch": 8.103008669046405, "grad_norm": 17.93879508972168, "learning_rate": 1.982831888492266e-05, "loss": 1.4097, "step": 47670 }, { "epoch": 8.104708482066973, "grad_norm": 24.30222511291504, "learning_rate": 1.9825485863221713e-05, "loss": 1.446, "step": 47680 }, { "epoch": 8.10640829508754, "grad_norm": 15.081693649291992, "learning_rate": 1.9822652841520766e-05, "loss": 1.3127, "step": 47690 }, { "epoch": 8.108108108108109, "grad_norm": 17.481990814208984, "learning_rate": 1.981981981981982e-05, "loss": 1.4051, "step": 47700 }, { "epoch": 8.109807921128676, "grad_norm": 17.29779624938965, "learning_rate": 1.9816986798118873e-05, "loss": 1.495, "step": 47710 }, { "epoch": 8.111507734149244, "grad_norm": 17.49005699157715, "learning_rate": 1.981415377641793e-05, "loss": 1.1058, "step": 47720 }, { "epoch": 8.11320754716981, "grad_norm": 11.80483627319336, "learning_rate": 1.981132075471698e-05, "loss": 1.4299, "step": 47730 }, { "epoch": 8.11490736019038, "grad_norm": 15.262704849243164, "learning_rate": 1.9808487733016034e-05, "loss": 1.6228, "step": 47740 }, { "epoch": 8.116607173210946, "grad_norm": 16.247966766357422, "learning_rate": 1.980565471131509e-05, "loss": 1.4681, "step": 47750 }, { "epoch": 8.118306986231515, "grad_norm": 19.899456024169922, "learning_rate": 1.9802821689614144e-05, "loss": 1.5572, "step": 47760 }, { "epoch": 8.120006799252081, "grad_norm": 13.825891494750977, "learning_rate": 1.9799988667913195e-05, "loss": 1.2718, "step": 47770 }, { "epoch": 8.12170661227265, "grad_norm": 12.52393627166748, "learning_rate": 1.979715564621225e-05, "loss": 1.5201, "step": 47780 }, { "epoch": 8.123406425293219, "grad_norm": 12.94683837890625, "learning_rate": 1.9794322624511305e-05, "loss": 1.6496, "step": 47790 }, { "epoch": 8.125106238313785, "grad_norm": 14.333255767822266, "learning_rate": 1.9791489602810355e-05, "loss": 1.6673, "step": 47800 }, { "epoch": 8.126806051334354, "grad_norm": 12.011157989501953, "learning_rate": 1.9788656581109412e-05, "loss": 1.2097, "step": 47810 }, { "epoch": 8.12850586435492, "grad_norm": 12.927244186401367, "learning_rate": 1.9785823559408466e-05, "loss": 1.4853, "step": 47820 }, { "epoch": 8.13020567737549, "grad_norm": 14.989509582519531, "learning_rate": 1.978299053770752e-05, "loss": 1.6529, "step": 47830 }, { "epoch": 8.131905490396056, "grad_norm": 10.189419746398926, "learning_rate": 1.9780157516006573e-05, "loss": 1.4442, "step": 47840 }, { "epoch": 8.133605303416624, "grad_norm": 14.293309211730957, "learning_rate": 1.9777324494305626e-05, "loss": 1.5026, "step": 47850 }, { "epoch": 8.135305116437191, "grad_norm": 20.567960739135742, "learning_rate": 1.977449147260468e-05, "loss": 1.5047, "step": 47860 }, { "epoch": 8.13700492945776, "grad_norm": 14.741894721984863, "learning_rate": 1.9771658450903737e-05, "loss": 1.305, "step": 47870 }, { "epoch": 8.138704742478327, "grad_norm": 16.758493423461914, "learning_rate": 1.9768825429202787e-05, "loss": 1.2702, "step": 47880 }, { "epoch": 8.140404555498895, "grad_norm": 13.733566284179688, "learning_rate": 1.976599240750184e-05, "loss": 1.2985, "step": 47890 }, { "epoch": 8.142104368519464, "grad_norm": 11.430124282836914, "learning_rate": 1.9763159385800897e-05, "loss": 1.6612, "step": 47900 }, { "epoch": 8.14380418154003, "grad_norm": 16.33230209350586, "learning_rate": 1.976032636409995e-05, "loss": 1.4635, "step": 47910 }, { "epoch": 8.145503994560599, "grad_norm": 12.043024063110352, "learning_rate": 1.9757493342399e-05, "loss": 1.2847, "step": 47920 }, { "epoch": 8.147203807581166, "grad_norm": 17.047697067260742, "learning_rate": 1.9754660320698058e-05, "loss": 1.8385, "step": 47930 }, { "epoch": 8.148903620601734, "grad_norm": 13.057690620422363, "learning_rate": 1.975182729899711e-05, "loss": 1.4057, "step": 47940 }, { "epoch": 8.150603433622301, "grad_norm": 11.406107902526855, "learning_rate": 1.9748994277296165e-05, "loss": 1.4223, "step": 47950 }, { "epoch": 8.15230324664287, "grad_norm": 14.291711807250977, "learning_rate": 1.9746161255595218e-05, "loss": 1.3186, "step": 47960 }, { "epoch": 8.154003059663436, "grad_norm": 13.959522247314453, "learning_rate": 1.9743328233894272e-05, "loss": 1.3914, "step": 47970 }, { "epoch": 8.155702872684005, "grad_norm": 15.601202011108398, "learning_rate": 1.9740495212193325e-05, "loss": 1.4537, "step": 47980 }, { "epoch": 8.157402685704572, "grad_norm": 17.27667999267578, "learning_rate": 1.973766219049238e-05, "loss": 1.5727, "step": 47990 }, { "epoch": 8.15910249872514, "grad_norm": 21.81532096862793, "learning_rate": 1.9734829168791432e-05, "loss": 1.3436, "step": 48000 }, { "epoch": 8.160802311745709, "grad_norm": 16.643041610717773, "learning_rate": 1.9731996147090486e-05, "loss": 1.3445, "step": 48010 }, { "epoch": 8.162502124766275, "grad_norm": 16.26200294494629, "learning_rate": 1.9729163125389543e-05, "loss": 1.5988, "step": 48020 }, { "epoch": 8.164201937786844, "grad_norm": 12.867048263549805, "learning_rate": 1.9726330103688593e-05, "loss": 1.2929, "step": 48030 }, { "epoch": 8.16590175080741, "grad_norm": 15.221369743347168, "learning_rate": 1.972349708198765e-05, "loss": 1.3916, "step": 48040 }, { "epoch": 8.16760156382798, "grad_norm": 14.656600952148438, "learning_rate": 1.9720664060286703e-05, "loss": 1.5887, "step": 48050 }, { "epoch": 8.169301376848546, "grad_norm": 18.61640739440918, "learning_rate": 1.9717831038585757e-05, "loss": 1.5254, "step": 48060 }, { "epoch": 8.171001189869115, "grad_norm": 12.87083625793457, "learning_rate": 1.971499801688481e-05, "loss": 1.2734, "step": 48070 }, { "epoch": 8.172701002889681, "grad_norm": 14.460030555725098, "learning_rate": 1.9712164995183864e-05, "loss": 1.4713, "step": 48080 }, { "epoch": 8.17440081591025, "grad_norm": 15.70753288269043, "learning_rate": 1.9709331973482917e-05, "loss": 1.4459, "step": 48090 }, { "epoch": 8.176100628930818, "grad_norm": 14.980108261108398, "learning_rate": 1.9706498951781974e-05, "loss": 1.2356, "step": 48100 }, { "epoch": 8.177800441951385, "grad_norm": 13.795439720153809, "learning_rate": 1.9703665930081024e-05, "loss": 1.5237, "step": 48110 }, { "epoch": 8.179500254971954, "grad_norm": 16.860322952270508, "learning_rate": 1.9700832908380078e-05, "loss": 1.6053, "step": 48120 }, { "epoch": 8.18120006799252, "grad_norm": 17.66278839111328, "learning_rate": 1.9697999886679135e-05, "loss": 1.2388, "step": 48130 }, { "epoch": 8.182899881013089, "grad_norm": 14.13290023803711, "learning_rate": 1.9695166864978185e-05, "loss": 1.4253, "step": 48140 }, { "epoch": 8.184599694033656, "grad_norm": 11.66247844696045, "learning_rate": 1.969233384327724e-05, "loss": 1.5787, "step": 48150 }, { "epoch": 8.186299507054224, "grad_norm": 13.125110626220703, "learning_rate": 1.9689500821576295e-05, "loss": 1.4058, "step": 48160 }, { "epoch": 8.187999320074791, "grad_norm": 20.73091697692871, "learning_rate": 1.968666779987535e-05, "loss": 1.5441, "step": 48170 }, { "epoch": 8.18969913309536, "grad_norm": 16.25905418395996, "learning_rate": 1.96838347781744e-05, "loss": 1.555, "step": 48180 }, { "epoch": 8.191398946115926, "grad_norm": 38.6923942565918, "learning_rate": 1.9681001756473456e-05, "loss": 1.3843, "step": 48190 }, { "epoch": 8.193098759136495, "grad_norm": 14.852294921875, "learning_rate": 1.967816873477251e-05, "loss": 1.4771, "step": 48200 }, { "epoch": 8.194798572157064, "grad_norm": 17.0783748626709, "learning_rate": 1.9675335713071563e-05, "loss": 1.3601, "step": 48210 }, { "epoch": 8.19649838517763, "grad_norm": 18.04972267150879, "learning_rate": 1.9672502691370617e-05, "loss": 1.2962, "step": 48220 }, { "epoch": 8.198198198198199, "grad_norm": 17.019899368286133, "learning_rate": 1.966966966966967e-05, "loss": 1.6175, "step": 48230 }, { "epoch": 8.199898011218766, "grad_norm": 11.247371673583984, "learning_rate": 1.9666836647968724e-05, "loss": 1.4278, "step": 48240 }, { "epoch": 8.201597824239334, "grad_norm": 16.60127067565918, "learning_rate": 1.966400362626778e-05, "loss": 1.6261, "step": 48250 }, { "epoch": 8.203297637259901, "grad_norm": 15.143275260925293, "learning_rate": 1.966117060456683e-05, "loss": 1.5709, "step": 48260 }, { "epoch": 8.20499745028047, "grad_norm": 14.784162521362305, "learning_rate": 1.9658337582865884e-05, "loss": 1.6319, "step": 48270 }, { "epoch": 8.206697263301036, "grad_norm": 17.689695358276367, "learning_rate": 1.965550456116494e-05, "loss": 1.4317, "step": 48280 }, { "epoch": 8.208397076321605, "grad_norm": 14.22397232055664, "learning_rate": 1.9652671539463995e-05, "loss": 1.5593, "step": 48290 }, { "epoch": 8.210096889342172, "grad_norm": 13.886396408081055, "learning_rate": 1.9649838517763045e-05, "loss": 1.3972, "step": 48300 }, { "epoch": 8.21179670236274, "grad_norm": 22.53569984436035, "learning_rate": 1.96470054960621e-05, "loss": 1.546, "step": 48310 }, { "epoch": 8.213496515383309, "grad_norm": 17.604938507080078, "learning_rate": 1.9644172474361155e-05, "loss": 1.5181, "step": 48320 }, { "epoch": 8.215196328403875, "grad_norm": 21.714662551879883, "learning_rate": 1.9641339452660205e-05, "loss": 1.3741, "step": 48330 }, { "epoch": 8.216896141424444, "grad_norm": 12.570064544677734, "learning_rate": 1.9638506430959262e-05, "loss": 1.7821, "step": 48340 }, { "epoch": 8.21859595444501, "grad_norm": 10.31430721282959, "learning_rate": 1.9635673409258316e-05, "loss": 1.5401, "step": 48350 }, { "epoch": 8.22029576746558, "grad_norm": 17.489608764648438, "learning_rate": 1.963284038755737e-05, "loss": 1.3251, "step": 48360 }, { "epoch": 8.221995580486146, "grad_norm": 14.841327667236328, "learning_rate": 1.9630007365856423e-05, "loss": 1.9447, "step": 48370 }, { "epoch": 8.223695393506715, "grad_norm": 18.984556198120117, "learning_rate": 1.9627174344155476e-05, "loss": 1.5611, "step": 48380 }, { "epoch": 8.225395206527281, "grad_norm": 16.205738067626953, "learning_rate": 1.962434132245453e-05, "loss": 1.5832, "step": 48390 }, { "epoch": 8.22709501954785, "grad_norm": 17.52104949951172, "learning_rate": 1.9621508300753587e-05, "loss": 1.4606, "step": 48400 }, { "epoch": 8.228794832568418, "grad_norm": 13.698378562927246, "learning_rate": 1.9618675279052637e-05, "loss": 1.3557, "step": 48410 }, { "epoch": 8.230494645588985, "grad_norm": 11.448545455932617, "learning_rate": 1.961584225735169e-05, "loss": 1.5007, "step": 48420 }, { "epoch": 8.232194458609554, "grad_norm": 14.111413955688477, "learning_rate": 1.9613009235650747e-05, "loss": 1.5607, "step": 48430 }, { "epoch": 8.23389427163012, "grad_norm": 16.93416404724121, "learning_rate": 1.96101762139498e-05, "loss": 1.4946, "step": 48440 }, { "epoch": 8.235594084650689, "grad_norm": 13.400810241699219, "learning_rate": 1.960734319224885e-05, "loss": 1.5334, "step": 48450 }, { "epoch": 8.237293897671256, "grad_norm": 19.754859924316406, "learning_rate": 1.9604510170547908e-05, "loss": 1.3582, "step": 48460 }, { "epoch": 8.238993710691824, "grad_norm": 11.708166122436523, "learning_rate": 1.960167714884696e-05, "loss": 1.3267, "step": 48470 }, { "epoch": 8.240693523712391, "grad_norm": 16.83407974243164, "learning_rate": 1.959884412714601e-05, "loss": 1.3187, "step": 48480 }, { "epoch": 8.24239333673296, "grad_norm": 22.139528274536133, "learning_rate": 1.959601110544507e-05, "loss": 1.3852, "step": 48490 }, { "epoch": 8.244093149753526, "grad_norm": 11.59757137298584, "learning_rate": 1.9593178083744122e-05, "loss": 1.5077, "step": 48500 }, { "epoch": 8.245792962774095, "grad_norm": 15.624076843261719, "learning_rate": 1.9590345062043175e-05, "loss": 1.2415, "step": 48510 }, { "epoch": 8.247492775794663, "grad_norm": 17.34515380859375, "learning_rate": 1.958751204034223e-05, "loss": 1.592, "step": 48520 }, { "epoch": 8.24919258881523, "grad_norm": 23.18039321899414, "learning_rate": 1.9584679018641282e-05, "loss": 1.4002, "step": 48530 }, { "epoch": 8.250892401835799, "grad_norm": 17.05654525756836, "learning_rate": 1.9581845996940336e-05, "loss": 1.466, "step": 48540 }, { "epoch": 8.252592214856366, "grad_norm": 18.12542152404785, "learning_rate": 1.9579012975239393e-05, "loss": 1.5608, "step": 48550 }, { "epoch": 8.254292027876934, "grad_norm": 21.103609085083008, "learning_rate": 1.9576179953538443e-05, "loss": 1.3569, "step": 48560 }, { "epoch": 8.2559918408975, "grad_norm": 15.699748992919922, "learning_rate": 1.9573346931837497e-05, "loss": 1.3537, "step": 48570 }, { "epoch": 8.25769165391807, "grad_norm": 16.542814254760742, "learning_rate": 1.9570513910136553e-05, "loss": 1.5767, "step": 48580 }, { "epoch": 8.259391466938636, "grad_norm": 27.174943923950195, "learning_rate": 1.9567680888435607e-05, "loss": 1.3019, "step": 48590 }, { "epoch": 8.261091279959205, "grad_norm": 11.411260604858398, "learning_rate": 1.9564847866734657e-05, "loss": 1.281, "step": 48600 }, { "epoch": 8.262791092979771, "grad_norm": 12.416095733642578, "learning_rate": 1.9562014845033714e-05, "loss": 1.3102, "step": 48610 }, { "epoch": 8.26449090600034, "grad_norm": 14.154905319213867, "learning_rate": 1.9559181823332768e-05, "loss": 1.3739, "step": 48620 }, { "epoch": 8.266190719020909, "grad_norm": 15.212918281555176, "learning_rate": 1.955634880163182e-05, "loss": 1.6203, "step": 48630 }, { "epoch": 8.267890532041475, "grad_norm": 20.17672348022461, "learning_rate": 1.9553515779930875e-05, "loss": 1.5642, "step": 48640 }, { "epoch": 8.269590345062044, "grad_norm": 13.134618759155273, "learning_rate": 1.9550682758229928e-05, "loss": 1.554, "step": 48650 }, { "epoch": 8.27129015808261, "grad_norm": 15.871551513671875, "learning_rate": 1.954784973652898e-05, "loss": 1.4629, "step": 48660 }, { "epoch": 8.27298997110318, "grad_norm": 22.204364776611328, "learning_rate": 1.9545016714828035e-05, "loss": 1.4713, "step": 48670 }, { "epoch": 8.274689784123746, "grad_norm": 35.90005874633789, "learning_rate": 1.954218369312709e-05, "loss": 1.3236, "step": 48680 }, { "epoch": 8.276389597144314, "grad_norm": 13.517556190490723, "learning_rate": 1.9539350671426142e-05, "loss": 1.479, "step": 48690 }, { "epoch": 8.278089410164881, "grad_norm": 12.504782676696777, "learning_rate": 1.95365176497252e-05, "loss": 1.7486, "step": 48700 }, { "epoch": 8.27978922318545, "grad_norm": 14.998722076416016, "learning_rate": 1.953368462802425e-05, "loss": 1.548, "step": 48710 }, { "epoch": 8.281489036206017, "grad_norm": 22.327411651611328, "learning_rate": 1.9530851606323303e-05, "loss": 1.398, "step": 48720 }, { "epoch": 8.283188849226585, "grad_norm": 15.302849769592285, "learning_rate": 1.952801858462236e-05, "loss": 1.454, "step": 48730 }, { "epoch": 8.284888662247154, "grad_norm": 15.307734489440918, "learning_rate": 1.9525185562921413e-05, "loss": 1.6533, "step": 48740 }, { "epoch": 8.28658847526772, "grad_norm": 18.941699981689453, "learning_rate": 1.9522352541220467e-05, "loss": 1.4704, "step": 48750 }, { "epoch": 8.288288288288289, "grad_norm": 21.17153549194336, "learning_rate": 1.951951951951952e-05, "loss": 1.3793, "step": 48760 }, { "epoch": 8.289988101308856, "grad_norm": 13.333123207092285, "learning_rate": 1.9516686497818574e-05, "loss": 1.3668, "step": 48770 }, { "epoch": 8.291687914329424, "grad_norm": 19.110363006591797, "learning_rate": 1.951385347611763e-05, "loss": 1.5356, "step": 48780 }, { "epoch": 8.293387727349991, "grad_norm": 16.943885803222656, "learning_rate": 1.951102045441668e-05, "loss": 1.459, "step": 48790 }, { "epoch": 8.29508754037056, "grad_norm": 18.578203201293945, "learning_rate": 1.9508187432715734e-05, "loss": 1.3533, "step": 48800 }, { "epoch": 8.296787353391126, "grad_norm": 14.824593544006348, "learning_rate": 1.950535441101479e-05, "loss": 1.5981, "step": 48810 }, { "epoch": 8.298487166411695, "grad_norm": 10.818184852600098, "learning_rate": 1.950252138931384e-05, "loss": 1.5511, "step": 48820 }, { "epoch": 8.300186979432262, "grad_norm": 12.801305770874023, "learning_rate": 1.9499688367612895e-05, "loss": 1.443, "step": 48830 }, { "epoch": 8.30188679245283, "grad_norm": 14.04128360748291, "learning_rate": 1.9496855345911952e-05, "loss": 1.5572, "step": 48840 }, { "epoch": 8.303586605473399, "grad_norm": 14.106844902038574, "learning_rate": 1.9494022324211005e-05, "loss": 1.3431, "step": 48850 }, { "epoch": 8.305286418493965, "grad_norm": 12.838894844055176, "learning_rate": 1.9491189302510055e-05, "loss": 1.526, "step": 48860 }, { "epoch": 8.306986231514534, "grad_norm": 12.468385696411133, "learning_rate": 1.9488356280809112e-05, "loss": 1.4968, "step": 48870 }, { "epoch": 8.3086860445351, "grad_norm": 9.31106948852539, "learning_rate": 1.9485523259108166e-05, "loss": 1.6596, "step": 48880 }, { "epoch": 8.31038585755567, "grad_norm": 20.411386489868164, "learning_rate": 1.948269023740722e-05, "loss": 1.2817, "step": 48890 }, { "epoch": 8.312085670576236, "grad_norm": 18.877599716186523, "learning_rate": 1.9479857215706273e-05, "loss": 1.5572, "step": 48900 }, { "epoch": 8.313785483596805, "grad_norm": 19.115385055541992, "learning_rate": 1.9477024194005326e-05, "loss": 1.3684, "step": 48910 }, { "epoch": 8.315485296617371, "grad_norm": 17.155197143554688, "learning_rate": 1.947419117230438e-05, "loss": 1.4632, "step": 48920 }, { "epoch": 8.31718510963794, "grad_norm": 21.180673599243164, "learning_rate": 1.9471358150603437e-05, "loss": 1.5175, "step": 48930 }, { "epoch": 8.318884922658508, "grad_norm": 11.254162788391113, "learning_rate": 1.9468525128902487e-05, "loss": 1.2141, "step": 48940 }, { "epoch": 8.320584735679075, "grad_norm": 12.626816749572754, "learning_rate": 1.946569210720154e-05, "loss": 1.4571, "step": 48950 }, { "epoch": 8.322284548699644, "grad_norm": 16.10625648498535, "learning_rate": 1.9462859085500597e-05, "loss": 1.6362, "step": 48960 }, { "epoch": 8.32398436172021, "grad_norm": 22.01361846923828, "learning_rate": 1.946002606379965e-05, "loss": 1.4028, "step": 48970 }, { "epoch": 8.325684174740779, "grad_norm": 15.261032104492188, "learning_rate": 1.94571930420987e-05, "loss": 1.4648, "step": 48980 }, { "epoch": 8.327383987761346, "grad_norm": 16.100547790527344, "learning_rate": 1.9454360020397758e-05, "loss": 1.5021, "step": 48990 }, { "epoch": 8.329083800781914, "grad_norm": 15.559648513793945, "learning_rate": 1.945152699869681e-05, "loss": 1.4226, "step": 49000 }, { "epoch": 8.330783613802481, "grad_norm": 13.149099349975586, "learning_rate": 1.9448693976995862e-05, "loss": 1.4099, "step": 49010 }, { "epoch": 8.33248342682305, "grad_norm": 14.243228912353516, "learning_rate": 1.944586095529492e-05, "loss": 1.5251, "step": 49020 }, { "epoch": 8.334183239843616, "grad_norm": 11.988452911376953, "learning_rate": 1.9443027933593972e-05, "loss": 1.4685, "step": 49030 }, { "epoch": 8.335883052864185, "grad_norm": 14.393546104431152, "learning_rate": 1.9440194911893026e-05, "loss": 1.1797, "step": 49040 }, { "epoch": 8.337582865884754, "grad_norm": 13.054429054260254, "learning_rate": 1.943736189019208e-05, "loss": 1.4317, "step": 49050 }, { "epoch": 8.33928267890532, "grad_norm": 14.352288246154785, "learning_rate": 1.9434528868491133e-05, "loss": 1.5582, "step": 49060 }, { "epoch": 8.340982491925889, "grad_norm": 17.80474281311035, "learning_rate": 1.9431695846790186e-05, "loss": 1.3373, "step": 49070 }, { "epoch": 8.342682304946456, "grad_norm": 17.001157760620117, "learning_rate": 1.9428862825089243e-05, "loss": 1.4856, "step": 49080 }, { "epoch": 8.344382117967024, "grad_norm": 14.389717102050781, "learning_rate": 1.9426029803388293e-05, "loss": 1.5689, "step": 49090 }, { "epoch": 8.34608193098759, "grad_norm": 20.895143508911133, "learning_rate": 1.9423196781687347e-05, "loss": 1.6231, "step": 49100 }, { "epoch": 8.34778174400816, "grad_norm": 22.014175415039062, "learning_rate": 1.9420363759986404e-05, "loss": 1.4335, "step": 49110 }, { "epoch": 8.349481557028726, "grad_norm": 17.072986602783203, "learning_rate": 1.9417530738285457e-05, "loss": 1.5566, "step": 49120 }, { "epoch": 8.351181370049295, "grad_norm": 15.15111255645752, "learning_rate": 1.9414697716584507e-05, "loss": 1.5985, "step": 49130 }, { "epoch": 8.352881183069861, "grad_norm": 21.57499122619629, "learning_rate": 1.9411864694883564e-05, "loss": 1.2447, "step": 49140 }, { "epoch": 8.35458099609043, "grad_norm": 11.597533226013184, "learning_rate": 1.9409031673182618e-05, "loss": 1.5328, "step": 49150 }, { "epoch": 8.356280809110999, "grad_norm": 21.729162216186523, "learning_rate": 1.940619865148167e-05, "loss": 1.3888, "step": 49160 }, { "epoch": 8.357980622131565, "grad_norm": 12.332221031188965, "learning_rate": 1.9403365629780725e-05, "loss": 1.3512, "step": 49170 }, { "epoch": 8.359680435152134, "grad_norm": 13.586244583129883, "learning_rate": 1.940053260807978e-05, "loss": 1.5119, "step": 49180 }, { "epoch": 8.3613802481727, "grad_norm": 16.212507247924805, "learning_rate": 1.9397699586378832e-05, "loss": 1.3786, "step": 49190 }, { "epoch": 8.36308006119327, "grad_norm": 11.387866020202637, "learning_rate": 1.9394866564677885e-05, "loss": 1.4387, "step": 49200 }, { "epoch": 8.364779874213836, "grad_norm": 12.349690437316895, "learning_rate": 1.939203354297694e-05, "loss": 1.3888, "step": 49210 }, { "epoch": 8.366479687234404, "grad_norm": 11.99411392211914, "learning_rate": 1.9389200521275992e-05, "loss": 1.6463, "step": 49220 }, { "epoch": 8.368179500254971, "grad_norm": 12.274632453918457, "learning_rate": 1.938636749957505e-05, "loss": 1.5743, "step": 49230 }, { "epoch": 8.36987931327554, "grad_norm": 11.553925514221191, "learning_rate": 1.93835344778741e-05, "loss": 1.5048, "step": 49240 }, { "epoch": 8.371579126296108, "grad_norm": 16.55424690246582, "learning_rate": 1.9380701456173153e-05, "loss": 1.5279, "step": 49250 }, { "epoch": 8.373278939316675, "grad_norm": 16.104738235473633, "learning_rate": 1.937786843447221e-05, "loss": 1.3919, "step": 49260 }, { "epoch": 8.374978752337244, "grad_norm": 14.890530586242676, "learning_rate": 1.9375035412771263e-05, "loss": 1.3846, "step": 49270 }, { "epoch": 8.37667856535781, "grad_norm": 13.26028823852539, "learning_rate": 1.9372202391070314e-05, "loss": 1.5443, "step": 49280 }, { "epoch": 8.378378378378379, "grad_norm": 14.569647789001465, "learning_rate": 1.936936936936937e-05, "loss": 1.4836, "step": 49290 }, { "epoch": 8.380078191398946, "grad_norm": 16.909530639648438, "learning_rate": 1.9366536347668424e-05, "loss": 1.6586, "step": 49300 }, { "epoch": 8.381778004419514, "grad_norm": 15.997077941894531, "learning_rate": 1.9363703325967478e-05, "loss": 1.6138, "step": 49310 }, { "epoch": 8.383477817440081, "grad_norm": 18.096786499023438, "learning_rate": 1.936087030426653e-05, "loss": 1.073, "step": 49320 }, { "epoch": 8.38517763046065, "grad_norm": 14.808124542236328, "learning_rate": 1.9358037282565585e-05, "loss": 1.5395, "step": 49330 }, { "epoch": 8.386877443481216, "grad_norm": 15.968195915222168, "learning_rate": 1.9355204260864638e-05, "loss": 1.409, "step": 49340 }, { "epoch": 8.388577256501785, "grad_norm": 15.759886741638184, "learning_rate": 1.935237123916369e-05, "loss": 1.5149, "step": 49350 }, { "epoch": 8.390277069522352, "grad_norm": 13.809943199157715, "learning_rate": 1.9349538217462745e-05, "loss": 1.4752, "step": 49360 }, { "epoch": 8.39197688254292, "grad_norm": 11.87376594543457, "learning_rate": 1.93467051957618e-05, "loss": 1.4446, "step": 49370 }, { "epoch": 8.393676695563489, "grad_norm": 20.121278762817383, "learning_rate": 1.9343872174060856e-05, "loss": 1.5552, "step": 49380 }, { "epoch": 8.395376508584055, "grad_norm": 31.714262008666992, "learning_rate": 1.9341039152359906e-05, "loss": 1.5483, "step": 49390 }, { "epoch": 8.397076321604624, "grad_norm": 37.05161666870117, "learning_rate": 1.933820613065896e-05, "loss": 1.3769, "step": 49400 }, { "epoch": 8.39877613462519, "grad_norm": 20.294221878051758, "learning_rate": 1.9335373108958016e-05, "loss": 1.3519, "step": 49410 }, { "epoch": 8.40047594764576, "grad_norm": 14.105467796325684, "learning_rate": 1.933254008725707e-05, "loss": 1.569, "step": 49420 }, { "epoch": 8.402175760666326, "grad_norm": 13.915611267089844, "learning_rate": 1.932970706555612e-05, "loss": 1.7375, "step": 49430 }, { "epoch": 8.403875573686895, "grad_norm": 10.551023483276367, "learning_rate": 1.9326874043855177e-05, "loss": 1.4327, "step": 49440 }, { "epoch": 8.405575386707461, "grad_norm": 13.88833236694336, "learning_rate": 1.932404102215423e-05, "loss": 1.3675, "step": 49450 }, { "epoch": 8.40727519972803, "grad_norm": 15.970592498779297, "learning_rate": 1.9321208000453284e-05, "loss": 1.6029, "step": 49460 }, { "epoch": 8.408975012748598, "grad_norm": 15.882207870483398, "learning_rate": 1.9318374978752337e-05, "loss": 1.3819, "step": 49470 }, { "epoch": 8.410674825769165, "grad_norm": 14.957392692565918, "learning_rate": 1.931554195705139e-05, "loss": 1.514, "step": 49480 }, { "epoch": 8.412374638789734, "grad_norm": 11.094042778015137, "learning_rate": 1.9312708935350448e-05, "loss": 1.3768, "step": 49490 }, { "epoch": 8.4140744518103, "grad_norm": 14.870136260986328, "learning_rate": 1.93098759136495e-05, "loss": 1.2486, "step": 49500 }, { "epoch": 8.415774264830869, "grad_norm": 13.99505615234375, "learning_rate": 1.930704289194855e-05, "loss": 1.4416, "step": 49510 }, { "epoch": 8.417474077851436, "grad_norm": 12.974578857421875, "learning_rate": 1.9304209870247608e-05, "loss": 1.5803, "step": 49520 }, { "epoch": 8.419173890872004, "grad_norm": 14.944841384887695, "learning_rate": 1.9301376848546662e-05, "loss": 1.2319, "step": 49530 }, { "epoch": 8.420873703892571, "grad_norm": 18.269729614257812, "learning_rate": 1.9298543826845712e-05, "loss": 1.3789, "step": 49540 }, { "epoch": 8.42257351691314, "grad_norm": 17.704498291015625, "learning_rate": 1.929571080514477e-05, "loss": 1.8746, "step": 49550 }, { "epoch": 8.424273329933706, "grad_norm": 17.40545654296875, "learning_rate": 1.9292877783443822e-05, "loss": 1.6115, "step": 49560 }, { "epoch": 8.425973142954275, "grad_norm": 13.31190299987793, "learning_rate": 1.9290044761742876e-05, "loss": 1.6341, "step": 49570 }, { "epoch": 8.427672955974844, "grad_norm": 17.430383682250977, "learning_rate": 1.928721174004193e-05, "loss": 1.3546, "step": 49580 }, { "epoch": 8.42937276899541, "grad_norm": 16.59412384033203, "learning_rate": 1.9284378718340983e-05, "loss": 1.4336, "step": 49590 }, { "epoch": 8.431072582015979, "grad_norm": 12.31139087677002, "learning_rate": 1.9281545696640036e-05, "loss": 1.4329, "step": 49600 }, { "epoch": 8.432772395036546, "grad_norm": 15.956844329833984, "learning_rate": 1.9278712674939093e-05, "loss": 1.3578, "step": 49610 }, { "epoch": 8.434472208057114, "grad_norm": 13.332984924316406, "learning_rate": 1.9275879653238143e-05, "loss": 1.7931, "step": 49620 }, { "epoch": 8.436172021077681, "grad_norm": 13.3731689453125, "learning_rate": 1.9273046631537197e-05, "loss": 1.4718, "step": 49630 }, { "epoch": 8.43787183409825, "grad_norm": 16.93317985534668, "learning_rate": 1.9270213609836254e-05, "loss": 1.3792, "step": 49640 }, { "epoch": 8.439571647118816, "grad_norm": 12.827962875366211, "learning_rate": 1.9267380588135307e-05, "loss": 1.4278, "step": 49650 }, { "epoch": 8.441271460139385, "grad_norm": 13.894896507263184, "learning_rate": 1.9264547566434358e-05, "loss": 1.5069, "step": 49660 }, { "epoch": 8.442971273159952, "grad_norm": 15.625662803649902, "learning_rate": 1.9261714544733414e-05, "loss": 1.7235, "step": 49670 }, { "epoch": 8.44467108618052, "grad_norm": 13.279179573059082, "learning_rate": 1.9258881523032468e-05, "loss": 1.393, "step": 49680 }, { "epoch": 8.446370899201089, "grad_norm": 13.796674728393555, "learning_rate": 1.9256048501331518e-05, "loss": 1.553, "step": 49690 }, { "epoch": 8.448070712221655, "grad_norm": 12.960586547851562, "learning_rate": 1.9253215479630575e-05, "loss": 1.3734, "step": 49700 }, { "epoch": 8.449770525242224, "grad_norm": 16.90865135192871, "learning_rate": 1.925038245792963e-05, "loss": 1.544, "step": 49710 }, { "epoch": 8.45147033826279, "grad_norm": 11.739420890808105, "learning_rate": 1.9247549436228682e-05, "loss": 1.4673, "step": 49720 }, { "epoch": 8.45317015128336, "grad_norm": 11.878060340881348, "learning_rate": 1.9244716414527736e-05, "loss": 1.3712, "step": 49730 }, { "epoch": 8.454869964303926, "grad_norm": 20.498659133911133, "learning_rate": 1.924188339282679e-05, "loss": 1.5708, "step": 49740 }, { "epoch": 8.456569777324495, "grad_norm": 13.386493682861328, "learning_rate": 1.9239050371125843e-05, "loss": 1.6105, "step": 49750 }, { "epoch": 8.458269590345061, "grad_norm": 11.858169555664062, "learning_rate": 1.92362173494249e-05, "loss": 1.3189, "step": 49760 }, { "epoch": 8.45996940336563, "grad_norm": 13.670690536499023, "learning_rate": 1.923338432772395e-05, "loss": 1.4519, "step": 49770 }, { "epoch": 8.461669216386198, "grad_norm": 19.559419631958008, "learning_rate": 1.9230551306023003e-05, "loss": 1.4625, "step": 49780 }, { "epoch": 8.463369029406765, "grad_norm": 15.507299423217773, "learning_rate": 1.922771828432206e-05, "loss": 1.3678, "step": 49790 }, { "epoch": 8.465068842427334, "grad_norm": 12.309423446655273, "learning_rate": 1.9224885262621114e-05, "loss": 1.5117, "step": 49800 }, { "epoch": 8.4667686554479, "grad_norm": 18.94719123840332, "learning_rate": 1.9222052240920164e-05, "loss": 1.5527, "step": 49810 }, { "epoch": 8.468468468468469, "grad_norm": 16.41902732849121, "learning_rate": 1.921921921921922e-05, "loss": 1.448, "step": 49820 }, { "epoch": 8.470168281489036, "grad_norm": 17.599170684814453, "learning_rate": 1.9216386197518274e-05, "loss": 1.5853, "step": 49830 }, { "epoch": 8.471868094509604, "grad_norm": 17.61941909790039, "learning_rate": 1.9213553175817328e-05, "loss": 1.4542, "step": 49840 }, { "epoch": 8.473567907530171, "grad_norm": 13.96219253540039, "learning_rate": 1.921072015411638e-05, "loss": 1.542, "step": 49850 }, { "epoch": 8.47526772055074, "grad_norm": 16.30186653137207, "learning_rate": 1.9207887132415435e-05, "loss": 1.6353, "step": 49860 }, { "epoch": 8.476967533571306, "grad_norm": 31.6414794921875, "learning_rate": 1.9205054110714488e-05, "loss": 1.4363, "step": 49870 }, { "epoch": 8.478667346591875, "grad_norm": 16.87853240966797, "learning_rate": 1.9202221089013542e-05, "loss": 1.3542, "step": 49880 }, { "epoch": 8.480367159612443, "grad_norm": 12.341633796691895, "learning_rate": 1.9199388067312595e-05, "loss": 1.5973, "step": 49890 }, { "epoch": 8.48206697263301, "grad_norm": 16.200485229492188, "learning_rate": 1.919655504561165e-05, "loss": 1.3802, "step": 49900 }, { "epoch": 8.483766785653579, "grad_norm": 16.734193801879883, "learning_rate": 1.9193722023910706e-05, "loss": 1.5707, "step": 49910 }, { "epoch": 8.485466598674146, "grad_norm": 20.897289276123047, "learning_rate": 1.9190889002209756e-05, "loss": 1.3992, "step": 49920 }, { "epoch": 8.487166411694714, "grad_norm": 16.860031127929688, "learning_rate": 1.918805598050881e-05, "loss": 1.5459, "step": 49930 }, { "epoch": 8.48886622471528, "grad_norm": 15.919288635253906, "learning_rate": 1.9185222958807866e-05, "loss": 1.3432, "step": 49940 }, { "epoch": 8.49056603773585, "grad_norm": 17.78481101989746, "learning_rate": 1.918238993710692e-05, "loss": 1.4246, "step": 49950 }, { "epoch": 8.492265850756416, "grad_norm": 14.3230562210083, "learning_rate": 1.917955691540597e-05, "loss": 1.4305, "step": 49960 }, { "epoch": 8.493965663776985, "grad_norm": 21.11610984802246, "learning_rate": 1.9176723893705027e-05, "loss": 1.4193, "step": 49970 }, { "epoch": 8.495665476797551, "grad_norm": 13.323955535888672, "learning_rate": 1.917389087200408e-05, "loss": 1.5098, "step": 49980 }, { "epoch": 8.49736528981812, "grad_norm": 11.431485176086426, "learning_rate": 1.9171057850303134e-05, "loss": 1.4076, "step": 49990 }, { "epoch": 8.499065102838689, "grad_norm": 15.05572509765625, "learning_rate": 1.9168224828602187e-05, "loss": 1.276, "step": 50000 }, { "epoch": 8.500764915859255, "grad_norm": 21.780839920043945, "learning_rate": 1.916539180690124e-05, "loss": 1.6514, "step": 50010 }, { "epoch": 8.502464728879824, "grad_norm": 20.85468864440918, "learning_rate": 1.9162558785200295e-05, "loss": 1.6193, "step": 50020 }, { "epoch": 8.50416454190039, "grad_norm": 14.488081932067871, "learning_rate": 1.9159725763499348e-05, "loss": 1.319, "step": 50030 }, { "epoch": 8.50586435492096, "grad_norm": 12.169366836547852, "learning_rate": 1.91568927417984e-05, "loss": 1.4931, "step": 50040 }, { "epoch": 8.507564167941526, "grad_norm": 14.384800910949707, "learning_rate": 1.9154059720097455e-05, "loss": 1.5568, "step": 50050 }, { "epoch": 8.509263980962094, "grad_norm": 15.18106746673584, "learning_rate": 1.9151226698396512e-05, "loss": 1.7803, "step": 50060 }, { "epoch": 8.510963793982661, "grad_norm": 12.433897018432617, "learning_rate": 1.9148393676695562e-05, "loss": 1.5271, "step": 50070 }, { "epoch": 8.51266360700323, "grad_norm": 14.709894180297852, "learning_rate": 1.9145560654994616e-05, "loss": 1.747, "step": 50080 }, { "epoch": 8.514363420023798, "grad_norm": 17.131135940551758, "learning_rate": 1.9142727633293673e-05, "loss": 1.2982, "step": 50090 }, { "epoch": 8.516063233044365, "grad_norm": 13.38113784790039, "learning_rate": 1.9139894611592726e-05, "loss": 1.4003, "step": 50100 }, { "epoch": 8.517763046064934, "grad_norm": 10.460448265075684, "learning_rate": 1.9137061589891776e-05, "loss": 1.5273, "step": 50110 }, { "epoch": 8.5194628590855, "grad_norm": 15.924751281738281, "learning_rate": 1.9134228568190833e-05, "loss": 1.4226, "step": 50120 }, { "epoch": 8.521162672106069, "grad_norm": 16.425504684448242, "learning_rate": 1.9131395546489887e-05, "loss": 1.5487, "step": 50130 }, { "epoch": 8.522862485126636, "grad_norm": 11.238484382629395, "learning_rate": 1.912856252478894e-05, "loss": 1.4299, "step": 50140 }, { "epoch": 8.524562298147204, "grad_norm": 13.873468399047852, "learning_rate": 1.9125729503087994e-05, "loss": 1.4376, "step": 50150 }, { "epoch": 8.526262111167771, "grad_norm": 37.252166748046875, "learning_rate": 1.9122896481387047e-05, "loss": 1.332, "step": 50160 }, { "epoch": 8.52796192418834, "grad_norm": 17.0257625579834, "learning_rate": 1.91200634596861e-05, "loss": 1.5631, "step": 50170 }, { "epoch": 8.529661737208906, "grad_norm": 19.172971725463867, "learning_rate": 1.9117230437985158e-05, "loss": 1.6069, "step": 50180 }, { "epoch": 8.531361550229475, "grad_norm": 18.96186065673828, "learning_rate": 1.9114397416284208e-05, "loss": 1.363, "step": 50190 }, { "epoch": 8.533061363250042, "grad_norm": 12.79029655456543, "learning_rate": 1.9111564394583265e-05, "loss": 1.5375, "step": 50200 }, { "epoch": 8.53476117627061, "grad_norm": 12.828084945678711, "learning_rate": 1.9108731372882318e-05, "loss": 1.5138, "step": 50210 }, { "epoch": 8.536460989291179, "grad_norm": 12.386646270751953, "learning_rate": 1.910589835118137e-05, "loss": 1.4298, "step": 50220 }, { "epoch": 8.538160802311745, "grad_norm": 22.60735321044922, "learning_rate": 1.9103065329480425e-05, "loss": 1.3494, "step": 50230 }, { "epoch": 8.539860615332314, "grad_norm": 15.37696647644043, "learning_rate": 1.910023230777948e-05, "loss": 1.7042, "step": 50240 }, { "epoch": 8.54156042835288, "grad_norm": 20.806299209594727, "learning_rate": 1.9097399286078532e-05, "loss": 1.2905, "step": 50250 }, { "epoch": 8.54326024137345, "grad_norm": 18.83717918395996, "learning_rate": 1.9094566264377586e-05, "loss": 1.432, "step": 50260 }, { "epoch": 8.544960054394016, "grad_norm": 14.084944725036621, "learning_rate": 1.909173324267664e-05, "loss": 1.4638, "step": 50270 }, { "epoch": 8.546659867414585, "grad_norm": 16.106616973876953, "learning_rate": 1.9088900220975693e-05, "loss": 1.3059, "step": 50280 }, { "epoch": 8.548359680435151, "grad_norm": 18.011568069458008, "learning_rate": 1.908606719927475e-05, "loss": 1.5639, "step": 50290 }, { "epoch": 8.55005949345572, "grad_norm": 14.715485572814941, "learning_rate": 1.90832341775738e-05, "loss": 1.4966, "step": 50300 }, { "epoch": 8.551759306476288, "grad_norm": 14.887866020202637, "learning_rate": 1.9080401155872853e-05, "loss": 1.5726, "step": 50310 }, { "epoch": 8.553459119496855, "grad_norm": 18.267732620239258, "learning_rate": 1.907756813417191e-05, "loss": 1.3097, "step": 50320 }, { "epoch": 8.555158932517424, "grad_norm": 11.080357551574707, "learning_rate": 1.9074735112470964e-05, "loss": 1.4616, "step": 50330 }, { "epoch": 8.55685874553799, "grad_norm": 14.569120407104492, "learning_rate": 1.9071902090770014e-05, "loss": 1.5533, "step": 50340 }, { "epoch": 8.558558558558559, "grad_norm": 20.796894073486328, "learning_rate": 1.906906906906907e-05, "loss": 1.3261, "step": 50350 }, { "epoch": 8.560258371579126, "grad_norm": 14.621899604797363, "learning_rate": 1.9066236047368124e-05, "loss": 1.6326, "step": 50360 }, { "epoch": 8.561958184599694, "grad_norm": 15.780755996704102, "learning_rate": 1.9063403025667178e-05, "loss": 1.4291, "step": 50370 }, { "epoch": 8.563657997620261, "grad_norm": 14.077446937561035, "learning_rate": 1.906057000396623e-05, "loss": 1.4065, "step": 50380 }, { "epoch": 8.56535781064083, "grad_norm": 14.995244979858398, "learning_rate": 1.9057736982265285e-05, "loss": 1.5147, "step": 50390 }, { "epoch": 8.567057623661396, "grad_norm": 19.274240493774414, "learning_rate": 1.905490396056434e-05, "loss": 1.7708, "step": 50400 }, { "epoch": 8.568757436681965, "grad_norm": 13.734012603759766, "learning_rate": 1.9052070938863392e-05, "loss": 1.2912, "step": 50410 }, { "epoch": 8.570457249702534, "grad_norm": 15.985599517822266, "learning_rate": 1.9049237917162446e-05, "loss": 1.5789, "step": 50420 }, { "epoch": 8.5721570627231, "grad_norm": 15.51916217803955, "learning_rate": 1.90464048954615e-05, "loss": 1.6446, "step": 50430 }, { "epoch": 8.573856875743669, "grad_norm": 20.016124725341797, "learning_rate": 1.9043571873760556e-05, "loss": 1.7982, "step": 50440 }, { "epoch": 8.575556688764236, "grad_norm": 12.044961929321289, "learning_rate": 1.9040738852059606e-05, "loss": 1.5246, "step": 50450 }, { "epoch": 8.577256501784804, "grad_norm": 12.164101600646973, "learning_rate": 1.903790583035866e-05, "loss": 1.3356, "step": 50460 }, { "epoch": 8.578956314805371, "grad_norm": 24.346952438354492, "learning_rate": 1.9035072808657717e-05, "loss": 1.279, "step": 50470 }, { "epoch": 8.58065612782594, "grad_norm": 20.60580825805664, "learning_rate": 1.903223978695677e-05, "loss": 1.4819, "step": 50480 }, { "epoch": 8.582355940846506, "grad_norm": 16.43256187438965, "learning_rate": 1.902940676525582e-05, "loss": 1.3647, "step": 50490 }, { "epoch": 8.584055753867075, "grad_norm": 16.66977882385254, "learning_rate": 1.9026573743554877e-05, "loss": 1.4572, "step": 50500 }, { "epoch": 8.585755566887642, "grad_norm": 14.436896324157715, "learning_rate": 1.902374072185393e-05, "loss": 1.5242, "step": 50510 }, { "epoch": 8.58745537990821, "grad_norm": 10.983469009399414, "learning_rate": 1.9020907700152984e-05, "loss": 1.6054, "step": 50520 }, { "epoch": 8.589155192928779, "grad_norm": 19.10079002380371, "learning_rate": 1.9018074678452038e-05, "loss": 1.2443, "step": 50530 }, { "epoch": 8.590855005949345, "grad_norm": 12.014434814453125, "learning_rate": 1.901524165675109e-05, "loss": 1.4184, "step": 50540 }, { "epoch": 8.592554818969914, "grad_norm": 16.57256507873535, "learning_rate": 1.9012408635050145e-05, "loss": 1.5458, "step": 50550 }, { "epoch": 8.59425463199048, "grad_norm": 15.764670372009277, "learning_rate": 1.9009575613349198e-05, "loss": 1.3992, "step": 50560 }, { "epoch": 8.59595444501105, "grad_norm": 16.516389846801758, "learning_rate": 1.9006742591648252e-05, "loss": 1.3609, "step": 50570 }, { "epoch": 8.597654258031616, "grad_norm": 10.36406421661377, "learning_rate": 1.9003909569947305e-05, "loss": 1.5703, "step": 50580 }, { "epoch": 8.599354071052185, "grad_norm": 14.50204849243164, "learning_rate": 1.9001076548246362e-05, "loss": 1.6884, "step": 50590 }, { "epoch": 8.601053884072751, "grad_norm": 14.444899559020996, "learning_rate": 1.8998243526545412e-05, "loss": 1.4937, "step": 50600 }, { "epoch": 8.60275369709332, "grad_norm": 18.979026794433594, "learning_rate": 1.8995410504844466e-05, "loss": 1.1496, "step": 50610 }, { "epoch": 8.604453510113888, "grad_norm": 11.543102264404297, "learning_rate": 1.8992577483143523e-05, "loss": 1.5908, "step": 50620 }, { "epoch": 8.606153323134455, "grad_norm": 18.726749420166016, "learning_rate": 1.8989744461442576e-05, "loss": 1.4409, "step": 50630 }, { "epoch": 8.607853136155024, "grad_norm": 14.69508171081543, "learning_rate": 1.8986911439741626e-05, "loss": 1.362, "step": 50640 }, { "epoch": 8.60955294917559, "grad_norm": 12.419857025146484, "learning_rate": 1.8984078418040683e-05, "loss": 1.4015, "step": 50650 }, { "epoch": 8.611252762196159, "grad_norm": 12.718960762023926, "learning_rate": 1.8981245396339737e-05, "loss": 1.4938, "step": 50660 }, { "epoch": 8.612952575216726, "grad_norm": 18.51715850830078, "learning_rate": 1.897841237463879e-05, "loss": 1.4038, "step": 50670 }, { "epoch": 8.614652388237294, "grad_norm": 14.263073921203613, "learning_rate": 1.8975579352937844e-05, "loss": 1.3033, "step": 50680 }, { "epoch": 8.616352201257861, "grad_norm": 18.184999465942383, "learning_rate": 1.8972746331236897e-05, "loss": 1.4966, "step": 50690 }, { "epoch": 8.61805201427843, "grad_norm": 13.810689926147461, "learning_rate": 1.896991330953595e-05, "loss": 1.5032, "step": 50700 }, { "epoch": 8.619751827298996, "grad_norm": 14.04091739654541, "learning_rate": 1.8967080287835008e-05, "loss": 1.3949, "step": 50710 }, { "epoch": 8.621451640319565, "grad_norm": 9.49355697631836, "learning_rate": 1.8964247266134058e-05, "loss": 1.3866, "step": 50720 }, { "epoch": 8.623151453340132, "grad_norm": 14.359103202819824, "learning_rate": 1.896141424443311e-05, "loss": 1.4409, "step": 50730 }, { "epoch": 8.6248512663607, "grad_norm": 14.215043067932129, "learning_rate": 1.895858122273217e-05, "loss": 1.825, "step": 50740 }, { "epoch": 8.626551079381269, "grad_norm": 19.365985870361328, "learning_rate": 1.895574820103122e-05, "loss": 1.5984, "step": 50750 }, { "epoch": 8.628250892401836, "grad_norm": 11.947530746459961, "learning_rate": 1.8952915179330272e-05, "loss": 1.467, "step": 50760 }, { "epoch": 8.629950705422404, "grad_norm": 15.840845108032227, "learning_rate": 1.895008215762933e-05, "loss": 1.4359, "step": 50770 }, { "epoch": 8.63165051844297, "grad_norm": 12.834233283996582, "learning_rate": 1.8947249135928383e-05, "loss": 1.5851, "step": 50780 }, { "epoch": 8.63335033146354, "grad_norm": 15.05781078338623, "learning_rate": 1.8944416114227433e-05, "loss": 1.4615, "step": 50790 }, { "epoch": 8.635050144484106, "grad_norm": 18.90422821044922, "learning_rate": 1.894158309252649e-05, "loss": 1.5906, "step": 50800 }, { "epoch": 8.636749957504675, "grad_norm": 8.185665130615234, "learning_rate": 1.8938750070825543e-05, "loss": 1.2528, "step": 50810 }, { "epoch": 8.638449770525241, "grad_norm": 15.51479721069336, "learning_rate": 1.8935917049124597e-05, "loss": 1.5665, "step": 50820 }, { "epoch": 8.64014958354581, "grad_norm": 13.347878456115723, "learning_rate": 1.893308402742365e-05, "loss": 1.5705, "step": 50830 }, { "epoch": 8.641849396566379, "grad_norm": 13.167309761047363, "learning_rate": 1.8930251005722704e-05, "loss": 1.7574, "step": 50840 }, { "epoch": 8.643549209586945, "grad_norm": 12.46658992767334, "learning_rate": 1.8927417984021757e-05, "loss": 1.5506, "step": 50850 }, { "epoch": 8.645249022607514, "grad_norm": 13.954108238220215, "learning_rate": 1.8924584962320814e-05, "loss": 1.4294, "step": 50860 }, { "epoch": 8.64694883562808, "grad_norm": 16.364360809326172, "learning_rate": 1.8921751940619864e-05, "loss": 1.4664, "step": 50870 }, { "epoch": 8.64864864864865, "grad_norm": 14.167791366577148, "learning_rate": 1.8918918918918918e-05, "loss": 1.5301, "step": 50880 }, { "epoch": 8.650348461669216, "grad_norm": 11.672017097473145, "learning_rate": 1.8916085897217975e-05, "loss": 1.5199, "step": 50890 }, { "epoch": 8.652048274689784, "grad_norm": 11.330592155456543, "learning_rate": 1.8913252875517025e-05, "loss": 1.4477, "step": 50900 }, { "epoch": 8.653748087710351, "grad_norm": 10.157001495361328, "learning_rate": 1.8910419853816078e-05, "loss": 1.3548, "step": 50910 }, { "epoch": 8.65544790073092, "grad_norm": 16.70128631591797, "learning_rate": 1.8907586832115135e-05, "loss": 1.5955, "step": 50920 }, { "epoch": 8.657147713751487, "grad_norm": 15.358477592468262, "learning_rate": 1.890475381041419e-05, "loss": 1.5713, "step": 50930 }, { "epoch": 8.658847526772055, "grad_norm": 13.978132247924805, "learning_rate": 1.8901920788713242e-05, "loss": 1.4133, "step": 50940 }, { "epoch": 8.660547339792624, "grad_norm": 20.853137969970703, "learning_rate": 1.8899087767012296e-05, "loss": 1.5895, "step": 50950 }, { "epoch": 8.66224715281319, "grad_norm": 18.25304412841797, "learning_rate": 1.889625474531135e-05, "loss": 1.6049, "step": 50960 }, { "epoch": 8.663946965833759, "grad_norm": 20.364604949951172, "learning_rate": 1.8893421723610406e-05, "loss": 1.4806, "step": 50970 }, { "epoch": 8.665646778854326, "grad_norm": 25.83648109436035, "learning_rate": 1.8890588701909456e-05, "loss": 1.4114, "step": 50980 }, { "epoch": 8.667346591874894, "grad_norm": 10.356613159179688, "learning_rate": 1.888775568020851e-05, "loss": 1.5022, "step": 50990 }, { "epoch": 8.669046404895461, "grad_norm": 20.349475860595703, "learning_rate": 1.8884922658507567e-05, "loss": 1.3212, "step": 51000 }, { "epoch": 8.67074621791603, "grad_norm": 15.920060157775879, "learning_rate": 1.888208963680662e-05, "loss": 1.4206, "step": 51010 }, { "epoch": 8.672446030936596, "grad_norm": 17.67091178894043, "learning_rate": 1.887925661510567e-05, "loss": 1.5806, "step": 51020 }, { "epoch": 8.674145843957165, "grad_norm": 20.260122299194336, "learning_rate": 1.8876423593404727e-05, "loss": 1.535, "step": 51030 }, { "epoch": 8.675845656977732, "grad_norm": 15.938594818115234, "learning_rate": 1.887359057170378e-05, "loss": 1.4661, "step": 51040 }, { "epoch": 8.6775454699983, "grad_norm": 13.702073097229004, "learning_rate": 1.8870757550002834e-05, "loss": 1.5613, "step": 51050 }, { "epoch": 8.679245283018869, "grad_norm": 25.301616668701172, "learning_rate": 1.8867924528301888e-05, "loss": 1.3405, "step": 51060 }, { "epoch": 8.680945096039435, "grad_norm": 12.386906623840332, "learning_rate": 1.886509150660094e-05, "loss": 1.5233, "step": 51070 }, { "epoch": 8.682644909060004, "grad_norm": 14.476531028747559, "learning_rate": 1.8862258484899995e-05, "loss": 1.5852, "step": 51080 }, { "epoch": 8.68434472208057, "grad_norm": 17.710933685302734, "learning_rate": 1.885942546319905e-05, "loss": 1.5602, "step": 51090 }, { "epoch": 8.68604453510114, "grad_norm": 17.05352210998535, "learning_rate": 1.8856592441498102e-05, "loss": 1.6065, "step": 51100 }, { "epoch": 8.687744348121706, "grad_norm": 15.969565391540527, "learning_rate": 1.8853759419797155e-05, "loss": 1.4373, "step": 51110 }, { "epoch": 8.689444161142275, "grad_norm": 12.6944580078125, "learning_rate": 1.8850926398096212e-05, "loss": 1.4379, "step": 51120 }, { "epoch": 8.691143974162841, "grad_norm": 17.524160385131836, "learning_rate": 1.8848093376395263e-05, "loss": 1.2411, "step": 51130 }, { "epoch": 8.69284378718341, "grad_norm": 16.229022979736328, "learning_rate": 1.8845260354694316e-05, "loss": 1.5921, "step": 51140 }, { "epoch": 8.694543600203978, "grad_norm": 13.94332504272461, "learning_rate": 1.8842427332993373e-05, "loss": 1.3571, "step": 51150 }, { "epoch": 8.696243413224545, "grad_norm": 14.496142387390137, "learning_rate": 1.8839594311292426e-05, "loss": 1.4569, "step": 51160 }, { "epoch": 8.697943226245114, "grad_norm": 12.095478057861328, "learning_rate": 1.8836761289591477e-05, "loss": 1.5243, "step": 51170 }, { "epoch": 8.69964303926568, "grad_norm": 13.008162498474121, "learning_rate": 1.8833928267890534e-05, "loss": 1.3217, "step": 51180 }, { "epoch": 8.701342852286249, "grad_norm": 15.23643970489502, "learning_rate": 1.8831095246189587e-05, "loss": 1.3896, "step": 51190 }, { "epoch": 8.703042665306816, "grad_norm": 28.219593048095703, "learning_rate": 1.882826222448864e-05, "loss": 1.578, "step": 51200 }, { "epoch": 8.704742478327384, "grad_norm": 14.855152130126953, "learning_rate": 1.8825429202787694e-05, "loss": 1.4369, "step": 51210 }, { "epoch": 8.706442291347951, "grad_norm": 13.391202926635742, "learning_rate": 1.8822596181086748e-05, "loss": 1.5748, "step": 51220 }, { "epoch": 8.70814210436852, "grad_norm": 13.580973625183105, "learning_rate": 1.88197631593858e-05, "loss": 1.3882, "step": 51230 }, { "epoch": 8.709841917389086, "grad_norm": 13.665305137634277, "learning_rate": 1.8816930137684855e-05, "loss": 1.2754, "step": 51240 }, { "epoch": 8.711541730409655, "grad_norm": 16.743412017822266, "learning_rate": 1.8814097115983908e-05, "loss": 1.4544, "step": 51250 }, { "epoch": 8.713241543430224, "grad_norm": 16.149383544921875, "learning_rate": 1.8811264094282962e-05, "loss": 1.4864, "step": 51260 }, { "epoch": 8.71494135645079, "grad_norm": 13.711511611938477, "learning_rate": 1.880843107258202e-05, "loss": 1.3284, "step": 51270 }, { "epoch": 8.716641169471359, "grad_norm": 21.934696197509766, "learning_rate": 1.880559805088107e-05, "loss": 1.5814, "step": 51280 }, { "epoch": 8.718340982491926, "grad_norm": 12.346365928649902, "learning_rate": 1.8802765029180122e-05, "loss": 1.5087, "step": 51290 }, { "epoch": 8.720040795512494, "grad_norm": 15.422245025634766, "learning_rate": 1.879993200747918e-05, "loss": 1.5594, "step": 51300 }, { "epoch": 8.72174060853306, "grad_norm": 21.304277420043945, "learning_rate": 1.8797098985778233e-05, "loss": 1.3458, "step": 51310 }, { "epoch": 8.72344042155363, "grad_norm": 49.4707145690918, "learning_rate": 1.8794265964077283e-05, "loss": 1.6078, "step": 51320 }, { "epoch": 8.725140234574196, "grad_norm": 17.456396102905273, "learning_rate": 1.879143294237634e-05, "loss": 1.5776, "step": 51330 }, { "epoch": 8.726840047594765, "grad_norm": 22.11290740966797, "learning_rate": 1.8788599920675393e-05, "loss": 1.6454, "step": 51340 }, { "epoch": 8.728539860615331, "grad_norm": 16.70423126220703, "learning_rate": 1.8785766898974447e-05, "loss": 1.418, "step": 51350 }, { "epoch": 8.7302396736359, "grad_norm": 13.81849479675293, "learning_rate": 1.87829338772735e-05, "loss": 1.5704, "step": 51360 }, { "epoch": 8.731939486656469, "grad_norm": 15.782851219177246, "learning_rate": 1.8780100855572554e-05, "loss": 1.5341, "step": 51370 }, { "epoch": 8.733639299677035, "grad_norm": 21.97937774658203, "learning_rate": 1.8777267833871607e-05, "loss": 1.5383, "step": 51380 }, { "epoch": 8.735339112697604, "grad_norm": 19.860837936401367, "learning_rate": 1.8774434812170664e-05, "loss": 1.6167, "step": 51390 }, { "epoch": 8.73703892571817, "grad_norm": 13.927250862121582, "learning_rate": 1.8771601790469714e-05, "loss": 1.5693, "step": 51400 }, { "epoch": 8.73873873873874, "grad_norm": 14.00829029083252, "learning_rate": 1.8768768768768768e-05, "loss": 1.2721, "step": 51410 }, { "epoch": 8.740438551759306, "grad_norm": 13.670649528503418, "learning_rate": 1.8765935747067825e-05, "loss": 1.3858, "step": 51420 }, { "epoch": 8.742138364779874, "grad_norm": 21.83146095275879, "learning_rate": 1.8763102725366875e-05, "loss": 1.5578, "step": 51430 }, { "epoch": 8.743838177800441, "grad_norm": 23.26712417602539, "learning_rate": 1.876026970366593e-05, "loss": 1.5407, "step": 51440 }, { "epoch": 8.74553799082101, "grad_norm": 14.693559646606445, "learning_rate": 1.8757436681964985e-05, "loss": 1.8406, "step": 51450 }, { "epoch": 8.747237803841578, "grad_norm": 11.904765129089355, "learning_rate": 1.875460366026404e-05, "loss": 1.431, "step": 51460 }, { "epoch": 8.748937616862145, "grad_norm": 15.63609504699707, "learning_rate": 1.875177063856309e-05, "loss": 1.398, "step": 51470 }, { "epoch": 8.750637429882714, "grad_norm": 13.665417671203613, "learning_rate": 1.8748937616862146e-05, "loss": 1.3043, "step": 51480 }, { "epoch": 8.75233724290328, "grad_norm": 18.331172943115234, "learning_rate": 1.87461045951612e-05, "loss": 1.2405, "step": 51490 }, { "epoch": 8.754037055923849, "grad_norm": 13.030244827270508, "learning_rate": 1.8743271573460253e-05, "loss": 1.5571, "step": 51500 }, { "epoch": 8.755736868944416, "grad_norm": 18.130008697509766, "learning_rate": 1.8740438551759307e-05, "loss": 1.3833, "step": 51510 }, { "epoch": 8.757436681964984, "grad_norm": 14.032910346984863, "learning_rate": 1.873760553005836e-05, "loss": 1.6082, "step": 51520 }, { "epoch": 8.759136494985551, "grad_norm": 16.674047470092773, "learning_rate": 1.8734772508357414e-05, "loss": 1.6036, "step": 51530 }, { "epoch": 8.76083630800612, "grad_norm": 15.56101131439209, "learning_rate": 1.873193948665647e-05, "loss": 1.5868, "step": 51540 }, { "epoch": 8.762536121026686, "grad_norm": 13.487323760986328, "learning_rate": 1.872910646495552e-05, "loss": 1.3957, "step": 51550 }, { "epoch": 8.764235934047255, "grad_norm": 14.002933502197266, "learning_rate": 1.8726273443254574e-05, "loss": 1.5237, "step": 51560 }, { "epoch": 8.765935747067822, "grad_norm": 21.3325252532959, "learning_rate": 1.872344042155363e-05, "loss": 1.4638, "step": 51570 }, { "epoch": 8.76763556008839, "grad_norm": 17.384944915771484, "learning_rate": 1.8720607399852685e-05, "loss": 1.4246, "step": 51580 }, { "epoch": 8.769335373108959, "grad_norm": 16.58977508544922, "learning_rate": 1.8717774378151735e-05, "loss": 1.4963, "step": 51590 }, { "epoch": 8.771035186129525, "grad_norm": 15.597407341003418, "learning_rate": 1.871494135645079e-05, "loss": 1.4651, "step": 51600 }, { "epoch": 8.772734999150094, "grad_norm": 13.89003849029541, "learning_rate": 1.8712108334749845e-05, "loss": 1.5318, "step": 51610 }, { "epoch": 8.77443481217066, "grad_norm": 15.601438522338867, "learning_rate": 1.8709275313048895e-05, "loss": 1.4491, "step": 51620 }, { "epoch": 8.77613462519123, "grad_norm": 12.173730850219727, "learning_rate": 1.8706442291347952e-05, "loss": 1.4994, "step": 51630 }, { "epoch": 8.777834438211796, "grad_norm": 16.38232421875, "learning_rate": 1.8703609269647006e-05, "loss": 1.4279, "step": 51640 }, { "epoch": 8.779534251232365, "grad_norm": 16.890583038330078, "learning_rate": 1.870077624794606e-05, "loss": 1.482, "step": 51650 }, { "epoch": 8.781234064252931, "grad_norm": 13.348379135131836, "learning_rate": 1.8697943226245113e-05, "loss": 1.6609, "step": 51660 }, { "epoch": 8.7829338772735, "grad_norm": 13.118965148925781, "learning_rate": 1.8695110204544166e-05, "loss": 1.814, "step": 51670 }, { "epoch": 8.784633690294068, "grad_norm": 13.006844520568848, "learning_rate": 1.8692277182843223e-05, "loss": 1.426, "step": 51680 }, { "epoch": 8.786333503314635, "grad_norm": 14.195405960083008, "learning_rate": 1.8689444161142277e-05, "loss": 1.4949, "step": 51690 }, { "epoch": 8.788033316335204, "grad_norm": 14.977180480957031, "learning_rate": 1.8686611139441327e-05, "loss": 1.5637, "step": 51700 }, { "epoch": 8.78973312935577, "grad_norm": 11.406984329223633, "learning_rate": 1.8683778117740384e-05, "loss": 1.6222, "step": 51710 }, { "epoch": 8.791432942376339, "grad_norm": 13.716421127319336, "learning_rate": 1.8680945096039437e-05, "loss": 1.3286, "step": 51720 }, { "epoch": 8.793132755396906, "grad_norm": 14.911808967590332, "learning_rate": 1.867811207433849e-05, "loss": 1.4606, "step": 51730 }, { "epoch": 8.794832568417474, "grad_norm": 16.55458641052246, "learning_rate": 1.8675279052637544e-05, "loss": 1.6982, "step": 51740 }, { "epoch": 8.796532381438041, "grad_norm": 12.2149076461792, "learning_rate": 1.8672446030936598e-05, "loss": 1.3414, "step": 51750 }, { "epoch": 8.79823219445861, "grad_norm": 20.28461456298828, "learning_rate": 1.866961300923565e-05, "loss": 1.3356, "step": 51760 }, { "epoch": 8.799932007479176, "grad_norm": 15.245342254638672, "learning_rate": 1.8666779987534705e-05, "loss": 1.4899, "step": 51770 }, { "epoch": 8.801631820499745, "grad_norm": 14.555941581726074, "learning_rate": 1.866394696583376e-05, "loss": 1.7179, "step": 51780 }, { "epoch": 8.803331633520314, "grad_norm": 18.850387573242188, "learning_rate": 1.8661113944132812e-05, "loss": 1.4934, "step": 51790 }, { "epoch": 8.80503144654088, "grad_norm": 20.5305118560791, "learning_rate": 1.865828092243187e-05, "loss": 1.3532, "step": 51800 }, { "epoch": 8.806731259561449, "grad_norm": 12.065633773803711, "learning_rate": 1.865544790073092e-05, "loss": 1.3651, "step": 51810 }, { "epoch": 8.808431072582016, "grad_norm": 20.936445236206055, "learning_rate": 1.8652614879029972e-05, "loss": 1.769, "step": 51820 }, { "epoch": 8.810130885602584, "grad_norm": 16.683876037597656, "learning_rate": 1.864978185732903e-05, "loss": 1.8932, "step": 51830 }, { "epoch": 8.811830698623151, "grad_norm": 15.9541015625, "learning_rate": 1.8646948835628083e-05, "loss": 1.203, "step": 51840 }, { "epoch": 8.81353051164372, "grad_norm": 15.870369911193848, "learning_rate": 1.8644115813927133e-05, "loss": 1.4213, "step": 51850 }, { "epoch": 8.815230324664286, "grad_norm": 28.26861572265625, "learning_rate": 1.864128279222619e-05, "loss": 1.4143, "step": 51860 }, { "epoch": 8.816930137684855, "grad_norm": 13.140209197998047, "learning_rate": 1.8638449770525243e-05, "loss": 1.7587, "step": 51870 }, { "epoch": 8.818629950705422, "grad_norm": 16.09272003173828, "learning_rate": 1.8635616748824297e-05, "loss": 1.5293, "step": 51880 }, { "epoch": 8.82032976372599, "grad_norm": 12.529674530029297, "learning_rate": 1.863278372712335e-05, "loss": 1.4237, "step": 51890 }, { "epoch": 8.822029576746559, "grad_norm": 13.88747501373291, "learning_rate": 1.8629950705422404e-05, "loss": 1.6804, "step": 51900 }, { "epoch": 8.823729389767125, "grad_norm": 13.14181137084961, "learning_rate": 1.8627117683721458e-05, "loss": 1.4699, "step": 51910 }, { "epoch": 8.825429202787694, "grad_norm": 14.661871910095215, "learning_rate": 1.8624284662020514e-05, "loss": 1.4152, "step": 51920 }, { "epoch": 8.82712901580826, "grad_norm": 26.387203216552734, "learning_rate": 1.8621451640319565e-05, "loss": 1.445, "step": 51930 }, { "epoch": 8.82882882882883, "grad_norm": 9.126380920410156, "learning_rate": 1.8618618618618618e-05, "loss": 1.5896, "step": 51940 }, { "epoch": 8.830528641849396, "grad_norm": 14.848827362060547, "learning_rate": 1.8615785596917675e-05, "loss": 1.5019, "step": 51950 }, { "epoch": 8.832228454869965, "grad_norm": 11.44696044921875, "learning_rate": 1.8612952575216725e-05, "loss": 1.6807, "step": 51960 }, { "epoch": 8.833928267890531, "grad_norm": 13.674748420715332, "learning_rate": 1.861011955351578e-05, "loss": 1.4393, "step": 51970 }, { "epoch": 8.8356280809111, "grad_norm": 15.120594024658203, "learning_rate": 1.8607286531814836e-05, "loss": 1.4623, "step": 51980 }, { "epoch": 8.837327893931668, "grad_norm": 14.021059036254883, "learning_rate": 1.860445351011389e-05, "loss": 1.2471, "step": 51990 }, { "epoch": 8.839027706952235, "grad_norm": 22.792041778564453, "learning_rate": 1.860162048841294e-05, "loss": 1.5981, "step": 52000 }, { "epoch": 8.840727519972804, "grad_norm": 14.63645076751709, "learning_rate": 1.8598787466711996e-05, "loss": 1.5127, "step": 52010 }, { "epoch": 8.84242733299337, "grad_norm": 13.966776847839355, "learning_rate": 1.859595444501105e-05, "loss": 1.5575, "step": 52020 }, { "epoch": 8.844127146013939, "grad_norm": 14.97574520111084, "learning_rate": 1.8593121423310103e-05, "loss": 1.4474, "step": 52030 }, { "epoch": 8.845826959034506, "grad_norm": 17.627275466918945, "learning_rate": 1.8590288401609157e-05, "loss": 1.6177, "step": 52040 }, { "epoch": 8.847526772055074, "grad_norm": 22.66702651977539, "learning_rate": 1.858745537990821e-05, "loss": 1.5193, "step": 52050 }, { "epoch": 8.849226585075641, "grad_norm": 11.766822814941406, "learning_rate": 1.8584622358207264e-05, "loss": 1.4115, "step": 52060 }, { "epoch": 8.85092639809621, "grad_norm": 17.924654006958008, "learning_rate": 1.858178933650632e-05, "loss": 1.6098, "step": 52070 }, { "epoch": 8.852626211116776, "grad_norm": 16.666521072387695, "learning_rate": 1.857895631480537e-05, "loss": 1.4781, "step": 52080 }, { "epoch": 8.854326024137345, "grad_norm": 30.554502487182617, "learning_rate": 1.8576123293104424e-05, "loss": 1.4903, "step": 52090 }, { "epoch": 8.856025837157912, "grad_norm": 13.721908569335938, "learning_rate": 1.857329027140348e-05, "loss": 1.4465, "step": 52100 }, { "epoch": 8.85772565017848, "grad_norm": 10.350799560546875, "learning_rate": 1.857045724970253e-05, "loss": 1.4976, "step": 52110 }, { "epoch": 8.859425463199049, "grad_norm": 16.044811248779297, "learning_rate": 1.8567624228001585e-05, "loss": 1.4722, "step": 52120 }, { "epoch": 8.861125276219616, "grad_norm": 15.974298477172852, "learning_rate": 1.8564791206300642e-05, "loss": 1.5316, "step": 52130 }, { "epoch": 8.862825089240184, "grad_norm": 12.962904930114746, "learning_rate": 1.8561958184599695e-05, "loss": 1.7032, "step": 52140 }, { "epoch": 8.86452490226075, "grad_norm": 42.65174865722656, "learning_rate": 1.8559125162898745e-05, "loss": 1.4075, "step": 52150 }, { "epoch": 8.86622471528132, "grad_norm": 21.420066833496094, "learning_rate": 1.8556292141197802e-05, "loss": 1.3271, "step": 52160 }, { "epoch": 8.867924528301886, "grad_norm": 11.70039176940918, "learning_rate": 1.8553459119496856e-05, "loss": 1.4659, "step": 52170 }, { "epoch": 8.869624341322455, "grad_norm": 12.924736022949219, "learning_rate": 1.855062609779591e-05, "loss": 1.556, "step": 52180 }, { "epoch": 8.871324154343021, "grad_norm": 14.930855751037598, "learning_rate": 1.8547793076094963e-05, "loss": 1.3699, "step": 52190 }, { "epoch": 8.87302396736359, "grad_norm": 14.142236709594727, "learning_rate": 1.8544960054394016e-05, "loss": 1.4939, "step": 52200 }, { "epoch": 8.874723780384159, "grad_norm": 16.995561599731445, "learning_rate": 1.854212703269307e-05, "loss": 1.4103, "step": 52210 }, { "epoch": 8.876423593404725, "grad_norm": 16.72221565246582, "learning_rate": 1.8539294010992127e-05, "loss": 1.5448, "step": 52220 }, { "epoch": 8.878123406425294, "grad_norm": 22.368772506713867, "learning_rate": 1.8536460989291177e-05, "loss": 1.4048, "step": 52230 }, { "epoch": 8.87982321944586, "grad_norm": 14.64254093170166, "learning_rate": 1.853362796759023e-05, "loss": 1.5985, "step": 52240 }, { "epoch": 8.88152303246643, "grad_norm": 16.932077407836914, "learning_rate": 1.8530794945889287e-05, "loss": 1.576, "step": 52250 }, { "epoch": 8.883222845486996, "grad_norm": 13.894797325134277, "learning_rate": 1.852796192418834e-05, "loss": 1.5518, "step": 52260 }, { "epoch": 8.884922658507564, "grad_norm": 17.21140480041504, "learning_rate": 1.852512890248739e-05, "loss": 1.3268, "step": 52270 }, { "epoch": 8.886622471528131, "grad_norm": 14.68480110168457, "learning_rate": 1.8522295880786448e-05, "loss": 1.511, "step": 52280 }, { "epoch": 8.8883222845487, "grad_norm": 15.159524917602539, "learning_rate": 1.85194628590855e-05, "loss": 1.3708, "step": 52290 }, { "epoch": 8.890022097569268, "grad_norm": 16.644855499267578, "learning_rate": 1.8516629837384552e-05, "loss": 1.38, "step": 52300 }, { "epoch": 8.891721910589835, "grad_norm": 14.521888732910156, "learning_rate": 1.851379681568361e-05, "loss": 1.7252, "step": 52310 }, { "epoch": 8.893421723610404, "grad_norm": 20.14958953857422, "learning_rate": 1.8510963793982662e-05, "loss": 1.4989, "step": 52320 }, { "epoch": 8.89512153663097, "grad_norm": 15.617592811584473, "learning_rate": 1.8508130772281716e-05, "loss": 1.4564, "step": 52330 }, { "epoch": 8.896821349651539, "grad_norm": 20.00077247619629, "learning_rate": 1.850529775058077e-05, "loss": 1.3342, "step": 52340 }, { "epoch": 8.898521162672106, "grad_norm": 15.335179328918457, "learning_rate": 1.8502464728879823e-05, "loss": 1.3521, "step": 52350 }, { "epoch": 8.900220975692674, "grad_norm": 10.453652381896973, "learning_rate": 1.8499631707178876e-05, "loss": 1.6174, "step": 52360 }, { "epoch": 8.901920788713241, "grad_norm": 11.64413070678711, "learning_rate": 1.8496798685477933e-05, "loss": 1.5786, "step": 52370 }, { "epoch": 8.90362060173381, "grad_norm": 10.61569595336914, "learning_rate": 1.8493965663776983e-05, "loss": 1.8233, "step": 52380 }, { "epoch": 8.905320414754376, "grad_norm": 19.421100616455078, "learning_rate": 1.849113264207604e-05, "loss": 1.5185, "step": 52390 }, { "epoch": 8.907020227774945, "grad_norm": 17.731616973876953, "learning_rate": 1.8488299620375094e-05, "loss": 1.5385, "step": 52400 }, { "epoch": 8.908720040795512, "grad_norm": 12.703557968139648, "learning_rate": 1.8485466598674147e-05, "loss": 1.6065, "step": 52410 }, { "epoch": 8.91041985381608, "grad_norm": 10.425838470458984, "learning_rate": 1.84826335769732e-05, "loss": 1.7199, "step": 52420 }, { "epoch": 8.912119666836649, "grad_norm": 10.216784477233887, "learning_rate": 1.8479800555272254e-05, "loss": 1.3371, "step": 52430 }, { "epoch": 8.913819479857215, "grad_norm": 22.163185119628906, "learning_rate": 1.8476967533571308e-05, "loss": 1.6947, "step": 52440 }, { "epoch": 8.915519292877784, "grad_norm": 18.53577995300293, "learning_rate": 1.847413451187036e-05, "loss": 1.3524, "step": 52450 }, { "epoch": 8.91721910589835, "grad_norm": 19.189804077148438, "learning_rate": 1.8471301490169415e-05, "loss": 1.4008, "step": 52460 }, { "epoch": 8.91891891891892, "grad_norm": 15.987190246582031, "learning_rate": 1.846846846846847e-05, "loss": 1.3952, "step": 52470 }, { "epoch": 8.920618731939486, "grad_norm": 15.116157531738281, "learning_rate": 1.8465635446767525e-05, "loss": 1.3328, "step": 52480 }, { "epoch": 8.922318544960055, "grad_norm": 15.295133590698242, "learning_rate": 1.8462802425066575e-05, "loss": 1.3863, "step": 52490 }, { "epoch": 8.924018357980621, "grad_norm": 16.85621452331543, "learning_rate": 1.845996940336563e-05, "loss": 1.4951, "step": 52500 }, { "epoch": 8.92571817100119, "grad_norm": 17.094768524169922, "learning_rate": 1.8457136381664686e-05, "loss": 1.616, "step": 52510 }, { "epoch": 8.927417984021758, "grad_norm": 16.850854873657227, "learning_rate": 1.845430335996374e-05, "loss": 1.5605, "step": 52520 }, { "epoch": 8.929117797042325, "grad_norm": 16.251882553100586, "learning_rate": 1.845147033826279e-05, "loss": 1.5411, "step": 52530 }, { "epoch": 8.930817610062894, "grad_norm": 12.80484390258789, "learning_rate": 1.8448637316561846e-05, "loss": 1.3025, "step": 52540 }, { "epoch": 8.93251742308346, "grad_norm": 15.341597557067871, "learning_rate": 1.84458042948609e-05, "loss": 1.4707, "step": 52550 }, { "epoch": 8.934217236104029, "grad_norm": 15.886282920837402, "learning_rate": 1.8442971273159953e-05, "loss": 1.3894, "step": 52560 }, { "epoch": 8.935917049124596, "grad_norm": 21.960063934326172, "learning_rate": 1.8440138251459007e-05, "loss": 1.4886, "step": 52570 }, { "epoch": 8.937616862145164, "grad_norm": 12.36060619354248, "learning_rate": 1.843730522975806e-05, "loss": 1.3765, "step": 52580 }, { "epoch": 8.939316675165731, "grad_norm": 12.244518280029297, "learning_rate": 1.8434472208057114e-05, "loss": 1.5336, "step": 52590 }, { "epoch": 8.9410164881863, "grad_norm": 10.653550148010254, "learning_rate": 1.843163918635617e-05, "loss": 1.6276, "step": 52600 }, { "epoch": 8.942716301206866, "grad_norm": 11.736052513122559, "learning_rate": 1.842880616465522e-05, "loss": 1.4784, "step": 52610 }, { "epoch": 8.944416114227435, "grad_norm": 13.238510131835938, "learning_rate": 1.8425973142954275e-05, "loss": 1.4164, "step": 52620 }, { "epoch": 8.946115927248004, "grad_norm": 17.88393783569336, "learning_rate": 1.842314012125333e-05, "loss": 1.4881, "step": 52630 }, { "epoch": 8.94781574026857, "grad_norm": 13.086518287658691, "learning_rate": 1.842030709955238e-05, "loss": 1.536, "step": 52640 }, { "epoch": 8.949515553289139, "grad_norm": 29.450708389282227, "learning_rate": 1.8417474077851435e-05, "loss": 1.3389, "step": 52650 }, { "epoch": 8.951215366309706, "grad_norm": 17.250539779663086, "learning_rate": 1.8414641056150492e-05, "loss": 1.415, "step": 52660 }, { "epoch": 8.952915179330274, "grad_norm": 13.366462707519531, "learning_rate": 1.8411808034449546e-05, "loss": 1.4643, "step": 52670 }, { "epoch": 8.954614992350841, "grad_norm": 15.306573867797852, "learning_rate": 1.8408975012748596e-05, "loss": 1.3727, "step": 52680 }, { "epoch": 8.95631480537141, "grad_norm": 20.189565658569336, "learning_rate": 1.8406141991047653e-05, "loss": 1.3916, "step": 52690 }, { "epoch": 8.958014618391976, "grad_norm": 13.770461082458496, "learning_rate": 1.8403308969346706e-05, "loss": 1.5311, "step": 52700 }, { "epoch": 8.959714431412545, "grad_norm": 16.360031127929688, "learning_rate": 1.840047594764576e-05, "loss": 1.6318, "step": 52710 }, { "epoch": 8.961414244433112, "grad_norm": 17.48354148864746, "learning_rate": 1.8397642925944813e-05, "loss": 1.3296, "step": 52720 }, { "epoch": 8.96311405745368, "grad_norm": 17.171937942504883, "learning_rate": 1.8394809904243867e-05, "loss": 1.7202, "step": 52730 }, { "epoch": 8.964813870474249, "grad_norm": 15.307768821716309, "learning_rate": 1.839197688254292e-05, "loss": 1.5133, "step": 52740 }, { "epoch": 8.966513683494815, "grad_norm": 18.705612182617188, "learning_rate": 1.8389143860841977e-05, "loss": 1.7408, "step": 52750 }, { "epoch": 8.968213496515384, "grad_norm": 19.789411544799805, "learning_rate": 1.8386310839141027e-05, "loss": 1.4178, "step": 52760 }, { "epoch": 8.96991330953595, "grad_norm": 15.394417762756348, "learning_rate": 1.838347781744008e-05, "loss": 1.4598, "step": 52770 }, { "epoch": 8.97161312255652, "grad_norm": 12.443199157714844, "learning_rate": 1.8380644795739138e-05, "loss": 1.6807, "step": 52780 }, { "epoch": 8.973312935577086, "grad_norm": 11.001404762268066, "learning_rate": 1.837781177403819e-05, "loss": 1.6392, "step": 52790 }, { "epoch": 8.975012748597655, "grad_norm": 20.516284942626953, "learning_rate": 1.837497875233724e-05, "loss": 1.4999, "step": 52800 }, { "epoch": 8.976712561618221, "grad_norm": 13.369241714477539, "learning_rate": 1.8372145730636298e-05, "loss": 1.5018, "step": 52810 }, { "epoch": 8.97841237463879, "grad_norm": 21.71744728088379, "learning_rate": 1.8369312708935352e-05, "loss": 1.5359, "step": 52820 }, { "epoch": 8.980112187659358, "grad_norm": 11.709768295288086, "learning_rate": 1.8366479687234402e-05, "loss": 1.5032, "step": 52830 }, { "epoch": 8.981812000679925, "grad_norm": 20.898122787475586, "learning_rate": 1.836364666553346e-05, "loss": 1.5192, "step": 52840 }, { "epoch": 8.983511813700494, "grad_norm": 14.790184020996094, "learning_rate": 1.8360813643832512e-05, "loss": 1.2376, "step": 52850 }, { "epoch": 8.98521162672106, "grad_norm": 14.183822631835938, "learning_rate": 1.8357980622131566e-05, "loss": 1.5595, "step": 52860 }, { "epoch": 8.986911439741629, "grad_norm": 13.363118171691895, "learning_rate": 1.835514760043062e-05, "loss": 1.3082, "step": 52870 }, { "epoch": 8.988611252762196, "grad_norm": 14.065319061279297, "learning_rate": 1.8352314578729673e-05, "loss": 1.617, "step": 52880 }, { "epoch": 8.990311065782764, "grad_norm": 14.097311973571777, "learning_rate": 1.8349481557028726e-05, "loss": 1.3926, "step": 52890 }, { "epoch": 8.992010878803331, "grad_norm": 11.0130033493042, "learning_rate": 1.8346648535327783e-05, "loss": 1.2841, "step": 52900 }, { "epoch": 8.9937106918239, "grad_norm": 15.303789138793945, "learning_rate": 1.8343815513626833e-05, "loss": 1.5118, "step": 52910 }, { "epoch": 8.995410504844466, "grad_norm": 10.82986831665039, "learning_rate": 1.8340982491925887e-05, "loss": 1.545, "step": 52920 }, { "epoch": 8.997110317865035, "grad_norm": 17.887964248657227, "learning_rate": 1.8338149470224944e-05, "loss": 1.3683, "step": 52930 }, { "epoch": 8.998810130885602, "grad_norm": 15.61181354522705, "learning_rate": 1.8335316448523997e-05, "loss": 1.6036, "step": 52940 }, { "epoch": 9.0, "eval_cer": 1.0, "eval_loss": 2.469897985458374, "eval_runtime": 1958.1409, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.241, "step": 52947 }, { "epoch": 9.00050994390617, "grad_norm": 15.138931274414062, "learning_rate": 1.8332483426823048e-05, "loss": 1.3065, "step": 52950 }, { "epoch": 9.002209756926739, "grad_norm": 16.95221710205078, "learning_rate": 1.8329650405122104e-05, "loss": 1.2539, "step": 52960 }, { "epoch": 9.003909569947306, "grad_norm": 19.762805938720703, "learning_rate": 1.8326817383421158e-05, "loss": 1.5195, "step": 52970 }, { "epoch": 9.005609382967874, "grad_norm": 11.899115562438965, "learning_rate": 1.8323984361720208e-05, "loss": 1.2697, "step": 52980 }, { "epoch": 9.00730919598844, "grad_norm": 15.276312828063965, "learning_rate": 1.8321151340019265e-05, "loss": 1.2324, "step": 52990 }, { "epoch": 9.00900900900901, "grad_norm": 12.633012771606445, "learning_rate": 1.831831831831832e-05, "loss": 1.5124, "step": 53000 }, { "epoch": 9.010708822029576, "grad_norm": 15.23083782196045, "learning_rate": 1.8315485296617372e-05, "loss": 1.2474, "step": 53010 }, { "epoch": 9.012408635050145, "grad_norm": 15.202747344970703, "learning_rate": 1.8312652274916426e-05, "loss": 1.1396, "step": 53020 }, { "epoch": 9.014108448070711, "grad_norm": 20.171741485595703, "learning_rate": 1.830981925321548e-05, "loss": 1.2214, "step": 53030 }, { "epoch": 9.01580826109128, "grad_norm": 19.02889633178711, "learning_rate": 1.8306986231514533e-05, "loss": 1.2868, "step": 53040 }, { "epoch": 9.017508074111849, "grad_norm": 9.87448501586914, "learning_rate": 1.830415320981359e-05, "loss": 1.2852, "step": 53050 }, { "epoch": 9.019207887132415, "grad_norm": 13.205535888671875, "learning_rate": 1.830132018811264e-05, "loss": 1.3148, "step": 53060 }, { "epoch": 9.020907700152984, "grad_norm": 13.621896743774414, "learning_rate": 1.8298487166411693e-05, "loss": 1.2777, "step": 53070 }, { "epoch": 9.02260751317355, "grad_norm": 23.784805297851562, "learning_rate": 1.829565414471075e-05, "loss": 1.3742, "step": 53080 }, { "epoch": 9.02430732619412, "grad_norm": 14.126296043395996, "learning_rate": 1.8292821123009804e-05, "loss": 1.3824, "step": 53090 }, { "epoch": 9.026007139214686, "grad_norm": 19.064302444458008, "learning_rate": 1.8289988101308854e-05, "loss": 1.3715, "step": 53100 }, { "epoch": 9.027706952235254, "grad_norm": 15.144579887390137, "learning_rate": 1.828715507960791e-05, "loss": 1.3921, "step": 53110 }, { "epoch": 9.029406765255821, "grad_norm": 12.725473403930664, "learning_rate": 1.8284322057906964e-05, "loss": 1.3316, "step": 53120 }, { "epoch": 9.03110657827639, "grad_norm": 19.410417556762695, "learning_rate": 1.8281489036206018e-05, "loss": 1.1689, "step": 53130 }, { "epoch": 9.032806391296957, "grad_norm": 11.79498291015625, "learning_rate": 1.827865601450507e-05, "loss": 1.4261, "step": 53140 }, { "epoch": 9.034506204317525, "grad_norm": 15.987621307373047, "learning_rate": 1.8275822992804125e-05, "loss": 1.2446, "step": 53150 }, { "epoch": 9.036206017338094, "grad_norm": 14.274617195129395, "learning_rate": 1.827298997110318e-05, "loss": 1.2908, "step": 53160 }, { "epoch": 9.03790583035866, "grad_norm": 17.653322219848633, "learning_rate": 1.8270156949402232e-05, "loss": 1.2648, "step": 53170 }, { "epoch": 9.039605643379229, "grad_norm": 21.549503326416016, "learning_rate": 1.8267323927701285e-05, "loss": 1.2424, "step": 53180 }, { "epoch": 9.041305456399796, "grad_norm": 15.690438270568848, "learning_rate": 1.8264490906000342e-05, "loss": 1.4292, "step": 53190 }, { "epoch": 9.043005269420364, "grad_norm": 12.953389167785645, "learning_rate": 1.8261657884299396e-05, "loss": 1.3418, "step": 53200 }, { "epoch": 9.044705082440931, "grad_norm": 22.20635223388672, "learning_rate": 1.8258824862598446e-05, "loss": 1.2376, "step": 53210 }, { "epoch": 9.0464048954615, "grad_norm": 12.275449752807617, "learning_rate": 1.8255991840897503e-05, "loss": 1.4682, "step": 53220 }, { "epoch": 9.048104708482066, "grad_norm": 13.871325492858887, "learning_rate": 1.8253158819196556e-05, "loss": 1.4319, "step": 53230 }, { "epoch": 9.049804521502635, "grad_norm": 17.0900936126709, "learning_rate": 1.825032579749561e-05, "loss": 1.3065, "step": 53240 }, { "epoch": 9.051504334523202, "grad_norm": 12.892945289611816, "learning_rate": 1.8247492775794663e-05, "loss": 1.4254, "step": 53250 }, { "epoch": 9.05320414754377, "grad_norm": 15.022978782653809, "learning_rate": 1.8244659754093717e-05, "loss": 1.1594, "step": 53260 }, { "epoch": 9.054903960564339, "grad_norm": 17.981124877929688, "learning_rate": 1.824182673239277e-05, "loss": 1.4307, "step": 53270 }, { "epoch": 9.056603773584905, "grad_norm": 13.415827751159668, "learning_rate": 1.8238993710691827e-05, "loss": 1.4059, "step": 53280 }, { "epoch": 9.058303586605474, "grad_norm": 15.497806549072266, "learning_rate": 1.8236160688990877e-05, "loss": 1.2911, "step": 53290 }, { "epoch": 9.06000339962604, "grad_norm": 18.292373657226562, "learning_rate": 1.823332766728993e-05, "loss": 1.3906, "step": 53300 }, { "epoch": 9.06170321264661, "grad_norm": 12.637642860412598, "learning_rate": 1.8230494645588988e-05, "loss": 1.3365, "step": 53310 }, { "epoch": 9.063403025667176, "grad_norm": 18.102962493896484, "learning_rate": 1.8227661623888038e-05, "loss": 1.4002, "step": 53320 }, { "epoch": 9.065102838687745, "grad_norm": 16.92891502380371, "learning_rate": 1.822482860218709e-05, "loss": 1.092, "step": 53330 }, { "epoch": 9.066802651708311, "grad_norm": 13.544093132019043, "learning_rate": 1.822199558048615e-05, "loss": 1.3147, "step": 53340 }, { "epoch": 9.06850246472888, "grad_norm": 15.509353637695312, "learning_rate": 1.8219162558785202e-05, "loss": 1.2923, "step": 53350 }, { "epoch": 9.070202277749448, "grad_norm": 15.792583465576172, "learning_rate": 1.8216329537084252e-05, "loss": 1.3097, "step": 53360 }, { "epoch": 9.071902090770015, "grad_norm": 12.412096977233887, "learning_rate": 1.821349651538331e-05, "loss": 1.4404, "step": 53370 }, { "epoch": 9.073601903790584, "grad_norm": 10.390374183654785, "learning_rate": 1.8210663493682363e-05, "loss": 1.3515, "step": 53380 }, { "epoch": 9.07530171681115, "grad_norm": 67.9982681274414, "learning_rate": 1.8207830471981416e-05, "loss": 1.2164, "step": 53390 }, { "epoch": 9.077001529831719, "grad_norm": 49.37587356567383, "learning_rate": 1.820499745028047e-05, "loss": 1.3343, "step": 53400 }, { "epoch": 9.078701342852286, "grad_norm": 15.37983512878418, "learning_rate": 1.8202164428579523e-05, "loss": 1.3707, "step": 53410 }, { "epoch": 9.080401155872854, "grad_norm": 17.248910903930664, "learning_rate": 1.8199331406878577e-05, "loss": 1.155, "step": 53420 }, { "epoch": 9.082100968893421, "grad_norm": 38.172752380371094, "learning_rate": 1.8196498385177634e-05, "loss": 1.3366, "step": 53430 }, { "epoch": 9.08380078191399, "grad_norm": 14.927475929260254, "learning_rate": 1.8193665363476684e-05, "loss": 1.2844, "step": 53440 }, { "epoch": 9.085500594934556, "grad_norm": 14.979321479797363, "learning_rate": 1.8190832341775737e-05, "loss": 1.2124, "step": 53450 }, { "epoch": 9.087200407955125, "grad_norm": 18.60042381286621, "learning_rate": 1.8187999320074794e-05, "loss": 1.4124, "step": 53460 }, { "epoch": 9.088900220975694, "grad_norm": 22.753631591796875, "learning_rate": 1.8185166298373848e-05, "loss": 1.305, "step": 53470 }, { "epoch": 9.09060003399626, "grad_norm": 16.58887481689453, "learning_rate": 1.8182333276672898e-05, "loss": 1.3173, "step": 53480 }, { "epoch": 9.092299847016829, "grad_norm": 14.380728721618652, "learning_rate": 1.8179500254971955e-05, "loss": 1.2494, "step": 53490 }, { "epoch": 9.093999660037396, "grad_norm": 9.992303848266602, "learning_rate": 1.8176667233271008e-05, "loss": 1.1515, "step": 53500 }, { "epoch": 9.095699473057964, "grad_norm": 15.525525093078613, "learning_rate": 1.817383421157006e-05, "loss": 1.3318, "step": 53510 }, { "epoch": 9.09739928607853, "grad_norm": 13.648642539978027, "learning_rate": 1.8171001189869115e-05, "loss": 1.3997, "step": 53520 }, { "epoch": 9.0990990990991, "grad_norm": 13.946371078491211, "learning_rate": 1.816816816816817e-05, "loss": 1.228, "step": 53530 }, { "epoch": 9.100798912119666, "grad_norm": 21.957530975341797, "learning_rate": 1.8165335146467222e-05, "loss": 1.1361, "step": 53540 }, { "epoch": 9.102498725140235, "grad_norm": 15.047093391418457, "learning_rate": 1.8162502124766276e-05, "loss": 1.1455, "step": 53550 }, { "epoch": 9.104198538160801, "grad_norm": 20.358728408813477, "learning_rate": 1.815966910306533e-05, "loss": 1.2969, "step": 53560 }, { "epoch": 9.10589835118137, "grad_norm": 18.215457916259766, "learning_rate": 1.8156836081364383e-05, "loss": 1.4252, "step": 53570 }, { "epoch": 9.107598164201939, "grad_norm": 15.220858573913574, "learning_rate": 1.815400305966344e-05, "loss": 1.5608, "step": 53580 }, { "epoch": 9.109297977222505, "grad_norm": 7.849105358123779, "learning_rate": 1.815117003796249e-05, "loss": 1.4107, "step": 53590 }, { "epoch": 9.110997790243074, "grad_norm": 13.03510570526123, "learning_rate": 1.8148337016261543e-05, "loss": 1.5346, "step": 53600 }, { "epoch": 9.11269760326364, "grad_norm": 14.677459716796875, "learning_rate": 1.81455039945606e-05, "loss": 1.3397, "step": 53610 }, { "epoch": 9.11439741628421, "grad_norm": 15.390623092651367, "learning_rate": 1.8142670972859654e-05, "loss": 1.0928, "step": 53620 }, { "epoch": 9.116097229304776, "grad_norm": 14.634507179260254, "learning_rate": 1.8139837951158704e-05, "loss": 1.4464, "step": 53630 }, { "epoch": 9.117797042325344, "grad_norm": 17.297462463378906, "learning_rate": 1.813700492945776e-05, "loss": 1.2562, "step": 53640 }, { "epoch": 9.119496855345911, "grad_norm": 17.462114334106445, "learning_rate": 1.8134171907756814e-05, "loss": 1.3338, "step": 53650 }, { "epoch": 9.12119666836648, "grad_norm": 23.87311553955078, "learning_rate": 1.8131338886055868e-05, "loss": 1.1632, "step": 53660 }, { "epoch": 9.122896481387047, "grad_norm": 16.572145462036133, "learning_rate": 1.812850586435492e-05, "loss": 1.1511, "step": 53670 }, { "epoch": 9.124596294407615, "grad_norm": 21.37348747253418, "learning_rate": 1.8125672842653975e-05, "loss": 1.3534, "step": 53680 }, { "epoch": 9.126296107428184, "grad_norm": 15.746362686157227, "learning_rate": 1.812283982095303e-05, "loss": 1.4056, "step": 53690 }, { "epoch": 9.12799592044875, "grad_norm": 15.158108711242676, "learning_rate": 1.8120006799252082e-05, "loss": 1.2974, "step": 53700 }, { "epoch": 9.129695733469319, "grad_norm": 12.103832244873047, "learning_rate": 1.8117173777551136e-05, "loss": 1.316, "step": 53710 }, { "epoch": 9.131395546489886, "grad_norm": 14.669291496276855, "learning_rate": 1.811434075585019e-05, "loss": 1.3786, "step": 53720 }, { "epoch": 9.133095359510454, "grad_norm": 15.424217224121094, "learning_rate": 1.8111507734149246e-05, "loss": 1.004, "step": 53730 }, { "epoch": 9.134795172531021, "grad_norm": 16.115154266357422, "learning_rate": 1.8108674712448296e-05, "loss": 1.5154, "step": 53740 }, { "epoch": 9.13649498555159, "grad_norm": 16.85526466369629, "learning_rate": 1.810584169074735e-05, "loss": 1.3596, "step": 53750 }, { "epoch": 9.138194798572156, "grad_norm": 16.849777221679688, "learning_rate": 1.8103008669046407e-05, "loss": 1.5484, "step": 53760 }, { "epoch": 9.139894611592725, "grad_norm": 17.19001007080078, "learning_rate": 1.810017564734546e-05, "loss": 1.1353, "step": 53770 }, { "epoch": 9.141594424613292, "grad_norm": 11.834454536437988, "learning_rate": 1.809734262564451e-05, "loss": 1.1874, "step": 53780 }, { "epoch": 9.14329423763386, "grad_norm": 16.348512649536133, "learning_rate": 1.8094509603943567e-05, "loss": 1.3363, "step": 53790 }, { "epoch": 9.144994050654429, "grad_norm": 15.76944351196289, "learning_rate": 1.809167658224262e-05, "loss": 1.1004, "step": 53800 }, { "epoch": 9.146693863674995, "grad_norm": 10.086339950561523, "learning_rate": 1.8088843560541674e-05, "loss": 1.5855, "step": 53810 }, { "epoch": 9.148393676695564, "grad_norm": 15.118208885192871, "learning_rate": 1.8086010538840728e-05, "loss": 1.4652, "step": 53820 }, { "epoch": 9.15009348971613, "grad_norm": 14.42131233215332, "learning_rate": 1.808317751713978e-05, "loss": 1.2659, "step": 53830 }, { "epoch": 9.1517933027367, "grad_norm": 13.149345397949219, "learning_rate": 1.8080344495438835e-05, "loss": 1.3139, "step": 53840 }, { "epoch": 9.153493115757266, "grad_norm": 15.17807388305664, "learning_rate": 1.8077511473737888e-05, "loss": 1.1186, "step": 53850 }, { "epoch": 9.155192928777835, "grad_norm": 10.511011123657227, "learning_rate": 1.8074678452036942e-05, "loss": 1.2966, "step": 53860 }, { "epoch": 9.156892741798401, "grad_norm": 13.444091796875, "learning_rate": 1.8071845430336e-05, "loss": 1.358, "step": 53870 }, { "epoch": 9.15859255481897, "grad_norm": 15.556344032287598, "learning_rate": 1.8069012408635052e-05, "loss": 1.3582, "step": 53880 }, { "epoch": 9.160292367839538, "grad_norm": 12.684870719909668, "learning_rate": 1.8066179386934102e-05, "loss": 1.1339, "step": 53890 }, { "epoch": 9.161992180860105, "grad_norm": 14.082263946533203, "learning_rate": 1.806334636523316e-05, "loss": 1.3721, "step": 53900 }, { "epoch": 9.163691993880674, "grad_norm": 14.445767402648926, "learning_rate": 1.8060513343532213e-05, "loss": 1.6032, "step": 53910 }, { "epoch": 9.16539180690124, "grad_norm": 16.14864158630371, "learning_rate": 1.8057680321831266e-05, "loss": 1.415, "step": 53920 }, { "epoch": 9.167091619921809, "grad_norm": 15.67900562286377, "learning_rate": 1.805484730013032e-05, "loss": 1.4993, "step": 53930 }, { "epoch": 9.168791432942376, "grad_norm": 16.777929306030273, "learning_rate": 1.8052014278429373e-05, "loss": 1.3976, "step": 53940 }, { "epoch": 9.170491245962944, "grad_norm": 16.588682174682617, "learning_rate": 1.8049181256728427e-05, "loss": 1.4858, "step": 53950 }, { "epoch": 9.172191058983511, "grad_norm": 15.042276382446289, "learning_rate": 1.8046348235027484e-05, "loss": 1.3872, "step": 53960 }, { "epoch": 9.17389087200408, "grad_norm": 14.126724243164062, "learning_rate": 1.8043515213326534e-05, "loss": 1.5973, "step": 53970 }, { "epoch": 9.175590685024646, "grad_norm": 12.40029239654541, "learning_rate": 1.8040682191625587e-05, "loss": 1.2161, "step": 53980 }, { "epoch": 9.177290498045215, "grad_norm": 16.390371322631836, "learning_rate": 1.8037849169924644e-05, "loss": 1.4703, "step": 53990 }, { "epoch": 9.178990311065784, "grad_norm": 14.603211402893066, "learning_rate": 1.8035016148223698e-05, "loss": 1.1591, "step": 54000 }, { "epoch": 9.18069012408635, "grad_norm": 24.332263946533203, "learning_rate": 1.8032183126522748e-05, "loss": 1.368, "step": 54010 }, { "epoch": 9.182389937106919, "grad_norm": 13.190400123596191, "learning_rate": 1.8029350104821805e-05, "loss": 1.3812, "step": 54020 }, { "epoch": 9.184089750127486, "grad_norm": 14.634663581848145, "learning_rate": 1.802651708312086e-05, "loss": 1.3993, "step": 54030 }, { "epoch": 9.185789563148054, "grad_norm": 18.05339241027832, "learning_rate": 1.802368406141991e-05, "loss": 1.3522, "step": 54040 }, { "epoch": 9.187489376168621, "grad_norm": 14.897225379943848, "learning_rate": 1.8020851039718965e-05, "loss": 1.3959, "step": 54050 }, { "epoch": 9.18918918918919, "grad_norm": 15.925865173339844, "learning_rate": 1.801801801801802e-05, "loss": 1.3829, "step": 54060 }, { "epoch": 9.190889002209756, "grad_norm": 18.961416244506836, "learning_rate": 1.8015184996317072e-05, "loss": 1.3112, "step": 54070 }, { "epoch": 9.192588815230325, "grad_norm": 16.65428352355957, "learning_rate": 1.8012351974616126e-05, "loss": 1.1398, "step": 54080 }, { "epoch": 9.194288628250892, "grad_norm": 15.467334747314453, "learning_rate": 1.800951895291518e-05, "loss": 1.1223, "step": 54090 }, { "epoch": 9.19598844127146, "grad_norm": 14.942085266113281, "learning_rate": 1.8006685931214233e-05, "loss": 1.4686, "step": 54100 }, { "epoch": 9.197688254292029, "grad_norm": 21.017213821411133, "learning_rate": 1.800385290951329e-05, "loss": 1.2918, "step": 54110 }, { "epoch": 9.199388067312595, "grad_norm": 11.572039604187012, "learning_rate": 1.800101988781234e-05, "loss": 1.3475, "step": 54120 }, { "epoch": 9.201087880333164, "grad_norm": 21.78547477722168, "learning_rate": 1.7998186866111394e-05, "loss": 1.2816, "step": 54130 }, { "epoch": 9.20278769335373, "grad_norm": 15.410125732421875, "learning_rate": 1.799535384441045e-05, "loss": 1.2855, "step": 54140 }, { "epoch": 9.2044875063743, "grad_norm": 13.423827171325684, "learning_rate": 1.7992520822709504e-05, "loss": 1.3969, "step": 54150 }, { "epoch": 9.206187319394866, "grad_norm": 18.02946662902832, "learning_rate": 1.7989687801008554e-05, "loss": 1.3282, "step": 54160 }, { "epoch": 9.207887132415435, "grad_norm": 15.751209259033203, "learning_rate": 1.798685477930761e-05, "loss": 1.4769, "step": 54170 }, { "epoch": 9.209586945436001, "grad_norm": 17.96636962890625, "learning_rate": 1.7984021757606665e-05, "loss": 1.4546, "step": 54180 }, { "epoch": 9.21128675845657, "grad_norm": 15.683955192565918, "learning_rate": 1.7981188735905715e-05, "loss": 1.1759, "step": 54190 }, { "epoch": 9.212986571477138, "grad_norm": 17.81399917602539, "learning_rate": 1.797835571420477e-05, "loss": 1.5428, "step": 54200 }, { "epoch": 9.214686384497705, "grad_norm": 16.897371292114258, "learning_rate": 1.7975522692503825e-05, "loss": 1.1575, "step": 54210 }, { "epoch": 9.216386197518274, "grad_norm": 11.6671781539917, "learning_rate": 1.797268967080288e-05, "loss": 1.4594, "step": 54220 }, { "epoch": 9.21808601053884, "grad_norm": 13.572989463806152, "learning_rate": 1.7969856649101932e-05, "loss": 1.2749, "step": 54230 }, { "epoch": 9.219785823559409, "grad_norm": 14.463102340698242, "learning_rate": 1.7967023627400986e-05, "loss": 1.4104, "step": 54240 }, { "epoch": 9.221485636579976, "grad_norm": 19.904621124267578, "learning_rate": 1.796419060570004e-05, "loss": 1.2209, "step": 54250 }, { "epoch": 9.223185449600544, "grad_norm": 13.110740661621094, "learning_rate": 1.7961357583999096e-05, "loss": 1.1854, "step": 54260 }, { "epoch": 9.224885262621111, "grad_norm": 15.38334846496582, "learning_rate": 1.7958524562298146e-05, "loss": 1.6112, "step": 54270 }, { "epoch": 9.22658507564168, "grad_norm": 14.323334693908691, "learning_rate": 1.79556915405972e-05, "loss": 1.0982, "step": 54280 }, { "epoch": 9.228284888662246, "grad_norm": 13.362373352050781, "learning_rate": 1.7952858518896257e-05, "loss": 1.2888, "step": 54290 }, { "epoch": 9.229984701682815, "grad_norm": 13.228784561157227, "learning_rate": 1.795002549719531e-05, "loss": 1.1717, "step": 54300 }, { "epoch": 9.231684514703383, "grad_norm": 13.260489463806152, "learning_rate": 1.794719247549436e-05, "loss": 1.333, "step": 54310 }, { "epoch": 9.23338432772395, "grad_norm": 11.630232810974121, "learning_rate": 1.7944359453793417e-05, "loss": 1.2264, "step": 54320 }, { "epoch": 9.235084140744519, "grad_norm": 13.10587215423584, "learning_rate": 1.794152643209247e-05, "loss": 1.3109, "step": 54330 }, { "epoch": 9.236783953765086, "grad_norm": 13.08385944366455, "learning_rate": 1.7938693410391524e-05, "loss": 1.1829, "step": 54340 }, { "epoch": 9.238483766785654, "grad_norm": 15.819772720336914, "learning_rate": 1.7935860388690578e-05, "loss": 1.2251, "step": 54350 }, { "epoch": 9.24018357980622, "grad_norm": 12.689817428588867, "learning_rate": 1.793302736698963e-05, "loss": 1.3686, "step": 54360 }, { "epoch": 9.24188339282679, "grad_norm": 13.254007339477539, "learning_rate": 1.7930194345288685e-05, "loss": 1.2934, "step": 54370 }, { "epoch": 9.243583205847356, "grad_norm": 12.989713668823242, "learning_rate": 1.792736132358774e-05, "loss": 1.3816, "step": 54380 }, { "epoch": 9.245283018867925, "grad_norm": 15.124445915222168, "learning_rate": 1.7924528301886792e-05, "loss": 1.436, "step": 54390 }, { "epoch": 9.246982831888491, "grad_norm": 14.686720848083496, "learning_rate": 1.7921695280185845e-05, "loss": 1.4707, "step": 54400 }, { "epoch": 9.24868264490906, "grad_norm": 22.59539794921875, "learning_rate": 1.7918862258484902e-05, "loss": 1.2226, "step": 54410 }, { "epoch": 9.250382457929629, "grad_norm": 13.970053672790527, "learning_rate": 1.7916029236783953e-05, "loss": 1.3002, "step": 54420 }, { "epoch": 9.252082270950195, "grad_norm": 15.190274238586426, "learning_rate": 1.7913196215083006e-05, "loss": 1.5, "step": 54430 }, { "epoch": 9.253782083970764, "grad_norm": 12.440534591674805, "learning_rate": 1.7910363193382063e-05, "loss": 1.3638, "step": 54440 }, { "epoch": 9.25548189699133, "grad_norm": 15.480193138122559, "learning_rate": 1.7907530171681116e-05, "loss": 1.3074, "step": 54450 }, { "epoch": 9.2571817100119, "grad_norm": 16.62567710876465, "learning_rate": 1.7904697149980167e-05, "loss": 1.4009, "step": 54460 }, { "epoch": 9.258881523032466, "grad_norm": 15.960346221923828, "learning_rate": 1.7901864128279224e-05, "loss": 1.2113, "step": 54470 }, { "epoch": 9.260581336053034, "grad_norm": 15.343902587890625, "learning_rate": 1.7899031106578277e-05, "loss": 1.5062, "step": 54480 }, { "epoch": 9.262281149073601, "grad_norm": 14.702249526977539, "learning_rate": 1.789619808487733e-05, "loss": 1.2991, "step": 54490 }, { "epoch": 9.26398096209417, "grad_norm": 12.925145149230957, "learning_rate": 1.7893365063176384e-05, "loss": 1.2014, "step": 54500 }, { "epoch": 9.265680775114737, "grad_norm": 14.221511840820312, "learning_rate": 1.7890532041475438e-05, "loss": 1.2758, "step": 54510 }, { "epoch": 9.267380588135305, "grad_norm": 18.780868530273438, "learning_rate": 1.788769901977449e-05, "loss": 1.1841, "step": 54520 }, { "epoch": 9.269080401155874, "grad_norm": 12.032203674316406, "learning_rate": 1.7884865998073545e-05, "loss": 1.3232, "step": 54530 }, { "epoch": 9.27078021417644, "grad_norm": 17.709184646606445, "learning_rate": 1.7882032976372598e-05, "loss": 1.2988, "step": 54540 }, { "epoch": 9.272480027197009, "grad_norm": 16.745676040649414, "learning_rate": 1.7879199954671652e-05, "loss": 1.3665, "step": 54550 }, { "epoch": 9.274179840217576, "grad_norm": 15.706480026245117, "learning_rate": 1.787636693297071e-05, "loss": 1.3068, "step": 54560 }, { "epoch": 9.275879653238144, "grad_norm": 13.9956693649292, "learning_rate": 1.787353391126976e-05, "loss": 1.3691, "step": 54570 }, { "epoch": 9.277579466258711, "grad_norm": 17.539302825927734, "learning_rate": 1.7870700889568816e-05, "loss": 1.4516, "step": 54580 }, { "epoch": 9.27927927927928, "grad_norm": 15.092859268188477, "learning_rate": 1.786786786786787e-05, "loss": 1.6319, "step": 54590 }, { "epoch": 9.280979092299846, "grad_norm": 13.923948287963867, "learning_rate": 1.7865034846166923e-05, "loss": 1.3092, "step": 54600 }, { "epoch": 9.282678905320415, "grad_norm": 16.19135284423828, "learning_rate": 1.7862201824465976e-05, "loss": 1.3602, "step": 54610 }, { "epoch": 9.284378718340982, "grad_norm": 11.683504104614258, "learning_rate": 1.785936880276503e-05, "loss": 1.2733, "step": 54620 }, { "epoch": 9.28607853136155, "grad_norm": 26.73036766052246, "learning_rate": 1.7856535781064083e-05, "loss": 1.3918, "step": 54630 }, { "epoch": 9.287778344382119, "grad_norm": 14.772802352905273, "learning_rate": 1.785370275936314e-05, "loss": 1.3338, "step": 54640 }, { "epoch": 9.289478157402685, "grad_norm": 15.055011749267578, "learning_rate": 1.785086973766219e-05, "loss": 1.3812, "step": 54650 }, { "epoch": 9.291177970423254, "grad_norm": 15.156900405883789, "learning_rate": 1.7848036715961244e-05, "loss": 1.5512, "step": 54660 }, { "epoch": 9.29287778344382, "grad_norm": 24.586915969848633, "learning_rate": 1.78452036942603e-05, "loss": 1.3262, "step": 54670 }, { "epoch": 9.29457759646439, "grad_norm": 35.737518310546875, "learning_rate": 1.7842370672559354e-05, "loss": 1.1954, "step": 54680 }, { "epoch": 9.296277409484956, "grad_norm": 16.345142364501953, "learning_rate": 1.7839537650858404e-05, "loss": 1.388, "step": 54690 }, { "epoch": 9.297977222505525, "grad_norm": 25.32347869873047, "learning_rate": 1.783670462915746e-05, "loss": 1.2303, "step": 54700 }, { "epoch": 9.299677035526091, "grad_norm": 13.406492233276367, "learning_rate": 1.7833871607456515e-05, "loss": 1.4465, "step": 54710 }, { "epoch": 9.30137684854666, "grad_norm": 12.647465705871582, "learning_rate": 1.7831038585755565e-05, "loss": 1.4213, "step": 54720 }, { "epoch": 9.303076661567228, "grad_norm": 13.350075721740723, "learning_rate": 1.7828205564054622e-05, "loss": 1.6319, "step": 54730 }, { "epoch": 9.304776474587795, "grad_norm": 27.189470291137695, "learning_rate": 1.7825372542353675e-05, "loss": 1.2822, "step": 54740 }, { "epoch": 9.306476287608364, "grad_norm": 12.343219757080078, "learning_rate": 1.782253952065273e-05, "loss": 1.3852, "step": 54750 }, { "epoch": 9.30817610062893, "grad_norm": 14.554994583129883, "learning_rate": 1.7819706498951782e-05, "loss": 1.2506, "step": 54760 }, { "epoch": 9.309875913649499, "grad_norm": 52.70374298095703, "learning_rate": 1.7816873477250836e-05, "loss": 1.4233, "step": 54770 }, { "epoch": 9.311575726670066, "grad_norm": 16.6230525970459, "learning_rate": 1.781404045554989e-05, "loss": 1.2511, "step": 54780 }, { "epoch": 9.313275539690634, "grad_norm": 12.668806076049805, "learning_rate": 1.7811207433848946e-05, "loss": 1.2069, "step": 54790 }, { "epoch": 9.314975352711201, "grad_norm": 17.256938934326172, "learning_rate": 1.7808374412147997e-05, "loss": 1.3718, "step": 54800 }, { "epoch": 9.31667516573177, "grad_norm": 14.669392585754395, "learning_rate": 1.780554139044705e-05, "loss": 1.1855, "step": 54810 }, { "epoch": 9.318374978752336, "grad_norm": 27.34807014465332, "learning_rate": 1.7802708368746107e-05, "loss": 1.3016, "step": 54820 }, { "epoch": 9.320074791772905, "grad_norm": 13.972529411315918, "learning_rate": 1.779987534704516e-05, "loss": 1.3277, "step": 54830 }, { "epoch": 9.321774604793474, "grad_norm": 16.688310623168945, "learning_rate": 1.779704232534421e-05, "loss": 1.2173, "step": 54840 }, { "epoch": 9.32347441781404, "grad_norm": 16.793434143066406, "learning_rate": 1.7794209303643268e-05, "loss": 1.3179, "step": 54850 }, { "epoch": 9.325174230834609, "grad_norm": 10.213579177856445, "learning_rate": 1.779137628194232e-05, "loss": 1.3128, "step": 54860 }, { "epoch": 9.326874043855176, "grad_norm": 13.846261024475098, "learning_rate": 1.7788543260241375e-05, "loss": 1.2191, "step": 54870 }, { "epoch": 9.328573856875744, "grad_norm": 15.313227653503418, "learning_rate": 1.7785710238540428e-05, "loss": 1.4654, "step": 54880 }, { "epoch": 9.330273669896311, "grad_norm": 13.39500617980957, "learning_rate": 1.778287721683948e-05, "loss": 1.2259, "step": 54890 }, { "epoch": 9.33197348291688, "grad_norm": 11.170334815979004, "learning_rate": 1.7780044195138535e-05, "loss": 1.4239, "step": 54900 }, { "epoch": 9.333673295937446, "grad_norm": 14.67405891418457, "learning_rate": 1.777721117343759e-05, "loss": 1.2363, "step": 54910 }, { "epoch": 9.335373108958015, "grad_norm": 24.3085994720459, "learning_rate": 1.7774378151736642e-05, "loss": 1.4817, "step": 54920 }, { "epoch": 9.337072921978582, "grad_norm": 14.60227108001709, "learning_rate": 1.7771545130035696e-05, "loss": 1.1263, "step": 54930 }, { "epoch": 9.33877273499915, "grad_norm": 21.52391242980957, "learning_rate": 1.7768712108334753e-05, "loss": 1.2353, "step": 54940 }, { "epoch": 9.340472548019719, "grad_norm": 15.142924308776855, "learning_rate": 1.7765879086633803e-05, "loss": 1.3503, "step": 54950 }, { "epoch": 9.342172361040285, "grad_norm": 14.573707580566406, "learning_rate": 1.7763046064932856e-05, "loss": 1.2774, "step": 54960 }, { "epoch": 9.343872174060854, "grad_norm": 16.747594833374023, "learning_rate": 1.7760213043231913e-05, "loss": 1.398, "step": 54970 }, { "epoch": 9.34557198708142, "grad_norm": 19.598251342773438, "learning_rate": 1.7757380021530967e-05, "loss": 1.3106, "step": 54980 }, { "epoch": 9.34727180010199, "grad_norm": 1180.98974609375, "learning_rate": 1.7754546999830017e-05, "loss": 1.3711, "step": 54990 }, { "epoch": 9.348971613122556, "grad_norm": 14.97021198272705, "learning_rate": 1.7751713978129074e-05, "loss": 1.3068, "step": 55000 }, { "epoch": 9.350671426143125, "grad_norm": 11.409139633178711, "learning_rate": 1.7748880956428127e-05, "loss": 1.4257, "step": 55010 }, { "epoch": 9.352371239163691, "grad_norm": 12.771403312683105, "learning_rate": 1.774604793472718e-05, "loss": 1.3455, "step": 55020 }, { "epoch": 9.35407105218426, "grad_norm": 15.083358764648438, "learning_rate": 1.7743214913026234e-05, "loss": 1.2058, "step": 55030 }, { "epoch": 9.355770865204828, "grad_norm": 14.23222541809082, "learning_rate": 1.7740381891325288e-05, "loss": 1.3472, "step": 55040 }, { "epoch": 9.357470678225395, "grad_norm": 16.13986587524414, "learning_rate": 1.773754886962434e-05, "loss": 1.2683, "step": 55050 }, { "epoch": 9.359170491245964, "grad_norm": 15.984464645385742, "learning_rate": 1.7734715847923395e-05, "loss": 1.3836, "step": 55060 }, { "epoch": 9.36087030426653, "grad_norm": 14.675846099853516, "learning_rate": 1.773188282622245e-05, "loss": 1.158, "step": 55070 }, { "epoch": 9.362570117287099, "grad_norm": 11.503443717956543, "learning_rate": 1.7729049804521502e-05, "loss": 1.3746, "step": 55080 }, { "epoch": 9.364269930307666, "grad_norm": 16.047874450683594, "learning_rate": 1.772621678282056e-05, "loss": 1.4116, "step": 55090 }, { "epoch": 9.365969743328234, "grad_norm": 12.080984115600586, "learning_rate": 1.772338376111961e-05, "loss": 1.2847, "step": 55100 }, { "epoch": 9.367669556348801, "grad_norm": 14.20535659790039, "learning_rate": 1.7720550739418662e-05, "loss": 1.1701, "step": 55110 }, { "epoch": 9.36936936936937, "grad_norm": 11.909639358520508, "learning_rate": 1.771771771771772e-05, "loss": 1.3493, "step": 55120 }, { "epoch": 9.371069182389936, "grad_norm": 20.61135482788086, "learning_rate": 1.7714884696016773e-05, "loss": 1.2833, "step": 55130 }, { "epoch": 9.372768995410505, "grad_norm": 19.67620849609375, "learning_rate": 1.7712051674315823e-05, "loss": 1.3903, "step": 55140 }, { "epoch": 9.374468808431073, "grad_norm": 12.599621772766113, "learning_rate": 1.770921865261488e-05, "loss": 1.573, "step": 55150 }, { "epoch": 9.37616862145164, "grad_norm": 14.629220962524414, "learning_rate": 1.7706385630913933e-05, "loss": 1.2635, "step": 55160 }, { "epoch": 9.377868434472209, "grad_norm": 11.476117134094238, "learning_rate": 1.7703552609212987e-05, "loss": 1.3746, "step": 55170 }, { "epoch": 9.379568247492776, "grad_norm": 17.516630172729492, "learning_rate": 1.770071958751204e-05, "loss": 1.3076, "step": 55180 }, { "epoch": 9.381268060513344, "grad_norm": 15.995636940002441, "learning_rate": 1.7697886565811094e-05, "loss": 1.3279, "step": 55190 }, { "epoch": 9.38296787353391, "grad_norm": 14.42573356628418, "learning_rate": 1.7695053544110148e-05, "loss": 1.3365, "step": 55200 }, { "epoch": 9.38466768655448, "grad_norm": 15.628149032592773, "learning_rate": 1.7692220522409204e-05, "loss": 1.4574, "step": 55210 }, { "epoch": 9.386367499575046, "grad_norm": 23.504079818725586, "learning_rate": 1.7689387500708255e-05, "loss": 1.4118, "step": 55220 }, { "epoch": 9.388067312595615, "grad_norm": 16.617652893066406, "learning_rate": 1.7686554479007308e-05, "loss": 1.2637, "step": 55230 }, { "epoch": 9.389767125616181, "grad_norm": 19.053367614746094, "learning_rate": 1.7683721457306365e-05, "loss": 1.4705, "step": 55240 }, { "epoch": 9.39146693863675, "grad_norm": 14.29977035522461, "learning_rate": 1.7680888435605415e-05, "loss": 1.3848, "step": 55250 }, { "epoch": 9.393166751657319, "grad_norm": 16.369892120361328, "learning_rate": 1.767805541390447e-05, "loss": 1.3303, "step": 55260 }, { "epoch": 9.394866564677885, "grad_norm": 11.829463005065918, "learning_rate": 1.7675222392203526e-05, "loss": 1.5069, "step": 55270 }, { "epoch": 9.396566377698454, "grad_norm": 42.96257781982422, "learning_rate": 1.767238937050258e-05, "loss": 1.4155, "step": 55280 }, { "epoch": 9.39826619071902, "grad_norm": 14.773571968078613, "learning_rate": 1.766955634880163e-05, "loss": 1.4071, "step": 55290 }, { "epoch": 9.39996600373959, "grad_norm": 17.846084594726562, "learning_rate": 1.7666723327100686e-05, "loss": 1.261, "step": 55300 }, { "epoch": 9.401665816760156, "grad_norm": 15.398706436157227, "learning_rate": 1.766389030539974e-05, "loss": 1.3711, "step": 55310 }, { "epoch": 9.403365629780724, "grad_norm": 25.374086380004883, "learning_rate": 1.7661057283698797e-05, "loss": 1.3755, "step": 55320 }, { "epoch": 9.405065442801291, "grad_norm": 25.7300968170166, "learning_rate": 1.7658224261997847e-05, "loss": 1.251, "step": 55330 }, { "epoch": 9.40676525582186, "grad_norm": 11.827119827270508, "learning_rate": 1.76553912402969e-05, "loss": 1.5165, "step": 55340 }, { "epoch": 9.408465068842427, "grad_norm": 20.968355178833008, "learning_rate": 1.7652558218595957e-05, "loss": 1.3622, "step": 55350 }, { "epoch": 9.410164881862995, "grad_norm": 17.958742141723633, "learning_rate": 1.764972519689501e-05, "loss": 1.4634, "step": 55360 }, { "epoch": 9.411864694883564, "grad_norm": 13.1669921875, "learning_rate": 1.764689217519406e-05, "loss": 1.3076, "step": 55370 }, { "epoch": 9.41356450790413, "grad_norm": 18.596067428588867, "learning_rate": 1.7644059153493118e-05, "loss": 1.3906, "step": 55380 }, { "epoch": 9.415264320924699, "grad_norm": 11.508535385131836, "learning_rate": 1.764122613179217e-05, "loss": 1.175, "step": 55390 }, { "epoch": 9.416964133945266, "grad_norm": 12.773887634277344, "learning_rate": 1.763839311009122e-05, "loss": 1.1108, "step": 55400 }, { "epoch": 9.418663946965834, "grad_norm": 13.053357124328613, "learning_rate": 1.7635560088390278e-05, "loss": 1.3049, "step": 55410 }, { "epoch": 9.420363759986401, "grad_norm": 20.740455627441406, "learning_rate": 1.7632727066689332e-05, "loss": 1.2582, "step": 55420 }, { "epoch": 9.42206357300697, "grad_norm": 16.69809341430664, "learning_rate": 1.7629894044988385e-05, "loss": 1.7293, "step": 55430 }, { "epoch": 9.423763386027536, "grad_norm": 15.339371681213379, "learning_rate": 1.762706102328744e-05, "loss": 1.236, "step": 55440 }, { "epoch": 9.425463199048105, "grad_norm": 11.960844039916992, "learning_rate": 1.7624228001586492e-05, "loss": 1.2359, "step": 55450 }, { "epoch": 9.427163012068672, "grad_norm": 11.364815711975098, "learning_rate": 1.7621394979885546e-05, "loss": 1.1409, "step": 55460 }, { "epoch": 9.42886282508924, "grad_norm": 18.402393341064453, "learning_rate": 1.7618561958184603e-05, "loss": 1.4778, "step": 55470 }, { "epoch": 9.430562638109809, "grad_norm": 14.651461601257324, "learning_rate": 1.7615728936483653e-05, "loss": 1.3888, "step": 55480 }, { "epoch": 9.432262451130375, "grad_norm": 16.63093376159668, "learning_rate": 1.7612895914782706e-05, "loss": 1.2177, "step": 55490 }, { "epoch": 9.433962264150944, "grad_norm": 15.593648910522461, "learning_rate": 1.7610062893081763e-05, "loss": 1.2435, "step": 55500 }, { "epoch": 9.43566207717151, "grad_norm": 12.563055992126465, "learning_rate": 1.7607229871380817e-05, "loss": 1.3293, "step": 55510 }, { "epoch": 9.43736189019208, "grad_norm": 26.5516300201416, "learning_rate": 1.7604396849679867e-05, "loss": 1.302, "step": 55520 }, { "epoch": 9.439061703212646, "grad_norm": 10.741913795471191, "learning_rate": 1.7601563827978924e-05, "loss": 1.3907, "step": 55530 }, { "epoch": 9.440761516233215, "grad_norm": 18.908788681030273, "learning_rate": 1.7598730806277977e-05, "loss": 1.1529, "step": 55540 }, { "epoch": 9.442461329253781, "grad_norm": 16.17289161682129, "learning_rate": 1.759589778457703e-05, "loss": 1.3378, "step": 55550 }, { "epoch": 9.44416114227435, "grad_norm": 16.265491485595703, "learning_rate": 1.7593064762876085e-05, "loss": 1.3038, "step": 55560 }, { "epoch": 9.445860955294918, "grad_norm": 12.20537281036377, "learning_rate": 1.7590231741175138e-05, "loss": 1.1921, "step": 55570 }, { "epoch": 9.447560768315485, "grad_norm": 16.90904998779297, "learning_rate": 1.758739871947419e-05, "loss": 1.2564, "step": 55580 }, { "epoch": 9.449260581336054, "grad_norm": 14.783689498901367, "learning_rate": 1.7584565697773245e-05, "loss": 1.2451, "step": 55590 }, { "epoch": 9.45096039435662, "grad_norm": 13.741411209106445, "learning_rate": 1.75817326760723e-05, "loss": 1.4314, "step": 55600 }, { "epoch": 9.452660207377189, "grad_norm": 21.38789176940918, "learning_rate": 1.7578899654371352e-05, "loss": 1.7531, "step": 55610 }, { "epoch": 9.454360020397756, "grad_norm": 14.835077285766602, "learning_rate": 1.757606663267041e-05, "loss": 1.4079, "step": 55620 }, { "epoch": 9.456059833418324, "grad_norm": 12.461600303649902, "learning_rate": 1.757323361096946e-05, "loss": 1.1411, "step": 55630 }, { "epoch": 9.457759646438891, "grad_norm": 17.707250595092773, "learning_rate": 1.7570400589268513e-05, "loss": 1.2645, "step": 55640 }, { "epoch": 9.45945945945946, "grad_norm": 20.869903564453125, "learning_rate": 1.756756756756757e-05, "loss": 1.4056, "step": 55650 }, { "epoch": 9.461159272480026, "grad_norm": 15.243109703063965, "learning_rate": 1.7564734545866623e-05, "loss": 1.3913, "step": 55660 }, { "epoch": 9.462859085500595, "grad_norm": 12.96111011505127, "learning_rate": 1.7561901524165673e-05, "loss": 1.4512, "step": 55670 }, { "epoch": 9.464558898521163, "grad_norm": 13.270390510559082, "learning_rate": 1.755906850246473e-05, "loss": 1.2009, "step": 55680 }, { "epoch": 9.46625871154173, "grad_norm": 11.687577247619629, "learning_rate": 1.7556235480763784e-05, "loss": 1.491, "step": 55690 }, { "epoch": 9.467958524562299, "grad_norm": 12.321331977844238, "learning_rate": 1.7553402459062837e-05, "loss": 1.3393, "step": 55700 }, { "epoch": 9.469658337582866, "grad_norm": 15.634649276733398, "learning_rate": 1.755056943736189e-05, "loss": 1.2046, "step": 55710 }, { "epoch": 9.471358150603434, "grad_norm": 15.487112998962402, "learning_rate": 1.7547736415660944e-05, "loss": 1.3102, "step": 55720 }, { "epoch": 9.473057963624, "grad_norm": 14.636943817138672, "learning_rate": 1.7544903393959998e-05, "loss": 1.2973, "step": 55730 }, { "epoch": 9.47475777664457, "grad_norm": 14.78079891204834, "learning_rate": 1.754207037225905e-05, "loss": 1.494, "step": 55740 }, { "epoch": 9.476457589665136, "grad_norm": 19.878000259399414, "learning_rate": 1.7539237350558105e-05, "loss": 1.2631, "step": 55750 }, { "epoch": 9.478157402685705, "grad_norm": 14.820108413696289, "learning_rate": 1.753640432885716e-05, "loss": 1.3318, "step": 55760 }, { "epoch": 9.479857215706271, "grad_norm": 15.269723892211914, "learning_rate": 1.7533571307156215e-05, "loss": 1.4414, "step": 55770 }, { "epoch": 9.48155702872684, "grad_norm": 15.180267333984375, "learning_rate": 1.7530738285455265e-05, "loss": 1.3837, "step": 55780 }, { "epoch": 9.483256841747409, "grad_norm": 18.148237228393555, "learning_rate": 1.752790526375432e-05, "loss": 1.1472, "step": 55790 }, { "epoch": 9.484956654767975, "grad_norm": 13.208902359008789, "learning_rate": 1.7525072242053376e-05, "loss": 1.2715, "step": 55800 }, { "epoch": 9.486656467788544, "grad_norm": 10.891918182373047, "learning_rate": 1.752223922035243e-05, "loss": 1.5329, "step": 55810 }, { "epoch": 9.48835628080911, "grad_norm": 21.414602279663086, "learning_rate": 1.751940619865148e-05, "loss": 1.3642, "step": 55820 }, { "epoch": 9.49005609382968, "grad_norm": 20.938432693481445, "learning_rate": 1.7516573176950536e-05, "loss": 1.3427, "step": 55830 }, { "epoch": 9.491755906850246, "grad_norm": 27.858919143676758, "learning_rate": 1.751374015524959e-05, "loss": 1.4475, "step": 55840 }, { "epoch": 9.493455719870814, "grad_norm": 15.48620319366455, "learning_rate": 1.7510907133548643e-05, "loss": 1.3769, "step": 55850 }, { "epoch": 9.495155532891381, "grad_norm": 12.751924514770508, "learning_rate": 1.7508074111847697e-05, "loss": 1.2463, "step": 55860 }, { "epoch": 9.49685534591195, "grad_norm": 13.82418155670166, "learning_rate": 1.750524109014675e-05, "loss": 1.3257, "step": 55870 }, { "epoch": 9.498555158932518, "grad_norm": 14.135275840759277, "learning_rate": 1.7502408068445804e-05, "loss": 1.0096, "step": 55880 }, { "epoch": 9.500254971953085, "grad_norm": 12.10538387298584, "learning_rate": 1.749957504674486e-05, "loss": 1.4799, "step": 55890 }, { "epoch": 9.501954784973654, "grad_norm": 14.300698280334473, "learning_rate": 1.749674202504391e-05, "loss": 1.349, "step": 55900 }, { "epoch": 9.50365459799422, "grad_norm": 21.115276336669922, "learning_rate": 1.7493909003342965e-05, "loss": 1.2754, "step": 55910 }, { "epoch": 9.505354411014789, "grad_norm": 12.616503715515137, "learning_rate": 1.749107598164202e-05, "loss": 1.2907, "step": 55920 }, { "epoch": 9.507054224035356, "grad_norm": 12.03853988647461, "learning_rate": 1.748824295994107e-05, "loss": 1.2858, "step": 55930 }, { "epoch": 9.508754037055924, "grad_norm": 12.73962688446045, "learning_rate": 1.7485409938240125e-05, "loss": 1.3345, "step": 55940 }, { "epoch": 9.510453850076491, "grad_norm": 19.169801712036133, "learning_rate": 1.7482576916539182e-05, "loss": 1.2824, "step": 55950 }, { "epoch": 9.51215366309706, "grad_norm": 18.162948608398438, "learning_rate": 1.7479743894838236e-05, "loss": 1.2296, "step": 55960 }, { "epoch": 9.513853476117626, "grad_norm": 14.716899871826172, "learning_rate": 1.7476910873137286e-05, "loss": 1.5084, "step": 55970 }, { "epoch": 9.515553289138195, "grad_norm": 12.899076461791992, "learning_rate": 1.7474077851436343e-05, "loss": 1.3031, "step": 55980 }, { "epoch": 9.517253102158762, "grad_norm": 20.88075065612793, "learning_rate": 1.7471244829735396e-05, "loss": 1.3331, "step": 55990 }, { "epoch": 9.51895291517933, "grad_norm": 15.05825424194336, "learning_rate": 1.746841180803445e-05, "loss": 1.4515, "step": 56000 }, { "epoch": 9.520652728199899, "grad_norm": 12.729863166809082, "learning_rate": 1.7465578786333503e-05, "loss": 1.3926, "step": 56010 }, { "epoch": 9.522352541220465, "grad_norm": 15.235644340515137, "learning_rate": 1.7462745764632557e-05, "loss": 1.3423, "step": 56020 }, { "epoch": 9.524052354241034, "grad_norm": 16.45903778076172, "learning_rate": 1.745991274293161e-05, "loss": 1.2496, "step": 56030 }, { "epoch": 9.5257521672616, "grad_norm": 16.261844635009766, "learning_rate": 1.7457079721230667e-05, "loss": 1.235, "step": 56040 }, { "epoch": 9.52745198028217, "grad_norm": 10.83021068572998, "learning_rate": 1.7454246699529717e-05, "loss": 1.629, "step": 56050 }, { "epoch": 9.529151793302736, "grad_norm": 10.860315322875977, "learning_rate": 1.7451413677828774e-05, "loss": 1.173, "step": 56060 }, { "epoch": 9.530851606323305, "grad_norm": 18.772653579711914, "learning_rate": 1.7448580656127828e-05, "loss": 1.2647, "step": 56070 }, { "epoch": 9.532551419343871, "grad_norm": 11.431276321411133, "learning_rate": 1.744574763442688e-05, "loss": 1.5444, "step": 56080 }, { "epoch": 9.53425123236444, "grad_norm": 13.811397552490234, "learning_rate": 1.7442914612725935e-05, "loss": 1.2807, "step": 56090 }, { "epoch": 9.535951045385008, "grad_norm": 21.473438262939453, "learning_rate": 1.7440081591024988e-05, "loss": 1.4751, "step": 56100 }, { "epoch": 9.537650858405575, "grad_norm": 12.974053382873535, "learning_rate": 1.7437248569324042e-05, "loss": 1.2162, "step": 56110 }, { "epoch": 9.539350671426144, "grad_norm": 14.167863845825195, "learning_rate": 1.7434415547623095e-05, "loss": 1.6179, "step": 56120 }, { "epoch": 9.54105048444671, "grad_norm": 14.727156639099121, "learning_rate": 1.743158252592215e-05, "loss": 1.2267, "step": 56130 }, { "epoch": 9.542750297467279, "grad_norm": 14.931554794311523, "learning_rate": 1.7428749504221202e-05, "loss": 1.4417, "step": 56140 }, { "epoch": 9.544450110487846, "grad_norm": 12.866047859191895, "learning_rate": 1.742591648252026e-05, "loss": 1.2835, "step": 56150 }, { "epoch": 9.546149923508414, "grad_norm": 21.354455947875977, "learning_rate": 1.742308346081931e-05, "loss": 1.2425, "step": 56160 }, { "epoch": 9.547849736528981, "grad_norm": 16.905044555664062, "learning_rate": 1.7420250439118363e-05, "loss": 1.4872, "step": 56170 }, { "epoch": 9.54954954954955, "grad_norm": 17.21470069885254, "learning_rate": 1.741741741741742e-05, "loss": 1.4134, "step": 56180 }, { "epoch": 9.551249362570116, "grad_norm": 14.507037162780762, "learning_rate": 1.7414584395716473e-05, "loss": 1.2496, "step": 56190 }, { "epoch": 9.552949175590685, "grad_norm": 10.956923484802246, "learning_rate": 1.7411751374015523e-05, "loss": 1.366, "step": 56200 }, { "epoch": 9.554648988611254, "grad_norm": 11.78067398071289, "learning_rate": 1.740891835231458e-05, "loss": 1.5254, "step": 56210 }, { "epoch": 9.55634880163182, "grad_norm": 16.529966354370117, "learning_rate": 1.7406085330613634e-05, "loss": 1.6219, "step": 56220 }, { "epoch": 9.558048614652389, "grad_norm": 25.48668670654297, "learning_rate": 1.7403252308912687e-05, "loss": 1.1981, "step": 56230 }, { "epoch": 9.559748427672956, "grad_norm": 14.493003845214844, "learning_rate": 1.740041928721174e-05, "loss": 1.2896, "step": 56240 }, { "epoch": 9.561448240693524, "grad_norm": 11.776716232299805, "learning_rate": 1.7397586265510794e-05, "loss": 1.3271, "step": 56250 }, { "epoch": 9.563148053714091, "grad_norm": 12.526420593261719, "learning_rate": 1.7394753243809848e-05, "loss": 1.3564, "step": 56260 }, { "epoch": 9.56484786673466, "grad_norm": 13.26523494720459, "learning_rate": 1.73919202221089e-05, "loss": 1.4013, "step": 56270 }, { "epoch": 9.566547679755226, "grad_norm": 17.633289337158203, "learning_rate": 1.7389087200407955e-05, "loss": 1.2933, "step": 56280 }, { "epoch": 9.568247492775795, "grad_norm": 12.604182243347168, "learning_rate": 1.738625417870701e-05, "loss": 1.3373, "step": 56290 }, { "epoch": 9.569947305796362, "grad_norm": 11.682863235473633, "learning_rate": 1.7383421157006065e-05, "loss": 1.3252, "step": 56300 }, { "epoch": 9.57164711881693, "grad_norm": 15.922856330871582, "learning_rate": 1.7380588135305116e-05, "loss": 1.4635, "step": 56310 }, { "epoch": 9.573346931837499, "grad_norm": 14.250843048095703, "learning_rate": 1.737775511360417e-05, "loss": 1.4465, "step": 56320 }, { "epoch": 9.575046744858065, "grad_norm": 16.928672790527344, "learning_rate": 1.7374922091903226e-05, "loss": 1.372, "step": 56330 }, { "epoch": 9.576746557878634, "grad_norm": 14.523409843444824, "learning_rate": 1.737208907020228e-05, "loss": 1.389, "step": 56340 }, { "epoch": 9.5784463708992, "grad_norm": 16.056991577148438, "learning_rate": 1.736925604850133e-05, "loss": 1.5647, "step": 56350 }, { "epoch": 9.58014618391977, "grad_norm": 13.890312194824219, "learning_rate": 1.7366423026800387e-05, "loss": 1.1792, "step": 56360 }, { "epoch": 9.581845996940336, "grad_norm": 15.157037734985352, "learning_rate": 1.736359000509944e-05, "loss": 1.2731, "step": 56370 }, { "epoch": 9.583545809960905, "grad_norm": 19.023542404174805, "learning_rate": 1.7360756983398494e-05, "loss": 1.3952, "step": 56380 }, { "epoch": 9.585245622981471, "grad_norm": 17.80742835998535, "learning_rate": 1.7357923961697547e-05, "loss": 1.3242, "step": 56390 }, { "epoch": 9.58694543600204, "grad_norm": 11.112422943115234, "learning_rate": 1.73550909399966e-05, "loss": 1.4994, "step": 56400 }, { "epoch": 9.588645249022608, "grad_norm": 13.729632377624512, "learning_rate": 1.7352257918295654e-05, "loss": 1.3785, "step": 56410 }, { "epoch": 9.590345062043175, "grad_norm": 16.588769912719727, "learning_rate": 1.734942489659471e-05, "loss": 1.355, "step": 56420 }, { "epoch": 9.592044875063744, "grad_norm": 16.029272079467773, "learning_rate": 1.734659187489376e-05, "loss": 1.2967, "step": 56430 }, { "epoch": 9.59374468808431, "grad_norm": 16.00800895690918, "learning_rate": 1.7343758853192815e-05, "loss": 1.2893, "step": 56440 }, { "epoch": 9.595444501104879, "grad_norm": 18.450225830078125, "learning_rate": 1.734092583149187e-05, "loss": 1.3935, "step": 56450 }, { "epoch": 9.597144314125446, "grad_norm": 13.345033645629883, "learning_rate": 1.7338092809790922e-05, "loss": 1.4891, "step": 56460 }, { "epoch": 9.598844127146014, "grad_norm": 14.468534469604492, "learning_rate": 1.7335259788089975e-05, "loss": 1.3362, "step": 56470 }, { "epoch": 9.600543940166581, "grad_norm": 16.494016647338867, "learning_rate": 1.7332426766389032e-05, "loss": 1.3576, "step": 56480 }, { "epoch": 9.60224375318715, "grad_norm": 18.041486740112305, "learning_rate": 1.7329593744688086e-05, "loss": 1.4742, "step": 56490 }, { "epoch": 9.603943566207716, "grad_norm": 39.60988998413086, "learning_rate": 1.7326760722987136e-05, "loss": 1.4219, "step": 56500 }, { "epoch": 9.605643379228285, "grad_norm": 15.152480125427246, "learning_rate": 1.7323927701286193e-05, "loss": 1.3772, "step": 56510 }, { "epoch": 9.607343192248852, "grad_norm": 10.452275276184082, "learning_rate": 1.7321094679585246e-05, "loss": 1.3938, "step": 56520 }, { "epoch": 9.60904300526942, "grad_norm": 22.46470069885254, "learning_rate": 1.73182616578843e-05, "loss": 1.2961, "step": 56530 }, { "epoch": 9.610742818289989, "grad_norm": 16.01055145263672, "learning_rate": 1.7315428636183353e-05, "loss": 1.1409, "step": 56540 }, { "epoch": 9.612442631310556, "grad_norm": 15.988120079040527, "learning_rate": 1.7312595614482407e-05, "loss": 1.3613, "step": 56550 }, { "epoch": 9.614142444331124, "grad_norm": 22.405248641967773, "learning_rate": 1.730976259278146e-05, "loss": 1.2632, "step": 56560 }, { "epoch": 9.61584225735169, "grad_norm": 14.775259971618652, "learning_rate": 1.7306929571080517e-05, "loss": 1.3869, "step": 56570 }, { "epoch": 9.61754207037226, "grad_norm": 14.33281421661377, "learning_rate": 1.7304096549379567e-05, "loss": 1.3409, "step": 56580 }, { "epoch": 9.619241883392826, "grad_norm": 16.929563522338867, "learning_rate": 1.730126352767862e-05, "loss": 1.3188, "step": 56590 }, { "epoch": 9.620941696413395, "grad_norm": 12.862616539001465, "learning_rate": 1.7298430505977678e-05, "loss": 1.4807, "step": 56600 }, { "epoch": 9.622641509433961, "grad_norm": 12.203927993774414, "learning_rate": 1.7295597484276728e-05, "loss": 1.2973, "step": 56610 }, { "epoch": 9.62434132245453, "grad_norm": 16.581283569335938, "learning_rate": 1.729276446257578e-05, "loss": 1.1449, "step": 56620 }, { "epoch": 9.626041135475099, "grad_norm": 12.104817390441895, "learning_rate": 1.728993144087484e-05, "loss": 1.3464, "step": 56630 }, { "epoch": 9.627740948495665, "grad_norm": 10.987459182739258, "learning_rate": 1.7287098419173892e-05, "loss": 1.3474, "step": 56640 }, { "epoch": 9.629440761516234, "grad_norm": 12.53663158416748, "learning_rate": 1.7284265397472942e-05, "loss": 1.314, "step": 56650 }, { "epoch": 9.6311405745368, "grad_norm": 12.61436939239502, "learning_rate": 1.7281432375772e-05, "loss": 1.1318, "step": 56660 }, { "epoch": 9.63284038755737, "grad_norm": 21.349002838134766, "learning_rate": 1.7278599354071053e-05, "loss": 1.5221, "step": 56670 }, { "epoch": 9.634540200577936, "grad_norm": 14.495308876037598, "learning_rate": 1.7275766332370106e-05, "loss": 1.5108, "step": 56680 }, { "epoch": 9.636240013598504, "grad_norm": 16.878435134887695, "learning_rate": 1.727293331066916e-05, "loss": 1.4408, "step": 56690 }, { "epoch": 9.637939826619071, "grad_norm": 12.73630142211914, "learning_rate": 1.7270100288968213e-05, "loss": 1.3324, "step": 56700 }, { "epoch": 9.63963963963964, "grad_norm": 16.943151473999023, "learning_rate": 1.7267267267267267e-05, "loss": 1.4376, "step": 56710 }, { "epoch": 9.641339452660208, "grad_norm": 28.92561912536621, "learning_rate": 1.7264434245566324e-05, "loss": 1.1828, "step": 56720 }, { "epoch": 9.643039265680775, "grad_norm": 19.209814071655273, "learning_rate": 1.7261601223865374e-05, "loss": 1.4019, "step": 56730 }, { "epoch": 9.644739078701344, "grad_norm": 12.456279754638672, "learning_rate": 1.7258768202164427e-05, "loss": 1.2796, "step": 56740 }, { "epoch": 9.64643889172191, "grad_norm": 17.21248435974121, "learning_rate": 1.7255935180463484e-05, "loss": 1.1065, "step": 56750 }, { "epoch": 9.648138704742479, "grad_norm": 9.263657569885254, "learning_rate": 1.7253102158762538e-05, "loss": 1.3387, "step": 56760 }, { "epoch": 9.649838517763046, "grad_norm": 10.376019477844238, "learning_rate": 1.725026913706159e-05, "loss": 1.4756, "step": 56770 }, { "epoch": 9.651538330783614, "grad_norm": 13.645231246948242, "learning_rate": 1.7247436115360645e-05, "loss": 1.2652, "step": 56780 }, { "epoch": 9.653238143804181, "grad_norm": 15.78387451171875, "learning_rate": 1.7244603093659698e-05, "loss": 1.4678, "step": 56790 }, { "epoch": 9.65493795682475, "grad_norm": 17.178544998168945, "learning_rate": 1.7241770071958752e-05, "loss": 1.5677, "step": 56800 }, { "epoch": 9.656637769845316, "grad_norm": 13.364766120910645, "learning_rate": 1.7238937050257805e-05, "loss": 1.3589, "step": 56810 }, { "epoch": 9.658337582865885, "grad_norm": 18.1573429107666, "learning_rate": 1.723610402855686e-05, "loss": 1.474, "step": 56820 }, { "epoch": 9.660037395886452, "grad_norm": 14.203916549682617, "learning_rate": 1.7233271006855916e-05, "loss": 1.4689, "step": 56830 }, { "epoch": 9.66173720890702, "grad_norm": 14.998076438903809, "learning_rate": 1.7230437985154966e-05, "loss": 1.3869, "step": 56840 }, { "epoch": 9.663437021927589, "grad_norm": 17.559154510498047, "learning_rate": 1.722760496345402e-05, "loss": 1.0739, "step": 56850 }, { "epoch": 9.665136834948155, "grad_norm": 15.532888412475586, "learning_rate": 1.7224771941753076e-05, "loss": 1.2897, "step": 56860 }, { "epoch": 9.666836647968724, "grad_norm": 12.670088768005371, "learning_rate": 1.722193892005213e-05, "loss": 1.2024, "step": 56870 }, { "epoch": 9.66853646098929, "grad_norm": 12.360231399536133, "learning_rate": 1.721910589835118e-05, "loss": 1.3367, "step": 56880 }, { "epoch": 9.67023627400986, "grad_norm": 20.786041259765625, "learning_rate": 1.7216272876650237e-05, "loss": 1.1844, "step": 56890 }, { "epoch": 9.671936087030426, "grad_norm": 16.7069149017334, "learning_rate": 1.721343985494929e-05, "loss": 1.2571, "step": 56900 }, { "epoch": 9.673635900050995, "grad_norm": 17.25033950805664, "learning_rate": 1.7210606833248344e-05, "loss": 1.3667, "step": 56910 }, { "epoch": 9.675335713071561, "grad_norm": 17.604963302612305, "learning_rate": 1.7207773811547397e-05, "loss": 1.2934, "step": 56920 }, { "epoch": 9.67703552609213, "grad_norm": 13.11767864227295, "learning_rate": 1.720494078984645e-05, "loss": 1.3992, "step": 56930 }, { "epoch": 9.678735339112698, "grad_norm": 16.372451782226562, "learning_rate": 1.7202107768145504e-05, "loss": 1.4044, "step": 56940 }, { "epoch": 9.680435152133265, "grad_norm": 13.44831657409668, "learning_rate": 1.7199274746444558e-05, "loss": 1.4679, "step": 56950 }, { "epoch": 9.682134965153834, "grad_norm": 13.392455101013184, "learning_rate": 1.719644172474361e-05, "loss": 1.4234, "step": 56960 }, { "epoch": 9.6838347781744, "grad_norm": 12.689203262329102, "learning_rate": 1.7193608703042665e-05, "loss": 1.1173, "step": 56970 }, { "epoch": 9.685534591194969, "grad_norm": 16.468975067138672, "learning_rate": 1.7190775681341722e-05, "loss": 1.2571, "step": 56980 }, { "epoch": 9.687234404215536, "grad_norm": 13.630876541137695, "learning_rate": 1.7187942659640772e-05, "loss": 1.504, "step": 56990 }, { "epoch": 9.688934217236104, "grad_norm": 18.341081619262695, "learning_rate": 1.7185109637939826e-05, "loss": 1.2784, "step": 57000 }, { "epoch": 9.690634030256671, "grad_norm": 14.518448829650879, "learning_rate": 1.7182276616238882e-05, "loss": 1.3959, "step": 57010 }, { "epoch": 9.69233384327724, "grad_norm": 13.2869291305542, "learning_rate": 1.7179443594537936e-05, "loss": 1.1867, "step": 57020 }, { "epoch": 9.694033656297806, "grad_norm": 12.194459915161133, "learning_rate": 1.7176610572836986e-05, "loss": 1.5091, "step": 57030 }, { "epoch": 9.695733469318375, "grad_norm": 21.303836822509766, "learning_rate": 1.7173777551136043e-05, "loss": 1.2449, "step": 57040 }, { "epoch": 9.697433282338944, "grad_norm": 12.341155052185059, "learning_rate": 1.7170944529435097e-05, "loss": 1.6071, "step": 57050 }, { "epoch": 9.69913309535951, "grad_norm": 18.74759292602539, "learning_rate": 1.716811150773415e-05, "loss": 1.4074, "step": 57060 }, { "epoch": 9.700832908380079, "grad_norm": 11.2924165725708, "learning_rate": 1.7165278486033204e-05, "loss": 1.2887, "step": 57070 }, { "epoch": 9.702532721400646, "grad_norm": 24.890640258789062, "learning_rate": 1.7162445464332257e-05, "loss": 1.2964, "step": 57080 }, { "epoch": 9.704232534421214, "grad_norm": 11.888025283813477, "learning_rate": 1.715961244263131e-05, "loss": 1.4153, "step": 57090 }, { "epoch": 9.705932347441781, "grad_norm": 25.075828552246094, "learning_rate": 1.7156779420930368e-05, "loss": 1.3405, "step": 57100 }, { "epoch": 9.70763216046235, "grad_norm": 36.13197708129883, "learning_rate": 1.7153946399229418e-05, "loss": 1.2752, "step": 57110 }, { "epoch": 9.709331973482916, "grad_norm": 14.582451820373535, "learning_rate": 1.715111337752847e-05, "loss": 1.4569, "step": 57120 }, { "epoch": 9.711031786503485, "grad_norm": 17.9534969329834, "learning_rate": 1.7148280355827528e-05, "loss": 1.36, "step": 57130 }, { "epoch": 9.712731599524052, "grad_norm": 15.15922737121582, "learning_rate": 1.7145447334126578e-05, "loss": 0.996, "step": 57140 }, { "epoch": 9.71443141254462, "grad_norm": 12.471165657043457, "learning_rate": 1.7142614312425632e-05, "loss": 1.2913, "step": 57150 }, { "epoch": 9.716131225565189, "grad_norm": 14.464344024658203, "learning_rate": 1.713978129072469e-05, "loss": 1.4754, "step": 57160 }, { "epoch": 9.717831038585755, "grad_norm": 16.00356674194336, "learning_rate": 1.7136948269023742e-05, "loss": 1.2916, "step": 57170 }, { "epoch": 9.719530851606324, "grad_norm": 14.551353454589844, "learning_rate": 1.7134115247322792e-05, "loss": 1.4833, "step": 57180 }, { "epoch": 9.72123066462689, "grad_norm": 17.485273361206055, "learning_rate": 1.713128222562185e-05, "loss": 1.4056, "step": 57190 }, { "epoch": 9.72293047764746, "grad_norm": 14.820096969604492, "learning_rate": 1.7128449203920903e-05, "loss": 1.3849, "step": 57200 }, { "epoch": 9.724630290668026, "grad_norm": 13.115395545959473, "learning_rate": 1.7125616182219956e-05, "loss": 1.2512, "step": 57210 }, { "epoch": 9.726330103688595, "grad_norm": 12.825043678283691, "learning_rate": 1.712278316051901e-05, "loss": 1.2243, "step": 57220 }, { "epoch": 9.728029916709161, "grad_norm": 16.994430541992188, "learning_rate": 1.7119950138818063e-05, "loss": 1.2246, "step": 57230 }, { "epoch": 9.72972972972973, "grad_norm": 15.309162139892578, "learning_rate": 1.7117117117117117e-05, "loss": 1.2828, "step": 57240 }, { "epoch": 9.731429542750298, "grad_norm": 13.761688232421875, "learning_rate": 1.7114284095416174e-05, "loss": 1.4713, "step": 57250 }, { "epoch": 9.733129355770865, "grad_norm": 21.154186248779297, "learning_rate": 1.7111451073715224e-05, "loss": 1.1919, "step": 57260 }, { "epoch": 9.734829168791434, "grad_norm": 12.8450288772583, "learning_rate": 1.7108618052014277e-05, "loss": 1.2281, "step": 57270 }, { "epoch": 9.736528981812, "grad_norm": 19.52410125732422, "learning_rate": 1.7105785030313334e-05, "loss": 1.3293, "step": 57280 }, { "epoch": 9.738228794832569, "grad_norm": 18.739534378051758, "learning_rate": 1.7102952008612388e-05, "loss": 1.3068, "step": 57290 }, { "epoch": 9.739928607853136, "grad_norm": 16.369319915771484, "learning_rate": 1.7100118986911438e-05, "loss": 1.4906, "step": 57300 }, { "epoch": 9.741628420873704, "grad_norm": 16.18627166748047, "learning_rate": 1.7097285965210495e-05, "loss": 1.4161, "step": 57310 }, { "epoch": 9.743328233894271, "grad_norm": 16.88880157470703, "learning_rate": 1.709445294350955e-05, "loss": 1.4048, "step": 57320 }, { "epoch": 9.74502804691484, "grad_norm": 13.887529373168945, "learning_rate": 1.70916199218086e-05, "loss": 1.6469, "step": 57330 }, { "epoch": 9.746727859935406, "grad_norm": 16.29050636291504, "learning_rate": 1.7088786900107655e-05, "loss": 1.354, "step": 57340 }, { "epoch": 9.748427672955975, "grad_norm": 14.426860809326172, "learning_rate": 1.708595387840671e-05, "loss": 1.4543, "step": 57350 }, { "epoch": 9.750127485976542, "grad_norm": 18.076074600219727, "learning_rate": 1.7083120856705762e-05, "loss": 1.4483, "step": 57360 }, { "epoch": 9.75182729899711, "grad_norm": 10.627419471740723, "learning_rate": 1.7080287835004816e-05, "loss": 1.241, "step": 57370 }, { "epoch": 9.753527112017679, "grad_norm": 10.907903671264648, "learning_rate": 1.707745481330387e-05, "loss": 1.5485, "step": 57380 }, { "epoch": 9.755226925038246, "grad_norm": 11.359594345092773, "learning_rate": 1.7074621791602923e-05, "loss": 1.4091, "step": 57390 }, { "epoch": 9.756926738058814, "grad_norm": 17.39885139465332, "learning_rate": 1.707178876990198e-05, "loss": 1.0906, "step": 57400 }, { "epoch": 9.75862655107938, "grad_norm": 15.234336853027344, "learning_rate": 1.706895574820103e-05, "loss": 1.4296, "step": 57410 }, { "epoch": 9.76032636409995, "grad_norm": 19.078277587890625, "learning_rate": 1.7066122726500084e-05, "loss": 1.4468, "step": 57420 }, { "epoch": 9.762026177120516, "grad_norm": 16.789215087890625, "learning_rate": 1.706328970479914e-05, "loss": 1.3266, "step": 57430 }, { "epoch": 9.763725990141085, "grad_norm": 16.257673263549805, "learning_rate": 1.7060456683098194e-05, "loss": 1.2755, "step": 57440 }, { "epoch": 9.765425803161651, "grad_norm": 25.399282455444336, "learning_rate": 1.7057623661397244e-05, "loss": 1.336, "step": 57450 }, { "epoch": 9.76712561618222, "grad_norm": 19.174949645996094, "learning_rate": 1.70547906396963e-05, "loss": 1.2866, "step": 57460 }, { "epoch": 9.768825429202789, "grad_norm": 25.198341369628906, "learning_rate": 1.7051957617995355e-05, "loss": 1.3492, "step": 57470 }, { "epoch": 9.770525242223355, "grad_norm": 15.032561302185059, "learning_rate": 1.7049124596294408e-05, "loss": 1.3903, "step": 57480 }, { "epoch": 9.772225055243924, "grad_norm": 17.461214065551758, "learning_rate": 1.704629157459346e-05, "loss": 1.3045, "step": 57490 }, { "epoch": 9.77392486826449, "grad_norm": 13.944259643554688, "learning_rate": 1.7043458552892515e-05, "loss": 1.2308, "step": 57500 }, { "epoch": 9.77562468128506, "grad_norm": 13.292868614196777, "learning_rate": 1.7040625531191572e-05, "loss": 1.3097, "step": 57510 }, { "epoch": 9.777324494305626, "grad_norm": 12.344544410705566, "learning_rate": 1.7037792509490622e-05, "loss": 1.2042, "step": 57520 }, { "epoch": 9.779024307326194, "grad_norm": 19.559659957885742, "learning_rate": 1.7034959487789676e-05, "loss": 1.1104, "step": 57530 }, { "epoch": 9.780724120346761, "grad_norm": 19.486032485961914, "learning_rate": 1.7032126466088733e-05, "loss": 1.3313, "step": 57540 }, { "epoch": 9.78242393336733, "grad_norm": 14.119696617126465, "learning_rate": 1.7029293444387786e-05, "loss": 1.3871, "step": 57550 }, { "epoch": 9.784123746387897, "grad_norm": 14.740527153015137, "learning_rate": 1.7026460422686836e-05, "loss": 1.4635, "step": 57560 }, { "epoch": 9.785823559408465, "grad_norm": 13.18557071685791, "learning_rate": 1.7023627400985893e-05, "loss": 1.3122, "step": 57570 }, { "epoch": 9.787523372429034, "grad_norm": 16.853282928466797, "learning_rate": 1.7020794379284947e-05, "loss": 1.3917, "step": 57580 }, { "epoch": 9.7892231854496, "grad_norm": 18.85413360595703, "learning_rate": 1.7017961357584e-05, "loss": 1.2748, "step": 57590 }, { "epoch": 9.790922998470169, "grad_norm": 27.47248649597168, "learning_rate": 1.7015128335883054e-05, "loss": 1.0968, "step": 57600 }, { "epoch": 9.792622811490736, "grad_norm": 15.620461463928223, "learning_rate": 1.7012295314182107e-05, "loss": 1.3743, "step": 57610 }, { "epoch": 9.794322624511304, "grad_norm": 12.008535385131836, "learning_rate": 1.700946229248116e-05, "loss": 1.5258, "step": 57620 }, { "epoch": 9.796022437531871, "grad_norm": 14.542524337768555, "learning_rate": 1.7006629270780218e-05, "loss": 1.445, "step": 57630 }, { "epoch": 9.79772225055244, "grad_norm": 16.056190490722656, "learning_rate": 1.7003796249079268e-05, "loss": 1.3863, "step": 57640 }, { "epoch": 9.799422063573006, "grad_norm": 14.102243423461914, "learning_rate": 1.700096322737832e-05, "loss": 1.3516, "step": 57650 }, { "epoch": 9.801121876593575, "grad_norm": 16.70928382873535, "learning_rate": 1.699813020567738e-05, "loss": 1.3322, "step": 57660 }, { "epoch": 9.802821689614142, "grad_norm": 16.80184555053711, "learning_rate": 1.699529718397643e-05, "loss": 1.341, "step": 57670 }, { "epoch": 9.80452150263471, "grad_norm": 13.490674018859863, "learning_rate": 1.6992464162275482e-05, "loss": 1.4859, "step": 57680 }, { "epoch": 9.806221315655279, "grad_norm": 13.390007019042969, "learning_rate": 1.698963114057454e-05, "loss": 1.3718, "step": 57690 }, { "epoch": 9.807921128675845, "grad_norm": 15.641372680664062, "learning_rate": 1.6986798118873592e-05, "loss": 1.2908, "step": 57700 }, { "epoch": 9.809620941696414, "grad_norm": 15.245641708374023, "learning_rate": 1.6983965097172643e-05, "loss": 1.4569, "step": 57710 }, { "epoch": 9.81132075471698, "grad_norm": 15.650312423706055, "learning_rate": 1.69811320754717e-05, "loss": 1.3032, "step": 57720 }, { "epoch": 9.81302056773755, "grad_norm": 13.408099174499512, "learning_rate": 1.6978299053770753e-05, "loss": 1.5538, "step": 57730 }, { "epoch": 9.814720380758116, "grad_norm": 19.688720703125, "learning_rate": 1.6975466032069806e-05, "loss": 1.3849, "step": 57740 }, { "epoch": 9.816420193778685, "grad_norm": 12.09582805633545, "learning_rate": 1.697263301036886e-05, "loss": 1.3654, "step": 57750 }, { "epoch": 9.818120006799251, "grad_norm": 12.251763343811035, "learning_rate": 1.6969799988667914e-05, "loss": 1.1262, "step": 57760 }, { "epoch": 9.81981981981982, "grad_norm": 14.743391990661621, "learning_rate": 1.6966966966966967e-05, "loss": 1.2898, "step": 57770 }, { "epoch": 9.821519632840388, "grad_norm": 13.252304077148438, "learning_rate": 1.6964133945266024e-05, "loss": 1.4915, "step": 57780 }, { "epoch": 9.823219445860955, "grad_norm": 17.225074768066406, "learning_rate": 1.6961300923565074e-05, "loss": 1.4692, "step": 57790 }, { "epoch": 9.824919258881524, "grad_norm": 23.93762969970703, "learning_rate": 1.6958467901864128e-05, "loss": 1.6437, "step": 57800 }, { "epoch": 9.82661907190209, "grad_norm": 20.320171356201172, "learning_rate": 1.6955634880163185e-05, "loss": 1.2048, "step": 57810 }, { "epoch": 9.828318884922659, "grad_norm": 20.132549285888672, "learning_rate": 1.6952801858462235e-05, "loss": 1.4787, "step": 57820 }, { "epoch": 9.830018697943226, "grad_norm": 13.287223815917969, "learning_rate": 1.6949968836761288e-05, "loss": 1.3725, "step": 57830 }, { "epoch": 9.831718510963794, "grad_norm": 15.987154006958008, "learning_rate": 1.6947135815060345e-05, "loss": 1.4783, "step": 57840 }, { "epoch": 9.833418323984361, "grad_norm": 11.864831924438477, "learning_rate": 1.69443027933594e-05, "loss": 1.1489, "step": 57850 }, { "epoch": 9.83511813700493, "grad_norm": 17.950862884521484, "learning_rate": 1.694146977165845e-05, "loss": 1.2066, "step": 57860 }, { "epoch": 9.836817950025496, "grad_norm": 15.894370079040527, "learning_rate": 1.6938636749957506e-05, "loss": 1.2866, "step": 57870 }, { "epoch": 9.838517763046065, "grad_norm": 11.222244262695312, "learning_rate": 1.693580372825656e-05, "loss": 1.3723, "step": 57880 }, { "epoch": 9.840217576066633, "grad_norm": 11.93498420715332, "learning_rate": 1.6932970706555613e-05, "loss": 1.4113, "step": 57890 }, { "epoch": 9.8419173890872, "grad_norm": 17.452762603759766, "learning_rate": 1.6930137684854666e-05, "loss": 1.2726, "step": 57900 }, { "epoch": 9.843617202107769, "grad_norm": 15.706599235534668, "learning_rate": 1.692730466315372e-05, "loss": 1.6011, "step": 57910 }, { "epoch": 9.845317015128336, "grad_norm": 26.632688522338867, "learning_rate": 1.6924471641452773e-05, "loss": 1.4685, "step": 57920 }, { "epoch": 9.847016828148904, "grad_norm": 17.01910400390625, "learning_rate": 1.692163861975183e-05, "loss": 1.3848, "step": 57930 }, { "epoch": 9.84871664116947, "grad_norm": 14.331851959228516, "learning_rate": 1.691880559805088e-05, "loss": 1.402, "step": 57940 }, { "epoch": 9.85041645419004, "grad_norm": 16.692184448242188, "learning_rate": 1.6915972576349934e-05, "loss": 1.3273, "step": 57950 }, { "epoch": 9.852116267210606, "grad_norm": 41.0677604675293, "learning_rate": 1.691313955464899e-05, "loss": 1.4014, "step": 57960 }, { "epoch": 9.853816080231175, "grad_norm": 17.57040786743164, "learning_rate": 1.6910306532948044e-05, "loss": 1.2433, "step": 57970 }, { "epoch": 9.855515893251741, "grad_norm": 17.189790725708008, "learning_rate": 1.6907473511247094e-05, "loss": 1.5512, "step": 57980 }, { "epoch": 9.85721570627231, "grad_norm": 16.817960739135742, "learning_rate": 1.690464048954615e-05, "loss": 1.283, "step": 57990 }, { "epoch": 9.858915519292879, "grad_norm": 9.919124603271484, "learning_rate": 1.6901807467845205e-05, "loss": 1.3894, "step": 58000 }, { "epoch": 9.860615332313445, "grad_norm": 19.65211296081543, "learning_rate": 1.6898974446144255e-05, "loss": 1.4762, "step": 58010 }, { "epoch": 9.862315145334014, "grad_norm": 12.104331970214844, "learning_rate": 1.6896141424443312e-05, "loss": 1.2977, "step": 58020 }, { "epoch": 9.86401495835458, "grad_norm": 11.719135284423828, "learning_rate": 1.6893308402742365e-05, "loss": 1.4074, "step": 58030 }, { "epoch": 9.86571477137515, "grad_norm": 22.40625, "learning_rate": 1.689047538104142e-05, "loss": 1.3805, "step": 58040 }, { "epoch": 9.867414584395716, "grad_norm": 21.995386123657227, "learning_rate": 1.6887642359340472e-05, "loss": 1.1808, "step": 58050 }, { "epoch": 9.869114397416284, "grad_norm": 15.349132537841797, "learning_rate": 1.6884809337639526e-05, "loss": 1.215, "step": 58060 }, { "epoch": 9.870814210436851, "grad_norm": 22.63803482055664, "learning_rate": 1.688197631593858e-05, "loss": 1.5585, "step": 58070 }, { "epoch": 9.87251402345742, "grad_norm": 15.100747108459473, "learning_rate": 1.6879143294237636e-05, "loss": 1.2526, "step": 58080 }, { "epoch": 9.874213836477988, "grad_norm": 14.858964920043945, "learning_rate": 1.6876310272536687e-05, "loss": 1.2649, "step": 58090 }, { "epoch": 9.875913649498555, "grad_norm": 15.22615909576416, "learning_rate": 1.687347725083574e-05, "loss": 1.3061, "step": 58100 }, { "epoch": 9.877613462519124, "grad_norm": 16.77873420715332, "learning_rate": 1.6870644229134797e-05, "loss": 1.5615, "step": 58110 }, { "epoch": 9.87931327553969, "grad_norm": 16.1813907623291, "learning_rate": 1.686781120743385e-05, "loss": 1.4386, "step": 58120 }, { "epoch": 9.881013088560259, "grad_norm": 20.312053680419922, "learning_rate": 1.68649781857329e-05, "loss": 1.3289, "step": 58130 }, { "epoch": 9.882712901580826, "grad_norm": 14.29128360748291, "learning_rate": 1.6862145164031958e-05, "loss": 1.5364, "step": 58140 }, { "epoch": 9.884412714601394, "grad_norm": 14.413649559020996, "learning_rate": 1.685931214233101e-05, "loss": 1.4864, "step": 58150 }, { "epoch": 9.886112527621961, "grad_norm": 14.911670684814453, "learning_rate": 1.685647912063006e-05, "loss": 1.1143, "step": 58160 }, { "epoch": 9.88781234064253, "grad_norm": 18.747163772583008, "learning_rate": 1.6853646098929118e-05, "loss": 1.1699, "step": 58170 }, { "epoch": 9.889512153663096, "grad_norm": 15.305492401123047, "learning_rate": 1.685081307722817e-05, "loss": 1.3958, "step": 58180 }, { "epoch": 9.891211966683665, "grad_norm": 17.468034744262695, "learning_rate": 1.6847980055527225e-05, "loss": 1.2419, "step": 58190 }, { "epoch": 9.892911779704232, "grad_norm": 12.293375015258789, "learning_rate": 1.684514703382628e-05, "loss": 1.1743, "step": 58200 }, { "epoch": 9.8946115927248, "grad_norm": 13.090224266052246, "learning_rate": 1.6842314012125332e-05, "loss": 1.4968, "step": 58210 }, { "epoch": 9.896311405745369, "grad_norm": 15.015395164489746, "learning_rate": 1.6839480990424386e-05, "loss": 1.2872, "step": 58220 }, { "epoch": 9.898011218765935, "grad_norm": 12.165766716003418, "learning_rate": 1.6836647968723443e-05, "loss": 1.1893, "step": 58230 }, { "epoch": 9.899711031786504, "grad_norm": 14.109947204589844, "learning_rate": 1.6833814947022493e-05, "loss": 1.132, "step": 58240 }, { "epoch": 9.90141084480707, "grad_norm": 24.8373966217041, "learning_rate": 1.683098192532155e-05, "loss": 1.1849, "step": 58250 }, { "epoch": 9.90311065782764, "grad_norm": 17.584354400634766, "learning_rate": 1.6828148903620603e-05, "loss": 1.2001, "step": 58260 }, { "epoch": 9.904810470848206, "grad_norm": 13.535835266113281, "learning_rate": 1.6825315881919657e-05, "loss": 1.3728, "step": 58270 }, { "epoch": 9.906510283868775, "grad_norm": 9.215579986572266, "learning_rate": 1.682248286021871e-05, "loss": 1.3738, "step": 58280 }, { "epoch": 9.908210096889341, "grad_norm": 15.495771408081055, "learning_rate": 1.6819649838517764e-05, "loss": 1.2775, "step": 58290 }, { "epoch": 9.90990990990991, "grad_norm": 22.404006958007812, "learning_rate": 1.6816816816816817e-05, "loss": 1.2998, "step": 58300 }, { "epoch": 9.911609722930478, "grad_norm": 16.764801025390625, "learning_rate": 1.6813983795115874e-05, "loss": 1.5795, "step": 58310 }, { "epoch": 9.913309535951045, "grad_norm": 12.027934074401855, "learning_rate": 1.6811150773414924e-05, "loss": 1.2934, "step": 58320 }, { "epoch": 9.915009348971614, "grad_norm": 22.21027183532715, "learning_rate": 1.6808317751713978e-05, "loss": 1.3872, "step": 58330 }, { "epoch": 9.91670916199218, "grad_norm": 13.101012229919434, "learning_rate": 1.6805484730013035e-05, "loss": 1.2311, "step": 58340 }, { "epoch": 9.918408975012749, "grad_norm": 16.38216209411621, "learning_rate": 1.6802651708312085e-05, "loss": 1.1835, "step": 58350 }, { "epoch": 9.920108788033316, "grad_norm": 16.226911544799805, "learning_rate": 1.679981868661114e-05, "loss": 1.3996, "step": 58360 }, { "epoch": 9.921808601053884, "grad_norm": 17.090679168701172, "learning_rate": 1.6796985664910195e-05, "loss": 1.1463, "step": 58370 }, { "epoch": 9.923508414074451, "grad_norm": 18.890628814697266, "learning_rate": 1.679415264320925e-05, "loss": 1.473, "step": 58380 }, { "epoch": 9.92520822709502, "grad_norm": 16.7185115814209, "learning_rate": 1.67913196215083e-05, "loss": 1.359, "step": 58390 }, { "epoch": 9.926908040115586, "grad_norm": 13.218174934387207, "learning_rate": 1.6788486599807356e-05, "loss": 1.2929, "step": 58400 }, { "epoch": 9.928607853136155, "grad_norm": 16.91391944885254, "learning_rate": 1.678565357810641e-05, "loss": 1.0404, "step": 58410 }, { "epoch": 9.930307666156724, "grad_norm": 20.02778434753418, "learning_rate": 1.6782820556405463e-05, "loss": 1.1792, "step": 58420 }, { "epoch": 9.93200747917729, "grad_norm": 22.294282913208008, "learning_rate": 1.6779987534704516e-05, "loss": 1.2887, "step": 58430 }, { "epoch": 9.933707292197859, "grad_norm": 13.310667037963867, "learning_rate": 1.677715451300357e-05, "loss": 1.2314, "step": 58440 }, { "epoch": 9.935407105218426, "grad_norm": 16.686471939086914, "learning_rate": 1.6774321491302623e-05, "loss": 1.3457, "step": 58450 }, { "epoch": 9.937106918238994, "grad_norm": 34.017581939697266, "learning_rate": 1.677148846960168e-05, "loss": 1.356, "step": 58460 }, { "epoch": 9.938806731259561, "grad_norm": 14.940596580505371, "learning_rate": 1.676865544790073e-05, "loss": 1.337, "step": 58470 }, { "epoch": 9.94050654428013, "grad_norm": 14.831204414367676, "learning_rate": 1.6765822426199784e-05, "loss": 1.5455, "step": 58480 }, { "epoch": 9.942206357300696, "grad_norm": 13.811309814453125, "learning_rate": 1.676298940449884e-05, "loss": 1.3531, "step": 58490 }, { "epoch": 9.943906170321265, "grad_norm": 15.385157585144043, "learning_rate": 1.6760156382797894e-05, "loss": 1.1894, "step": 58500 }, { "epoch": 9.945605983341832, "grad_norm": 14.876537322998047, "learning_rate": 1.6757323361096945e-05, "loss": 1.3644, "step": 58510 }, { "epoch": 9.9473057963624, "grad_norm": 11.445497512817383, "learning_rate": 1.6754490339396e-05, "loss": 1.3833, "step": 58520 }, { "epoch": 9.949005609382969, "grad_norm": 18.419523239135742, "learning_rate": 1.6751657317695055e-05, "loss": 1.3291, "step": 58530 }, { "epoch": 9.950705422403535, "grad_norm": 18.33003807067871, "learning_rate": 1.6748824295994105e-05, "loss": 1.3016, "step": 58540 }, { "epoch": 9.952405235424104, "grad_norm": 19.020217895507812, "learning_rate": 1.6745991274293162e-05, "loss": 1.2137, "step": 58550 }, { "epoch": 9.95410504844467, "grad_norm": 15.793243408203125, "learning_rate": 1.6743158252592216e-05, "loss": 1.3532, "step": 58560 }, { "epoch": 9.95580486146524, "grad_norm": 14.541790008544922, "learning_rate": 1.674032523089127e-05, "loss": 1.3287, "step": 58570 }, { "epoch": 9.957504674485806, "grad_norm": 14.956843376159668, "learning_rate": 1.6737492209190323e-05, "loss": 1.4152, "step": 58580 }, { "epoch": 9.959204487506375, "grad_norm": 14.042685508728027, "learning_rate": 1.6734659187489376e-05, "loss": 1.2269, "step": 58590 }, { "epoch": 9.960904300526941, "grad_norm": 15.376683235168457, "learning_rate": 1.673182616578843e-05, "loss": 1.2148, "step": 58600 }, { "epoch": 9.96260411354751, "grad_norm": 23.212677001953125, "learning_rate": 1.6728993144087487e-05, "loss": 1.4487, "step": 58610 }, { "epoch": 9.964303926568078, "grad_norm": 10.643712997436523, "learning_rate": 1.6726160122386537e-05, "loss": 1.357, "step": 58620 }, { "epoch": 9.966003739588645, "grad_norm": 19.060035705566406, "learning_rate": 1.672332710068559e-05, "loss": 1.3777, "step": 58630 }, { "epoch": 9.967703552609214, "grad_norm": 37.52762985229492, "learning_rate": 1.6720494078984647e-05, "loss": 1.335, "step": 58640 }, { "epoch": 9.96940336562978, "grad_norm": 18.71849250793457, "learning_rate": 1.67176610572837e-05, "loss": 1.2511, "step": 58650 }, { "epoch": 9.971103178650349, "grad_norm": 12.7039155960083, "learning_rate": 1.671482803558275e-05, "loss": 1.4106, "step": 58660 }, { "epoch": 9.972802991670916, "grad_norm": 15.440673828125, "learning_rate": 1.6711995013881808e-05, "loss": 1.0923, "step": 58670 }, { "epoch": 9.974502804691484, "grad_norm": 13.689217567443848, "learning_rate": 1.670916199218086e-05, "loss": 1.4646, "step": 58680 }, { "epoch": 9.976202617712051, "grad_norm": 18.54193687438965, "learning_rate": 1.670632897047991e-05, "loss": 1.3705, "step": 58690 }, { "epoch": 9.97790243073262, "grad_norm": 10.693082809448242, "learning_rate": 1.6703495948778968e-05, "loss": 1.3846, "step": 58700 }, { "epoch": 9.979602243753186, "grad_norm": 12.1820650100708, "learning_rate": 1.6700662927078022e-05, "loss": 1.4432, "step": 58710 }, { "epoch": 9.981302056773755, "grad_norm": 13.200044631958008, "learning_rate": 1.6697829905377075e-05, "loss": 1.4289, "step": 58720 }, { "epoch": 9.983001869794322, "grad_norm": 17.64542007446289, "learning_rate": 1.669499688367613e-05, "loss": 1.555, "step": 58730 }, { "epoch": 9.98470168281489, "grad_norm": 11.799248695373535, "learning_rate": 1.6692163861975182e-05, "loss": 1.3667, "step": 58740 }, { "epoch": 9.986401495835459, "grad_norm": 15.628120422363281, "learning_rate": 1.6689330840274236e-05, "loss": 1.4531, "step": 58750 }, { "epoch": 9.988101308856026, "grad_norm": 17.34709358215332, "learning_rate": 1.6686497818573293e-05, "loss": 1.4644, "step": 58760 }, { "epoch": 9.989801121876594, "grad_norm": 11.113030433654785, "learning_rate": 1.6683664796872343e-05, "loss": 1.3669, "step": 58770 }, { "epoch": 9.99150093489716, "grad_norm": 18.8727970123291, "learning_rate": 1.6680831775171396e-05, "loss": 1.4051, "step": 58780 }, { "epoch": 9.99320074791773, "grad_norm": 18.753406524658203, "learning_rate": 1.6677998753470453e-05, "loss": 1.5739, "step": 58790 }, { "epoch": 9.994900560938296, "grad_norm": 21.805757522583008, "learning_rate": 1.6675165731769507e-05, "loss": 1.2143, "step": 58800 }, { "epoch": 9.996600373958865, "grad_norm": 41.69313049316406, "learning_rate": 1.6672332710068557e-05, "loss": 1.4325, "step": 58810 }, { "epoch": 9.998300186979431, "grad_norm": 20.96022605895996, "learning_rate": 1.6669499688367614e-05, "loss": 1.286, "step": 58820 }, { "epoch": 10.0, "grad_norm": 57.32018280029297, "learning_rate": 1.6666666666666667e-05, "loss": 1.3961, "step": 58830 }, { "epoch": 10.0, "eval_cer": 1.0, "eval_loss": 2.482346296310425, "eval_runtime": 1957.6059, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.241, "step": 58830 }, { "epoch": 10.001699813020569, "grad_norm": 16.33857536315918, "learning_rate": 1.666383364496572e-05, "loss": 1.2195, "step": 58840 }, { "epoch": 10.003399626041135, "grad_norm": 35.71213150024414, "learning_rate": 1.6661000623264775e-05, "loss": 1.2099, "step": 58850 }, { "epoch": 10.005099439061704, "grad_norm": 13.994068145751953, "learning_rate": 1.6658167601563828e-05, "loss": 1.1699, "step": 58860 }, { "epoch": 10.00679925208227, "grad_norm": 14.841238021850586, "learning_rate": 1.665533457986288e-05, "loss": 1.2522, "step": 58870 }, { "epoch": 10.00849906510284, "grad_norm": 15.953015327453613, "learning_rate": 1.6652501558161935e-05, "loss": 1.1982, "step": 58880 }, { "epoch": 10.010198878123406, "grad_norm": 13.220727920532227, "learning_rate": 1.664966853646099e-05, "loss": 1.4076, "step": 58890 }, { "epoch": 10.011898691143974, "grad_norm": 15.707820892333984, "learning_rate": 1.6646835514760042e-05, "loss": 1.3267, "step": 58900 }, { "epoch": 10.013598504164541, "grad_norm": 20.351825714111328, "learning_rate": 1.66440024930591e-05, "loss": 1.3566, "step": 58910 }, { "epoch": 10.01529831718511, "grad_norm": 12.99527645111084, "learning_rate": 1.664116947135815e-05, "loss": 1.215, "step": 58920 }, { "epoch": 10.016998130205677, "grad_norm": 13.139944076538086, "learning_rate": 1.6638336449657203e-05, "loss": 1.2688, "step": 58930 }, { "epoch": 10.018697943226245, "grad_norm": 18.849016189575195, "learning_rate": 1.663550342795626e-05, "loss": 1.1147, "step": 58940 }, { "epoch": 10.020397756246814, "grad_norm": 15.669721603393555, "learning_rate": 1.6632670406255313e-05, "loss": 1.4054, "step": 58950 }, { "epoch": 10.02209756926738, "grad_norm": 18.29370880126953, "learning_rate": 1.6629837384554367e-05, "loss": 1.3285, "step": 58960 }, { "epoch": 10.023797382287949, "grad_norm": 13.666223526000977, "learning_rate": 1.662700436285342e-05, "loss": 1.2075, "step": 58970 }, { "epoch": 10.025497195308516, "grad_norm": 14.483379364013672, "learning_rate": 1.6624171341152474e-05, "loss": 1.1301, "step": 58980 }, { "epoch": 10.027197008329084, "grad_norm": 25.477256774902344, "learning_rate": 1.662133831945153e-05, "loss": 1.1307, "step": 58990 }, { "epoch": 10.028896821349651, "grad_norm": 15.693344116210938, "learning_rate": 1.661850529775058e-05, "loss": 0.9401, "step": 59000 }, { "epoch": 10.03059663437022, "grad_norm": 14.681363105773926, "learning_rate": 1.6615672276049634e-05, "loss": 1.3308, "step": 59010 }, { "epoch": 10.032296447390786, "grad_norm": 19.52218246459961, "learning_rate": 1.661283925434869e-05, "loss": 1.2737, "step": 59020 }, { "epoch": 10.033996260411355, "grad_norm": 18.305408477783203, "learning_rate": 1.661000623264774e-05, "loss": 1.3422, "step": 59030 }, { "epoch": 10.035696073431922, "grad_norm": 16.52880096435547, "learning_rate": 1.6607173210946795e-05, "loss": 1.0755, "step": 59040 }, { "epoch": 10.03739588645249, "grad_norm": 23.416656494140625, "learning_rate": 1.6604340189245852e-05, "loss": 1.0265, "step": 59050 }, { "epoch": 10.039095699473059, "grad_norm": 14.750100135803223, "learning_rate": 1.6601507167544905e-05, "loss": 1.2615, "step": 59060 }, { "epoch": 10.040795512493625, "grad_norm": 18.912378311157227, "learning_rate": 1.6598674145843955e-05, "loss": 1.2006, "step": 59070 }, { "epoch": 10.042495325514194, "grad_norm": 13.982158660888672, "learning_rate": 1.6595841124143012e-05, "loss": 1.3122, "step": 59080 }, { "epoch": 10.04419513853476, "grad_norm": 11.574501991271973, "learning_rate": 1.6593008102442066e-05, "loss": 1.3324, "step": 59090 }, { "epoch": 10.04589495155533, "grad_norm": 12.9996337890625, "learning_rate": 1.659017508074112e-05, "loss": 1.1377, "step": 59100 }, { "epoch": 10.047594764575896, "grad_norm": 11.07320785522461, "learning_rate": 1.6587342059040173e-05, "loss": 1.435, "step": 59110 }, { "epoch": 10.049294577596465, "grad_norm": 17.372907638549805, "learning_rate": 1.6584509037339226e-05, "loss": 1.109, "step": 59120 }, { "epoch": 10.050994390617031, "grad_norm": 14.644556045532227, "learning_rate": 1.658167601563828e-05, "loss": 1.2637, "step": 59130 }, { "epoch": 10.0526942036376, "grad_norm": 12.881649017333984, "learning_rate": 1.6578842993937337e-05, "loss": 1.2041, "step": 59140 }, { "epoch": 10.054394016658168, "grad_norm": 16.034513473510742, "learning_rate": 1.6576009972236387e-05, "loss": 1.192, "step": 59150 }, { "epoch": 10.056093829678735, "grad_norm": 15.569679260253906, "learning_rate": 1.657317695053544e-05, "loss": 1.2043, "step": 59160 }, { "epoch": 10.057793642699304, "grad_norm": 15.666240692138672, "learning_rate": 1.6570343928834497e-05, "loss": 1.1881, "step": 59170 }, { "epoch": 10.05949345571987, "grad_norm": 18.269363403320312, "learning_rate": 1.656751090713355e-05, "loss": 1.1366, "step": 59180 }, { "epoch": 10.061193268740439, "grad_norm": 21.26482391357422, "learning_rate": 1.65646778854326e-05, "loss": 1.2455, "step": 59190 }, { "epoch": 10.062893081761006, "grad_norm": 15.450812339782715, "learning_rate": 1.6561844863731658e-05, "loss": 1.1094, "step": 59200 }, { "epoch": 10.064592894781574, "grad_norm": 15.118513107299805, "learning_rate": 1.655901184203071e-05, "loss": 0.9758, "step": 59210 }, { "epoch": 10.066292707802141, "grad_norm": 18.11209487915039, "learning_rate": 1.655617882032976e-05, "loss": 1.113, "step": 59220 }, { "epoch": 10.06799252082271, "grad_norm": 12.489859580993652, "learning_rate": 1.655334579862882e-05, "loss": 0.9264, "step": 59230 }, { "epoch": 10.069692333843276, "grad_norm": 35.37751007080078, "learning_rate": 1.6550512776927872e-05, "loss": 1.396, "step": 59240 }, { "epoch": 10.071392146863845, "grad_norm": 15.446310043334961, "learning_rate": 1.6547679755226926e-05, "loss": 1.2271, "step": 59250 }, { "epoch": 10.073091959884414, "grad_norm": 11.129317283630371, "learning_rate": 1.654484673352598e-05, "loss": 1.1045, "step": 59260 }, { "epoch": 10.07479177290498, "grad_norm": 15.589812278747559, "learning_rate": 1.6542013711825033e-05, "loss": 1.1672, "step": 59270 }, { "epoch": 10.076491585925549, "grad_norm": 13.243229866027832, "learning_rate": 1.6539180690124086e-05, "loss": 1.2376, "step": 59280 }, { "epoch": 10.078191398946116, "grad_norm": 11.380990982055664, "learning_rate": 1.6536347668423143e-05, "loss": 1.2191, "step": 59290 }, { "epoch": 10.079891211966684, "grad_norm": 12.852063179016113, "learning_rate": 1.6533514646722193e-05, "loss": 1.2841, "step": 59300 }, { "epoch": 10.081591024987251, "grad_norm": 16.852556228637695, "learning_rate": 1.6530681625021247e-05, "loss": 1.4879, "step": 59310 }, { "epoch": 10.08329083800782, "grad_norm": 12.287515640258789, "learning_rate": 1.6527848603320304e-05, "loss": 1.3446, "step": 59320 }, { "epoch": 10.084990651028386, "grad_norm": 14.41264820098877, "learning_rate": 1.6525015581619357e-05, "loss": 1.3821, "step": 59330 }, { "epoch": 10.086690464048955, "grad_norm": 9.33561897277832, "learning_rate": 1.6522182559918407e-05, "loss": 1.1886, "step": 59340 }, { "epoch": 10.088390277069522, "grad_norm": 16.997482299804688, "learning_rate": 1.6519349538217464e-05, "loss": 1.4436, "step": 59350 }, { "epoch": 10.09009009009009, "grad_norm": 14.002727508544922, "learning_rate": 1.6516516516516518e-05, "loss": 1.2872, "step": 59360 }, { "epoch": 10.091789903110659, "grad_norm": 10.577152252197266, "learning_rate": 1.6513683494815568e-05, "loss": 1.0647, "step": 59370 }, { "epoch": 10.093489716131225, "grad_norm": 12.154919624328613, "learning_rate": 1.6510850473114625e-05, "loss": 1.2983, "step": 59380 }, { "epoch": 10.095189529151794, "grad_norm": 16.756624221801758, "learning_rate": 1.6508017451413678e-05, "loss": 1.2923, "step": 59390 }, { "epoch": 10.09688934217236, "grad_norm": 12.364200592041016, "learning_rate": 1.6505184429712732e-05, "loss": 1.0637, "step": 59400 }, { "epoch": 10.09858915519293, "grad_norm": 17.923545837402344, "learning_rate": 1.6502351408011785e-05, "loss": 1.277, "step": 59410 }, { "epoch": 10.100288968213496, "grad_norm": 12.028101921081543, "learning_rate": 1.649951838631084e-05, "loss": 1.0361, "step": 59420 }, { "epoch": 10.101988781234065, "grad_norm": 12.21276569366455, "learning_rate": 1.6496685364609892e-05, "loss": 1.396, "step": 59430 }, { "epoch": 10.103688594254631, "grad_norm": 12.711071968078613, "learning_rate": 1.649385234290895e-05, "loss": 1.221, "step": 59440 }, { "epoch": 10.1053884072752, "grad_norm": 16.036865234375, "learning_rate": 1.6491019321208e-05, "loss": 1.2853, "step": 59450 }, { "epoch": 10.107088220295767, "grad_norm": 15.789003372192383, "learning_rate": 1.6488186299507053e-05, "loss": 1.0498, "step": 59460 }, { "epoch": 10.108788033316335, "grad_norm": 17.77434539794922, "learning_rate": 1.648535327780611e-05, "loss": 1.0599, "step": 59470 }, { "epoch": 10.110487846336904, "grad_norm": 11.66074275970459, "learning_rate": 1.6482520256105163e-05, "loss": 1.0832, "step": 59480 }, { "epoch": 10.11218765935747, "grad_norm": 12.0945405960083, "learning_rate": 1.6479687234404213e-05, "loss": 1.1072, "step": 59490 }, { "epoch": 10.113887472378039, "grad_norm": 15.808094024658203, "learning_rate": 1.647685421270327e-05, "loss": 1.3993, "step": 59500 }, { "epoch": 10.115587285398606, "grad_norm": 12.463869094848633, "learning_rate": 1.6474021191002324e-05, "loss": 1.0811, "step": 59510 }, { "epoch": 10.117287098419174, "grad_norm": 11.752448081970215, "learning_rate": 1.6471188169301377e-05, "loss": 1.1514, "step": 59520 }, { "epoch": 10.118986911439741, "grad_norm": 14.243658065795898, "learning_rate": 1.646835514760043e-05, "loss": 1.212, "step": 59530 }, { "epoch": 10.12068672446031, "grad_norm": 13.993215560913086, "learning_rate": 1.6465522125899484e-05, "loss": 1.1978, "step": 59540 }, { "epoch": 10.122386537480876, "grad_norm": 16.429855346679688, "learning_rate": 1.6462689104198538e-05, "loss": 1.4453, "step": 59550 }, { "epoch": 10.124086350501445, "grad_norm": 12.270153999328613, "learning_rate": 1.645985608249759e-05, "loss": 1.3147, "step": 59560 }, { "epoch": 10.125786163522013, "grad_norm": 12.7386474609375, "learning_rate": 1.6457023060796645e-05, "loss": 1.2357, "step": 59570 }, { "epoch": 10.12748597654258, "grad_norm": 11.542425155639648, "learning_rate": 1.64541900390957e-05, "loss": 1.2218, "step": 59580 }, { "epoch": 10.129185789563149, "grad_norm": 14.433863639831543, "learning_rate": 1.6451357017394755e-05, "loss": 1.1333, "step": 59590 }, { "epoch": 10.130885602583716, "grad_norm": 16.56212043762207, "learning_rate": 1.6448523995693806e-05, "loss": 1.0981, "step": 59600 }, { "epoch": 10.132585415604284, "grad_norm": 15.165494918823242, "learning_rate": 1.644569097399286e-05, "loss": 1.3104, "step": 59610 }, { "epoch": 10.13428522862485, "grad_norm": 12.70972728729248, "learning_rate": 1.6442857952291916e-05, "loss": 1.0098, "step": 59620 }, { "epoch": 10.13598504164542, "grad_norm": 13.014739036560059, "learning_rate": 1.644002493059097e-05, "loss": 1.0648, "step": 59630 }, { "epoch": 10.137684854665986, "grad_norm": 16.55820655822754, "learning_rate": 1.643719190889002e-05, "loss": 1.1734, "step": 59640 }, { "epoch": 10.139384667686555, "grad_norm": 11.648327827453613, "learning_rate": 1.6434358887189077e-05, "loss": 1.1251, "step": 59650 }, { "epoch": 10.141084480707121, "grad_norm": 19.77804946899414, "learning_rate": 1.643152586548813e-05, "loss": 1.2785, "step": 59660 }, { "epoch": 10.14278429372769, "grad_norm": 14.070216178894043, "learning_rate": 1.6428692843787184e-05, "loss": 1.1833, "step": 59670 }, { "epoch": 10.144484106748259, "grad_norm": 16.11638069152832, "learning_rate": 1.6425859822086237e-05, "loss": 1.3507, "step": 59680 }, { "epoch": 10.146183919768825, "grad_norm": 15.071884155273438, "learning_rate": 1.642302680038529e-05, "loss": 1.079, "step": 59690 }, { "epoch": 10.147883732789394, "grad_norm": 13.271394729614258, "learning_rate": 1.6420193778684348e-05, "loss": 1.1236, "step": 59700 }, { "epoch": 10.14958354580996, "grad_norm": 16.517839431762695, "learning_rate": 1.64173607569834e-05, "loss": 1.3253, "step": 59710 }, { "epoch": 10.15128335883053, "grad_norm": 14.010156631469727, "learning_rate": 1.641452773528245e-05, "loss": 1.2247, "step": 59720 }, { "epoch": 10.152983171851096, "grad_norm": 17.046810150146484, "learning_rate": 1.6411694713581508e-05, "loss": 1.0195, "step": 59730 }, { "epoch": 10.154682984871664, "grad_norm": 15.440706253051758, "learning_rate": 1.640886169188056e-05, "loss": 1.1677, "step": 59740 }, { "epoch": 10.156382797892231, "grad_norm": 15.340571403503418, "learning_rate": 1.6406028670179612e-05, "loss": 1.1962, "step": 59750 }, { "epoch": 10.1580826109128, "grad_norm": 20.232288360595703, "learning_rate": 1.640319564847867e-05, "loss": 1.3506, "step": 59760 }, { "epoch": 10.159782423933367, "grad_norm": 12.882514953613281, "learning_rate": 1.6400362626777722e-05, "loss": 1.0119, "step": 59770 }, { "epoch": 10.161482236953935, "grad_norm": 11.878813743591309, "learning_rate": 1.6397529605076776e-05, "loss": 1.2287, "step": 59780 }, { "epoch": 10.163182049974504, "grad_norm": 18.759279251098633, "learning_rate": 1.639469658337583e-05, "loss": 1.1056, "step": 59790 }, { "epoch": 10.16488186299507, "grad_norm": 19.735383987426758, "learning_rate": 1.6391863561674883e-05, "loss": 1.1668, "step": 59800 }, { "epoch": 10.166581676015639, "grad_norm": 11.899413108825684, "learning_rate": 1.6389030539973936e-05, "loss": 1.1723, "step": 59810 }, { "epoch": 10.168281489036206, "grad_norm": 16.56702995300293, "learning_rate": 1.6386197518272993e-05, "loss": 1.2818, "step": 59820 }, { "epoch": 10.169981302056774, "grad_norm": 14.054313659667969, "learning_rate": 1.6383364496572043e-05, "loss": 1.1417, "step": 59830 }, { "epoch": 10.171681115077341, "grad_norm": 11.292556762695312, "learning_rate": 1.6380531474871097e-05, "loss": 1.1568, "step": 59840 }, { "epoch": 10.17338092809791, "grad_norm": 14.681839942932129, "learning_rate": 1.6377698453170154e-05, "loss": 1.2417, "step": 59850 }, { "epoch": 10.175080741118476, "grad_norm": 17.873018264770508, "learning_rate": 1.6374865431469207e-05, "loss": 1.1461, "step": 59860 }, { "epoch": 10.176780554139045, "grad_norm": 13.324442863464355, "learning_rate": 1.6372032409768257e-05, "loss": 1.2195, "step": 59870 }, { "epoch": 10.178480367159612, "grad_norm": 11.833946228027344, "learning_rate": 1.6369199388067314e-05, "loss": 1.3521, "step": 59880 }, { "epoch": 10.18018018018018, "grad_norm": 16.33332061767578, "learning_rate": 1.6366366366366368e-05, "loss": 1.3403, "step": 59890 }, { "epoch": 10.181879993200749, "grad_norm": 12.959818840026855, "learning_rate": 1.6363533344665418e-05, "loss": 1.2188, "step": 59900 }, { "epoch": 10.183579806221315, "grad_norm": 14.919508934020996, "learning_rate": 1.6360700322964475e-05, "loss": 1.304, "step": 59910 }, { "epoch": 10.185279619241884, "grad_norm": 13.688971519470215, "learning_rate": 1.635786730126353e-05, "loss": 1.0252, "step": 59920 }, { "epoch": 10.18697943226245, "grad_norm": 15.374298095703125, "learning_rate": 1.6355034279562582e-05, "loss": 1.036, "step": 59930 }, { "epoch": 10.18867924528302, "grad_norm": 16.55264663696289, "learning_rate": 1.6352201257861635e-05, "loss": 1.2975, "step": 59940 }, { "epoch": 10.190379058303586, "grad_norm": 13.673981666564941, "learning_rate": 1.634936823616069e-05, "loss": 1.1565, "step": 59950 }, { "epoch": 10.192078871324155, "grad_norm": 12.947539329528809, "learning_rate": 1.6346535214459743e-05, "loss": 1.2975, "step": 59960 }, { "epoch": 10.193778684344721, "grad_norm": 19.37702751159668, "learning_rate": 1.63437021927588e-05, "loss": 1.1574, "step": 59970 }, { "epoch": 10.19547849736529, "grad_norm": 13.842536926269531, "learning_rate": 1.634086917105785e-05, "loss": 1.2033, "step": 59980 }, { "epoch": 10.197178310385858, "grad_norm": 17.904815673828125, "learning_rate": 1.6338036149356903e-05, "loss": 1.4317, "step": 59990 }, { "epoch": 10.198878123406425, "grad_norm": 12.307153701782227, "learning_rate": 1.633520312765596e-05, "loss": 1.0576, "step": 60000 }, { "epoch": 10.200577936426994, "grad_norm": 20.395837783813477, "learning_rate": 1.6332370105955014e-05, "loss": 1.0741, "step": 60010 }, { "epoch": 10.20227774944756, "grad_norm": 14.048035621643066, "learning_rate": 1.6329537084254064e-05, "loss": 1.3079, "step": 60020 }, { "epoch": 10.203977562468129, "grad_norm": 24.032167434692383, "learning_rate": 1.632670406255312e-05, "loss": 1.0407, "step": 60030 }, { "epoch": 10.205677375488696, "grad_norm": 14.000452995300293, "learning_rate": 1.6323871040852174e-05, "loss": 1.2666, "step": 60040 }, { "epoch": 10.207377188509264, "grad_norm": 15.476436614990234, "learning_rate": 1.6321038019151228e-05, "loss": 1.2705, "step": 60050 }, { "epoch": 10.209077001529831, "grad_norm": 18.31184196472168, "learning_rate": 1.631820499745028e-05, "loss": 1.1491, "step": 60060 }, { "epoch": 10.2107768145504, "grad_norm": 16.5869197845459, "learning_rate": 1.6315371975749335e-05, "loss": 1.2888, "step": 60070 }, { "epoch": 10.212476627570966, "grad_norm": 13.42168140411377, "learning_rate": 1.6312538954048388e-05, "loss": 1.2267, "step": 60080 }, { "epoch": 10.214176440591535, "grad_norm": 15.057514190673828, "learning_rate": 1.6309705932347442e-05, "loss": 1.1565, "step": 60090 }, { "epoch": 10.215876253612103, "grad_norm": 25.319791793823242, "learning_rate": 1.6306872910646495e-05, "loss": 1.3353, "step": 60100 }, { "epoch": 10.21757606663267, "grad_norm": 14.630755424499512, "learning_rate": 1.630403988894555e-05, "loss": 1.276, "step": 60110 }, { "epoch": 10.219275879653239, "grad_norm": 13.82407283782959, "learning_rate": 1.6301206867244606e-05, "loss": 1.3149, "step": 60120 }, { "epoch": 10.220975692673806, "grad_norm": 15.701191902160645, "learning_rate": 1.6298373845543656e-05, "loss": 1.3212, "step": 60130 }, { "epoch": 10.222675505694374, "grad_norm": 14.521502494812012, "learning_rate": 1.629554082384271e-05, "loss": 1.2694, "step": 60140 }, { "epoch": 10.22437531871494, "grad_norm": 16.283035278320312, "learning_rate": 1.6292707802141766e-05, "loss": 1.1266, "step": 60150 }, { "epoch": 10.22607513173551, "grad_norm": 15.627142906188965, "learning_rate": 1.628987478044082e-05, "loss": 1.2197, "step": 60160 }, { "epoch": 10.227774944756076, "grad_norm": 18.882648468017578, "learning_rate": 1.628704175873987e-05, "loss": 1.2713, "step": 60170 }, { "epoch": 10.229474757776645, "grad_norm": 14.634317398071289, "learning_rate": 1.6284208737038927e-05, "loss": 1.231, "step": 60180 }, { "epoch": 10.231174570797211, "grad_norm": 12.565778732299805, "learning_rate": 1.628137571533798e-05, "loss": 1.4221, "step": 60190 }, { "epoch": 10.23287438381778, "grad_norm": 17.81194496154785, "learning_rate": 1.6278542693637034e-05, "loss": 1.1503, "step": 60200 }, { "epoch": 10.234574196838349, "grad_norm": 13.712151527404785, "learning_rate": 1.6275709671936087e-05, "loss": 1.0984, "step": 60210 }, { "epoch": 10.236274009858915, "grad_norm": 11.82718563079834, "learning_rate": 1.627287665023514e-05, "loss": 1.2904, "step": 60220 }, { "epoch": 10.237973822879484, "grad_norm": 15.093082427978516, "learning_rate": 1.6270043628534194e-05, "loss": 1.1251, "step": 60230 }, { "epoch": 10.23967363590005, "grad_norm": 22.803707122802734, "learning_rate": 1.6267210606833248e-05, "loss": 1.2918, "step": 60240 }, { "epoch": 10.24137344892062, "grad_norm": 18.405458450317383, "learning_rate": 1.62643775851323e-05, "loss": 1.1232, "step": 60250 }, { "epoch": 10.243073261941186, "grad_norm": 14.354175567626953, "learning_rate": 1.6261544563431355e-05, "loss": 1.2569, "step": 60260 }, { "epoch": 10.244773074961754, "grad_norm": 13.830670356750488, "learning_rate": 1.6258711541730412e-05, "loss": 1.2007, "step": 60270 }, { "epoch": 10.246472887982321, "grad_norm": 11.83724308013916, "learning_rate": 1.6255878520029462e-05, "loss": 1.2973, "step": 60280 }, { "epoch": 10.24817270100289, "grad_norm": 13.140318870544434, "learning_rate": 1.6253045498328516e-05, "loss": 1.2494, "step": 60290 }, { "epoch": 10.249872514023457, "grad_norm": 21.43256950378418, "learning_rate": 1.6250212476627572e-05, "loss": 1.2594, "step": 60300 }, { "epoch": 10.251572327044025, "grad_norm": 14.790149688720703, "learning_rate": 1.6247379454926626e-05, "loss": 1.3089, "step": 60310 }, { "epoch": 10.253272140064594, "grad_norm": 17.224451065063477, "learning_rate": 1.6244546433225676e-05, "loss": 1.2001, "step": 60320 }, { "epoch": 10.25497195308516, "grad_norm": 16.03631591796875, "learning_rate": 1.6241713411524733e-05, "loss": 1.2939, "step": 60330 }, { "epoch": 10.256671766105729, "grad_norm": 13.09289264678955, "learning_rate": 1.6238880389823787e-05, "loss": 1.3225, "step": 60340 }, { "epoch": 10.258371579126296, "grad_norm": 20.3729248046875, "learning_rate": 1.623604736812284e-05, "loss": 1.334, "step": 60350 }, { "epoch": 10.260071392146864, "grad_norm": 15.354409217834473, "learning_rate": 1.6233214346421894e-05, "loss": 1.2781, "step": 60360 }, { "epoch": 10.261771205167431, "grad_norm": 12.964150428771973, "learning_rate": 1.6230381324720947e-05, "loss": 1.3308, "step": 60370 }, { "epoch": 10.263471018188, "grad_norm": 20.87494468688965, "learning_rate": 1.622754830302e-05, "loss": 1.1227, "step": 60380 }, { "epoch": 10.265170831208566, "grad_norm": 15.33708667755127, "learning_rate": 1.6224715281319058e-05, "loss": 1.1557, "step": 60390 }, { "epoch": 10.266870644229135, "grad_norm": 15.505764961242676, "learning_rate": 1.6221882259618108e-05, "loss": 1.2716, "step": 60400 }, { "epoch": 10.268570457249702, "grad_norm": 13.679378509521484, "learning_rate": 1.621904923791716e-05, "loss": 1.2546, "step": 60410 }, { "epoch": 10.27027027027027, "grad_norm": 18.863792419433594, "learning_rate": 1.6216216216216218e-05, "loss": 1.2506, "step": 60420 }, { "epoch": 10.271970083290839, "grad_norm": 17.031368255615234, "learning_rate": 1.6213383194515268e-05, "loss": 1.1905, "step": 60430 }, { "epoch": 10.273669896311405, "grad_norm": 21.208070755004883, "learning_rate": 1.6210550172814325e-05, "loss": 1.3556, "step": 60440 }, { "epoch": 10.275369709331974, "grad_norm": 17.194164276123047, "learning_rate": 1.620771715111338e-05, "loss": 1.2457, "step": 60450 }, { "epoch": 10.27706952235254, "grad_norm": 13.209839820861816, "learning_rate": 1.6204884129412432e-05, "loss": 1.1408, "step": 60460 }, { "epoch": 10.27876933537311, "grad_norm": 16.91526985168457, "learning_rate": 1.6202051107711486e-05, "loss": 1.1838, "step": 60470 }, { "epoch": 10.280469148393676, "grad_norm": 12.620284080505371, "learning_rate": 1.619921808601054e-05, "loss": 1.2362, "step": 60480 }, { "epoch": 10.282168961414245, "grad_norm": 40.27890396118164, "learning_rate": 1.6196385064309593e-05, "loss": 1.1921, "step": 60490 }, { "epoch": 10.283868774434811, "grad_norm": 16.18003273010254, "learning_rate": 1.619355204260865e-05, "loss": 1.1929, "step": 60500 }, { "epoch": 10.28556858745538, "grad_norm": 17.588350296020508, "learning_rate": 1.61907190209077e-05, "loss": 1.2253, "step": 60510 }, { "epoch": 10.287268400475948, "grad_norm": 14.830187797546387, "learning_rate": 1.6187885999206753e-05, "loss": 1.0181, "step": 60520 }, { "epoch": 10.288968213496515, "grad_norm": 13.506171226501465, "learning_rate": 1.618505297750581e-05, "loss": 1.2553, "step": 60530 }, { "epoch": 10.290668026517084, "grad_norm": 12.85332202911377, "learning_rate": 1.6182219955804864e-05, "loss": 1.1381, "step": 60540 }, { "epoch": 10.29236783953765, "grad_norm": 13.020970344543457, "learning_rate": 1.6179386934103914e-05, "loss": 1.0913, "step": 60550 }, { "epoch": 10.294067652558219, "grad_norm": 12.878240585327148, "learning_rate": 1.617655391240297e-05, "loss": 1.1608, "step": 60560 }, { "epoch": 10.295767465578786, "grad_norm": 20.55738067626953, "learning_rate": 1.6173720890702024e-05, "loss": 1.2264, "step": 60570 }, { "epoch": 10.297467278599354, "grad_norm": 17.51419448852539, "learning_rate": 1.6170887869001074e-05, "loss": 1.2686, "step": 60580 }, { "epoch": 10.299167091619921, "grad_norm": 15.833996772766113, "learning_rate": 1.616805484730013e-05, "loss": 1.1851, "step": 60590 }, { "epoch": 10.30086690464049, "grad_norm": 11.127851486206055, "learning_rate": 1.6165221825599185e-05, "loss": 1.4605, "step": 60600 }, { "epoch": 10.302566717661056, "grad_norm": 14.221062660217285, "learning_rate": 1.616238880389824e-05, "loss": 1.2583, "step": 60610 }, { "epoch": 10.304266530681625, "grad_norm": 14.12609577178955, "learning_rate": 1.6159555782197292e-05, "loss": 1.1397, "step": 60620 }, { "epoch": 10.305966343702194, "grad_norm": 11.791670799255371, "learning_rate": 1.6156722760496345e-05, "loss": 1.203, "step": 60630 }, { "epoch": 10.30766615672276, "grad_norm": 16.076622009277344, "learning_rate": 1.61538897387954e-05, "loss": 1.394, "step": 60640 }, { "epoch": 10.309365969743329, "grad_norm": 14.480888366699219, "learning_rate": 1.6151056717094456e-05, "loss": 1.1528, "step": 60650 }, { "epoch": 10.311065782763896, "grad_norm": 19.98672866821289, "learning_rate": 1.6148223695393506e-05, "loss": 1.112, "step": 60660 }, { "epoch": 10.312765595784464, "grad_norm": 10.960176467895508, "learning_rate": 1.614539067369256e-05, "loss": 1.0702, "step": 60670 }, { "epoch": 10.314465408805031, "grad_norm": 22.65536117553711, "learning_rate": 1.6142557651991616e-05, "loss": 1.2313, "step": 60680 }, { "epoch": 10.3161652218256, "grad_norm": 14.1158447265625, "learning_rate": 1.613972463029067e-05, "loss": 1.2358, "step": 60690 }, { "epoch": 10.317865034846166, "grad_norm": 11.119468688964844, "learning_rate": 1.613689160858972e-05, "loss": 1.2913, "step": 60700 }, { "epoch": 10.319564847866735, "grad_norm": 20.210525512695312, "learning_rate": 1.6134058586888777e-05, "loss": 1.3655, "step": 60710 }, { "epoch": 10.321264660887302, "grad_norm": 9.798675537109375, "learning_rate": 1.613122556518783e-05, "loss": 1.0526, "step": 60720 }, { "epoch": 10.32296447390787, "grad_norm": 15.946310997009277, "learning_rate": 1.6128392543486884e-05, "loss": 1.1064, "step": 60730 }, { "epoch": 10.324664286928439, "grad_norm": 15.951305389404297, "learning_rate": 1.6125559521785938e-05, "loss": 1.3178, "step": 60740 }, { "epoch": 10.326364099949005, "grad_norm": 33.806190490722656, "learning_rate": 1.612272650008499e-05, "loss": 1.0511, "step": 60750 }, { "epoch": 10.328063912969574, "grad_norm": 15.326312065124512, "learning_rate": 1.6119893478384045e-05, "loss": 1.1252, "step": 60760 }, { "epoch": 10.32976372599014, "grad_norm": 11.954800605773926, "learning_rate": 1.6117060456683098e-05, "loss": 1.1492, "step": 60770 }, { "epoch": 10.33146353901071, "grad_norm": 12.419811248779297, "learning_rate": 1.611422743498215e-05, "loss": 1.2392, "step": 60780 }, { "epoch": 10.333163352031276, "grad_norm": 22.341856002807617, "learning_rate": 1.6111394413281205e-05, "loss": 1.137, "step": 60790 }, { "epoch": 10.334863165051845, "grad_norm": 16.44615364074707, "learning_rate": 1.6108561391580262e-05, "loss": 1.3076, "step": 60800 }, { "epoch": 10.336562978072411, "grad_norm": 13.057441711425781, "learning_rate": 1.6105728369879312e-05, "loss": 1.0617, "step": 60810 }, { "epoch": 10.33826279109298, "grad_norm": 15.433563232421875, "learning_rate": 1.6102895348178366e-05, "loss": 1.4175, "step": 60820 }, { "epoch": 10.339962604113548, "grad_norm": 36.84077453613281, "learning_rate": 1.6100062326477423e-05, "loss": 1.2601, "step": 60830 }, { "epoch": 10.341662417134115, "grad_norm": 44.793880462646484, "learning_rate": 1.6097229304776476e-05, "loss": 1.2707, "step": 60840 }, { "epoch": 10.343362230154684, "grad_norm": 23.62372398376465, "learning_rate": 1.6094396283075526e-05, "loss": 1.125, "step": 60850 }, { "epoch": 10.34506204317525, "grad_norm": 14.030491828918457, "learning_rate": 1.6091563261374583e-05, "loss": 1.1557, "step": 60860 }, { "epoch": 10.346761856195819, "grad_norm": 12.062076568603516, "learning_rate": 1.6088730239673637e-05, "loss": 1.3749, "step": 60870 }, { "epoch": 10.348461669216386, "grad_norm": 16.281373977661133, "learning_rate": 1.608589721797269e-05, "loss": 1.3495, "step": 60880 }, { "epoch": 10.350161482236954, "grad_norm": 16.02005958557129, "learning_rate": 1.6083064196271744e-05, "loss": 1.0578, "step": 60890 }, { "epoch": 10.351861295257521, "grad_norm": 12.695043563842773, "learning_rate": 1.6080231174570797e-05, "loss": 1.3649, "step": 60900 }, { "epoch": 10.35356110827809, "grad_norm": 14.463683128356934, "learning_rate": 1.607739815286985e-05, "loss": 1.4929, "step": 60910 }, { "epoch": 10.355260921298656, "grad_norm": 12.930453300476074, "learning_rate": 1.6074565131168908e-05, "loss": 1.2329, "step": 60920 }, { "epoch": 10.356960734319225, "grad_norm": 14.505942344665527, "learning_rate": 1.6071732109467958e-05, "loss": 1.2811, "step": 60930 }, { "epoch": 10.358660547339793, "grad_norm": 13.949569702148438, "learning_rate": 1.606889908776701e-05, "loss": 1.1365, "step": 60940 }, { "epoch": 10.36036036036036, "grad_norm": 15.243180274963379, "learning_rate": 1.6066066066066068e-05, "loss": 1.1958, "step": 60950 }, { "epoch": 10.362060173380929, "grad_norm": 15.352099418640137, "learning_rate": 1.606323304436512e-05, "loss": 1.1876, "step": 60960 }, { "epoch": 10.363759986401496, "grad_norm": 21.07232666015625, "learning_rate": 1.6060400022664172e-05, "loss": 1.2355, "step": 60970 }, { "epoch": 10.365459799422064, "grad_norm": 19.498882293701172, "learning_rate": 1.605756700096323e-05, "loss": 1.1362, "step": 60980 }, { "epoch": 10.36715961244263, "grad_norm": 11.403948783874512, "learning_rate": 1.6054733979262282e-05, "loss": 1.2817, "step": 60990 }, { "epoch": 10.3688594254632, "grad_norm": 13.880785942077637, "learning_rate": 1.6051900957561333e-05, "loss": 1.251, "step": 61000 }, { "epoch": 10.370559238483766, "grad_norm": 13.041522979736328, "learning_rate": 1.604906793586039e-05, "loss": 1.2622, "step": 61010 }, { "epoch": 10.372259051504335, "grad_norm": 11.62718677520752, "learning_rate": 1.6046234914159443e-05, "loss": 1.0976, "step": 61020 }, { "epoch": 10.373958864524901, "grad_norm": 19.339200973510742, "learning_rate": 1.6043401892458496e-05, "loss": 1.113, "step": 61030 }, { "epoch": 10.37565867754547, "grad_norm": 27.742595672607422, "learning_rate": 1.604056887075755e-05, "loss": 1.2472, "step": 61040 }, { "epoch": 10.377358490566039, "grad_norm": 15.082010269165039, "learning_rate": 1.6037735849056604e-05, "loss": 1.1708, "step": 61050 }, { "epoch": 10.379058303586605, "grad_norm": 14.274876594543457, "learning_rate": 1.6034902827355657e-05, "loss": 1.3543, "step": 61060 }, { "epoch": 10.380758116607174, "grad_norm": 21.531848907470703, "learning_rate": 1.6032069805654714e-05, "loss": 1.1269, "step": 61070 }, { "epoch": 10.38245792962774, "grad_norm": 14.348944664001465, "learning_rate": 1.6029236783953764e-05, "loss": 1.3166, "step": 61080 }, { "epoch": 10.38415774264831, "grad_norm": 19.53386688232422, "learning_rate": 1.6026403762252818e-05, "loss": 1.1459, "step": 61090 }, { "epoch": 10.385857555668876, "grad_norm": 11.972494125366211, "learning_rate": 1.6023570740551875e-05, "loss": 1.2546, "step": 61100 }, { "epoch": 10.387557368689444, "grad_norm": 14.091147422790527, "learning_rate": 1.6020737718850925e-05, "loss": 1.1851, "step": 61110 }, { "epoch": 10.389257181710011, "grad_norm": 14.60051441192627, "learning_rate": 1.6017904697149978e-05, "loss": 1.1249, "step": 61120 }, { "epoch": 10.39095699473058, "grad_norm": 15.20007610321045, "learning_rate": 1.6015071675449035e-05, "loss": 1.1955, "step": 61130 }, { "epoch": 10.392656807751147, "grad_norm": 11.833115577697754, "learning_rate": 1.601223865374809e-05, "loss": 1.0741, "step": 61140 }, { "epoch": 10.394356620771715, "grad_norm": 13.509215354919434, "learning_rate": 1.6009405632047142e-05, "loss": 1.0865, "step": 61150 }, { "epoch": 10.396056433792284, "grad_norm": 15.021053314208984, "learning_rate": 1.6006572610346196e-05, "loss": 1.3398, "step": 61160 }, { "epoch": 10.39775624681285, "grad_norm": 14.099637031555176, "learning_rate": 1.600373958864525e-05, "loss": 1.2606, "step": 61170 }, { "epoch": 10.399456059833419, "grad_norm": 25.27992057800293, "learning_rate": 1.6000906566944306e-05, "loss": 1.3949, "step": 61180 }, { "epoch": 10.401155872853986, "grad_norm": 20.488393783569336, "learning_rate": 1.5998073545243356e-05, "loss": 1.2328, "step": 61190 }, { "epoch": 10.402855685874554, "grad_norm": 17.917367935180664, "learning_rate": 1.599524052354241e-05, "loss": 1.183, "step": 61200 }, { "epoch": 10.404555498895121, "grad_norm": 15.506739616394043, "learning_rate": 1.5992407501841467e-05, "loss": 1.2037, "step": 61210 }, { "epoch": 10.40625531191569, "grad_norm": 16.630117416381836, "learning_rate": 1.598957448014052e-05, "loss": 1.2018, "step": 61220 }, { "epoch": 10.407955124936256, "grad_norm": 12.289257049560547, "learning_rate": 1.598674145843957e-05, "loss": 1.3592, "step": 61230 }, { "epoch": 10.409654937956825, "grad_norm": 17.1683406829834, "learning_rate": 1.5983908436738627e-05, "loss": 1.2695, "step": 61240 }, { "epoch": 10.411354750977392, "grad_norm": 13.17637825012207, "learning_rate": 1.598107541503768e-05, "loss": 1.1422, "step": 61250 }, { "epoch": 10.41305456399796, "grad_norm": 18.541332244873047, "learning_rate": 1.5978242393336734e-05, "loss": 1.1749, "step": 61260 }, { "epoch": 10.414754377018529, "grad_norm": 11.390335083007812, "learning_rate": 1.5975409371635788e-05, "loss": 1.03, "step": 61270 }, { "epoch": 10.416454190039095, "grad_norm": 14.312213897705078, "learning_rate": 1.597257634993484e-05, "loss": 1.1528, "step": 61280 }, { "epoch": 10.418154003059664, "grad_norm": 13.983818054199219, "learning_rate": 1.5969743328233895e-05, "loss": 1.3412, "step": 61290 }, { "epoch": 10.41985381608023, "grad_norm": 9.718522071838379, "learning_rate": 1.596691030653295e-05, "loss": 1.1527, "step": 61300 }, { "epoch": 10.4215536291008, "grad_norm": 12.522194862365723, "learning_rate": 1.5964077284832002e-05, "loss": 1.0898, "step": 61310 }, { "epoch": 10.423253442121366, "grad_norm": 14.342066764831543, "learning_rate": 1.5961244263131055e-05, "loss": 1.1641, "step": 61320 }, { "epoch": 10.424953255141935, "grad_norm": 19.59847640991211, "learning_rate": 1.5958411241430112e-05, "loss": 1.2888, "step": 61330 }, { "epoch": 10.426653068162501, "grad_norm": 13.494855880737305, "learning_rate": 1.5955578219729162e-05, "loss": 1.1678, "step": 61340 }, { "epoch": 10.42835288118307, "grad_norm": 14.445388793945312, "learning_rate": 1.5952745198028216e-05, "loss": 1.0336, "step": 61350 }, { "epoch": 10.430052694203638, "grad_norm": 13.120495796203613, "learning_rate": 1.5949912176327273e-05, "loss": 1.283, "step": 61360 }, { "epoch": 10.431752507224205, "grad_norm": 14.338784217834473, "learning_rate": 1.5947079154626326e-05, "loss": 1.0599, "step": 61370 }, { "epoch": 10.433452320244774, "grad_norm": 14.976664543151855, "learning_rate": 1.5944246132925377e-05, "loss": 1.1547, "step": 61380 }, { "epoch": 10.43515213326534, "grad_norm": 14.885446548461914, "learning_rate": 1.5941413111224433e-05, "loss": 1.1546, "step": 61390 }, { "epoch": 10.436851946285909, "grad_norm": 15.290672302246094, "learning_rate": 1.5938580089523487e-05, "loss": 1.3862, "step": 61400 }, { "epoch": 10.438551759306476, "grad_norm": 12.499482154846191, "learning_rate": 1.593574706782254e-05, "loss": 1.2441, "step": 61410 }, { "epoch": 10.440251572327044, "grad_norm": 12.329859733581543, "learning_rate": 1.5932914046121594e-05, "loss": 1.2581, "step": 61420 }, { "epoch": 10.441951385347611, "grad_norm": 12.663945198059082, "learning_rate": 1.5930081024420648e-05, "loss": 1.1666, "step": 61430 }, { "epoch": 10.44365119836818, "grad_norm": 17.506771087646484, "learning_rate": 1.59272480027197e-05, "loss": 1.2285, "step": 61440 }, { "epoch": 10.445351011388746, "grad_norm": 17.738468170166016, "learning_rate": 1.5924414981018755e-05, "loss": 1.2581, "step": 61450 }, { "epoch": 10.447050824409315, "grad_norm": 12.878238677978516, "learning_rate": 1.5921581959317808e-05, "loss": 1.1562, "step": 61460 }, { "epoch": 10.448750637429884, "grad_norm": 17.59827995300293, "learning_rate": 1.591874893761686e-05, "loss": 1.1544, "step": 61470 }, { "epoch": 10.45045045045045, "grad_norm": 15.875847816467285, "learning_rate": 1.591591591591592e-05, "loss": 1.4053, "step": 61480 }, { "epoch": 10.452150263471019, "grad_norm": 14.153159141540527, "learning_rate": 1.591308289421497e-05, "loss": 1.1191, "step": 61490 }, { "epoch": 10.453850076491586, "grad_norm": 15.138839721679688, "learning_rate": 1.5910249872514022e-05, "loss": 1.2403, "step": 61500 }, { "epoch": 10.455549889512154, "grad_norm": 17.78380012512207, "learning_rate": 1.590741685081308e-05, "loss": 1.183, "step": 61510 }, { "epoch": 10.457249702532721, "grad_norm": 14.539024353027344, "learning_rate": 1.5904583829112133e-05, "loss": 0.973, "step": 61520 }, { "epoch": 10.45894951555329, "grad_norm": 15.042924880981445, "learning_rate": 1.5901750807411183e-05, "loss": 1.1282, "step": 61530 }, { "epoch": 10.460649328573856, "grad_norm": 19.562477111816406, "learning_rate": 1.589891778571024e-05, "loss": 1.277, "step": 61540 }, { "epoch": 10.462349141594425, "grad_norm": 19.421878814697266, "learning_rate": 1.5896084764009293e-05, "loss": 1.2727, "step": 61550 }, { "epoch": 10.464048954614992, "grad_norm": 18.122127532958984, "learning_rate": 1.5893251742308347e-05, "loss": 1.0434, "step": 61560 }, { "epoch": 10.46574876763556, "grad_norm": 16.013036727905273, "learning_rate": 1.58904187206074e-05, "loss": 1.3394, "step": 61570 }, { "epoch": 10.467448580656129, "grad_norm": 16.35121726989746, "learning_rate": 1.5887585698906454e-05, "loss": 1.3042, "step": 61580 }, { "epoch": 10.469148393676695, "grad_norm": 15.059036254882812, "learning_rate": 1.5884752677205507e-05, "loss": 1.0031, "step": 61590 }, { "epoch": 10.470848206697264, "grad_norm": 21.477802276611328, "learning_rate": 1.5881919655504564e-05, "loss": 1.1283, "step": 61600 }, { "epoch": 10.47254801971783, "grad_norm": 11.455619812011719, "learning_rate": 1.5879086633803614e-05, "loss": 1.3006, "step": 61610 }, { "epoch": 10.4742478327384, "grad_norm": 15.347251892089844, "learning_rate": 1.5876253612102668e-05, "loss": 1.1119, "step": 61620 }, { "epoch": 10.475947645758966, "grad_norm": 13.291887283325195, "learning_rate": 1.5873420590401725e-05, "loss": 1.099, "step": 61630 }, { "epoch": 10.477647458779535, "grad_norm": 14.423088073730469, "learning_rate": 1.5870587568700775e-05, "loss": 1.3394, "step": 61640 }, { "epoch": 10.479347271800101, "grad_norm": 11.574652671813965, "learning_rate": 1.586775454699983e-05, "loss": 1.0731, "step": 61650 }, { "epoch": 10.48104708482067, "grad_norm": 20.317474365234375, "learning_rate": 1.5864921525298885e-05, "loss": 1.1658, "step": 61660 }, { "epoch": 10.482746897841238, "grad_norm": 17.586891174316406, "learning_rate": 1.586208850359794e-05, "loss": 1.3817, "step": 61670 }, { "epoch": 10.484446710861805, "grad_norm": 11.905977249145508, "learning_rate": 1.585925548189699e-05, "loss": 1.4462, "step": 61680 }, { "epoch": 10.486146523882374, "grad_norm": 14.399643898010254, "learning_rate": 1.5856422460196046e-05, "loss": 1.3365, "step": 61690 }, { "epoch": 10.48784633690294, "grad_norm": 17.252059936523438, "learning_rate": 1.58535894384951e-05, "loss": 1.2772, "step": 61700 }, { "epoch": 10.489546149923509, "grad_norm": 21.176334381103516, "learning_rate": 1.5850756416794153e-05, "loss": 1.1742, "step": 61710 }, { "epoch": 10.491245962944076, "grad_norm": 10.339421272277832, "learning_rate": 1.5847923395093206e-05, "loss": 1.0481, "step": 61720 }, { "epoch": 10.492945775964644, "grad_norm": 14.053982734680176, "learning_rate": 1.584509037339226e-05, "loss": 1.2938, "step": 61730 }, { "epoch": 10.494645588985211, "grad_norm": 15.935629844665527, "learning_rate": 1.5842257351691313e-05, "loss": 1.3655, "step": 61740 }, { "epoch": 10.49634540200578, "grad_norm": 20.100263595581055, "learning_rate": 1.583942432999037e-05, "loss": 1.2084, "step": 61750 }, { "epoch": 10.498045215026346, "grad_norm": 34.756221771240234, "learning_rate": 1.583659130828942e-05, "loss": 1.1936, "step": 61760 }, { "epoch": 10.499745028046915, "grad_norm": 18.995494842529297, "learning_rate": 1.5833758286588474e-05, "loss": 1.3783, "step": 61770 }, { "epoch": 10.501444841067482, "grad_norm": 8.9885835647583, "learning_rate": 1.583092526488753e-05, "loss": 1.0997, "step": 61780 }, { "epoch": 10.50314465408805, "grad_norm": 18.1671085357666, "learning_rate": 1.582809224318658e-05, "loss": 1.1432, "step": 61790 }, { "epoch": 10.504844467108619, "grad_norm": 13.761608123779297, "learning_rate": 1.5825259221485635e-05, "loss": 1.2721, "step": 61800 }, { "epoch": 10.506544280129186, "grad_norm": 14.257441520690918, "learning_rate": 1.582242619978469e-05, "loss": 1.0954, "step": 61810 }, { "epoch": 10.508244093149754, "grad_norm": 17.744558334350586, "learning_rate": 1.5819593178083745e-05, "loss": 1.0836, "step": 61820 }, { "epoch": 10.50994390617032, "grad_norm": 13.959546089172363, "learning_rate": 1.5816760156382795e-05, "loss": 1.1937, "step": 61830 }, { "epoch": 10.51164371919089, "grad_norm": 13.655734062194824, "learning_rate": 1.5813927134681852e-05, "loss": 1.4759, "step": 61840 }, { "epoch": 10.513343532211456, "grad_norm": 14.103080749511719, "learning_rate": 1.5811094112980906e-05, "loss": 1.316, "step": 61850 }, { "epoch": 10.515043345232025, "grad_norm": 16.0054988861084, "learning_rate": 1.580826109127996e-05, "loss": 1.1567, "step": 61860 }, { "epoch": 10.516743158252591, "grad_norm": 12.485234260559082, "learning_rate": 1.5805428069579013e-05, "loss": 1.2123, "step": 61870 }, { "epoch": 10.51844297127316, "grad_norm": 12.556991577148438, "learning_rate": 1.5802595047878066e-05, "loss": 1.1608, "step": 61880 }, { "epoch": 10.520142784293729, "grad_norm": 15.19385051727295, "learning_rate": 1.5799762026177123e-05, "loss": 1.3991, "step": 61890 }, { "epoch": 10.521842597314295, "grad_norm": 21.42726707458496, "learning_rate": 1.5796929004476177e-05, "loss": 1.2608, "step": 61900 }, { "epoch": 10.523542410334864, "grad_norm": 13.380428314208984, "learning_rate": 1.5794095982775227e-05, "loss": 1.2639, "step": 61910 }, { "epoch": 10.52524222335543, "grad_norm": 18.49765396118164, "learning_rate": 1.5791262961074284e-05, "loss": 1.1403, "step": 61920 }, { "epoch": 10.526942036376, "grad_norm": 18.427597045898438, "learning_rate": 1.5788429939373337e-05, "loss": 1.1451, "step": 61930 }, { "epoch": 10.528641849396566, "grad_norm": 17.219791412353516, "learning_rate": 1.578559691767239e-05, "loss": 1.1033, "step": 61940 }, { "epoch": 10.530341662417134, "grad_norm": 22.03217887878418, "learning_rate": 1.5782763895971444e-05, "loss": 1.4279, "step": 61950 }, { "epoch": 10.532041475437701, "grad_norm": 16.062931060791016, "learning_rate": 1.5779930874270498e-05, "loss": 1.2629, "step": 61960 }, { "epoch": 10.53374128845827, "grad_norm": 13.283356666564941, "learning_rate": 1.577709785256955e-05, "loss": 1.2937, "step": 61970 }, { "epoch": 10.535441101478837, "grad_norm": 18.777265548706055, "learning_rate": 1.5774264830868605e-05, "loss": 1.3538, "step": 61980 }, { "epoch": 10.537140914499405, "grad_norm": 15.782849311828613, "learning_rate": 1.5771431809167658e-05, "loss": 1.2563, "step": 61990 }, { "epoch": 10.538840727519974, "grad_norm": 13.46397876739502, "learning_rate": 1.5768598787466712e-05, "loss": 1.2276, "step": 62000 }, { "epoch": 10.54054054054054, "grad_norm": 15.03705883026123, "learning_rate": 1.576576576576577e-05, "loss": 1.1017, "step": 62010 }, { "epoch": 10.542240353561109, "grad_norm": 14.718892097473145, "learning_rate": 1.576293274406482e-05, "loss": 1.2899, "step": 62020 }, { "epoch": 10.543940166581676, "grad_norm": 17.851428985595703, "learning_rate": 1.5760099722363872e-05, "loss": 1.1618, "step": 62030 }, { "epoch": 10.545639979602244, "grad_norm": 19.61429786682129, "learning_rate": 1.575726670066293e-05, "loss": 1.3511, "step": 62040 }, { "epoch": 10.547339792622811, "grad_norm": 16.324594497680664, "learning_rate": 1.5754433678961983e-05, "loss": 1.2013, "step": 62050 }, { "epoch": 10.54903960564338, "grad_norm": 14.38100528717041, "learning_rate": 1.5751600657261033e-05, "loss": 1.3177, "step": 62060 }, { "epoch": 10.550739418663946, "grad_norm": 15.223631858825684, "learning_rate": 1.574876763556009e-05, "loss": 1.0269, "step": 62070 }, { "epoch": 10.552439231684515, "grad_norm": 20.404081344604492, "learning_rate": 1.5745934613859143e-05, "loss": 1.3297, "step": 62080 }, { "epoch": 10.554139044705082, "grad_norm": 11.427043914794922, "learning_rate": 1.5743101592158197e-05, "loss": 1.18, "step": 62090 }, { "epoch": 10.55583885772565, "grad_norm": 12.301542282104492, "learning_rate": 1.574026857045725e-05, "loss": 1.3007, "step": 62100 }, { "epoch": 10.557538670746219, "grad_norm": 23.844444274902344, "learning_rate": 1.5737435548756304e-05, "loss": 1.221, "step": 62110 }, { "epoch": 10.559238483766785, "grad_norm": 12.91081714630127, "learning_rate": 1.5734602527055357e-05, "loss": 1.5302, "step": 62120 }, { "epoch": 10.560938296787354, "grad_norm": 15.515172958374023, "learning_rate": 1.5731769505354414e-05, "loss": 1.2417, "step": 62130 }, { "epoch": 10.56263810980792, "grad_norm": 20.426105499267578, "learning_rate": 1.5728936483653465e-05, "loss": 1.0953, "step": 62140 }, { "epoch": 10.56433792282849, "grad_norm": 16.673561096191406, "learning_rate": 1.5726103461952518e-05, "loss": 1.201, "step": 62150 }, { "epoch": 10.566037735849056, "grad_norm": 15.649469375610352, "learning_rate": 1.5723270440251575e-05, "loss": 1.1819, "step": 62160 }, { "epoch": 10.567737548869625, "grad_norm": 9.72276782989502, "learning_rate": 1.5720437418550625e-05, "loss": 1.1579, "step": 62170 }, { "epoch": 10.569437361890191, "grad_norm": 12.397133827209473, "learning_rate": 1.571760439684968e-05, "loss": 1.2601, "step": 62180 }, { "epoch": 10.57113717491076, "grad_norm": 20.30237579345703, "learning_rate": 1.5714771375148735e-05, "loss": 1.152, "step": 62190 }, { "epoch": 10.572836987931328, "grad_norm": 22.444326400756836, "learning_rate": 1.571193835344779e-05, "loss": 1.4792, "step": 62200 }, { "epoch": 10.574536800951895, "grad_norm": 17.005651473999023, "learning_rate": 1.570910533174684e-05, "loss": 1.3821, "step": 62210 }, { "epoch": 10.576236613972464, "grad_norm": 41.75814437866211, "learning_rate": 1.5706272310045896e-05, "loss": 1.21, "step": 62220 }, { "epoch": 10.57793642699303, "grad_norm": 14.371267318725586, "learning_rate": 1.570343928834495e-05, "loss": 1.2702, "step": 62230 }, { "epoch": 10.579636240013599, "grad_norm": 13.769341468811035, "learning_rate": 1.5700606266644003e-05, "loss": 1.3371, "step": 62240 }, { "epoch": 10.581336053034166, "grad_norm": 12.75920581817627, "learning_rate": 1.5697773244943057e-05, "loss": 1.3122, "step": 62250 }, { "epoch": 10.583035866054734, "grad_norm": 17.71028709411621, "learning_rate": 1.569494022324211e-05, "loss": 1.14, "step": 62260 }, { "epoch": 10.584735679075301, "grad_norm": 14.631596565246582, "learning_rate": 1.5692107201541164e-05, "loss": 1.3046, "step": 62270 }, { "epoch": 10.58643549209587, "grad_norm": 12.100140571594238, "learning_rate": 1.568927417984022e-05, "loss": 1.2412, "step": 62280 }, { "epoch": 10.588135305116436, "grad_norm": 21.121318817138672, "learning_rate": 1.568644115813927e-05, "loss": 1.332, "step": 62290 }, { "epoch": 10.589835118137005, "grad_norm": 17.903249740600586, "learning_rate": 1.5683608136438324e-05, "loss": 1.4524, "step": 62300 }, { "epoch": 10.591534931157573, "grad_norm": 17.122032165527344, "learning_rate": 1.568077511473738e-05, "loss": 1.1571, "step": 62310 }, { "epoch": 10.59323474417814, "grad_norm": 14.012688636779785, "learning_rate": 1.567794209303643e-05, "loss": 1.1363, "step": 62320 }, { "epoch": 10.594934557198709, "grad_norm": 18.61918067932129, "learning_rate": 1.5675109071335485e-05, "loss": 1.0166, "step": 62330 }, { "epoch": 10.596634370219276, "grad_norm": 17.815082550048828, "learning_rate": 1.5672276049634542e-05, "loss": 1.3219, "step": 62340 }, { "epoch": 10.598334183239844, "grad_norm": 15.736310958862305, "learning_rate": 1.5669443027933595e-05, "loss": 1.1235, "step": 62350 }, { "epoch": 10.60003399626041, "grad_norm": 13.663670539855957, "learning_rate": 1.5666610006232645e-05, "loss": 1.1031, "step": 62360 }, { "epoch": 10.60173380928098, "grad_norm": 15.540471076965332, "learning_rate": 1.5663776984531702e-05, "loss": 1.087, "step": 62370 }, { "epoch": 10.603433622301546, "grad_norm": 11.939882278442383, "learning_rate": 1.5660943962830756e-05, "loss": 1.2504, "step": 62380 }, { "epoch": 10.605133435322115, "grad_norm": 15.10948657989502, "learning_rate": 1.565811094112981e-05, "loss": 1.2209, "step": 62390 }, { "epoch": 10.606833248342681, "grad_norm": 16.318727493286133, "learning_rate": 1.5655277919428863e-05, "loss": 1.1992, "step": 62400 }, { "epoch": 10.60853306136325, "grad_norm": 15.21472454071045, "learning_rate": 1.5652444897727916e-05, "loss": 1.2393, "step": 62410 }, { "epoch": 10.610232874383819, "grad_norm": 16.09611701965332, "learning_rate": 1.564961187602697e-05, "loss": 1.5065, "step": 62420 }, { "epoch": 10.611932687404385, "grad_norm": 14.80422592163086, "learning_rate": 1.5646778854326027e-05, "loss": 1.3902, "step": 62430 }, { "epoch": 10.613632500424954, "grad_norm": 15.505000114440918, "learning_rate": 1.5643945832625077e-05, "loss": 1.1348, "step": 62440 }, { "epoch": 10.61533231344552, "grad_norm": 38.87553787231445, "learning_rate": 1.564111281092413e-05, "loss": 1.2373, "step": 62450 }, { "epoch": 10.61703212646609, "grad_norm": 15.638507843017578, "learning_rate": 1.5638279789223187e-05, "loss": 1.1166, "step": 62460 }, { "epoch": 10.618731939486656, "grad_norm": 16.125980377197266, "learning_rate": 1.563544676752224e-05, "loss": 1.3883, "step": 62470 }, { "epoch": 10.620431752507224, "grad_norm": 11.397339820861816, "learning_rate": 1.563261374582129e-05, "loss": 1.2405, "step": 62480 }, { "epoch": 10.622131565527791, "grad_norm": 14.928251266479492, "learning_rate": 1.5629780724120348e-05, "loss": 1.1876, "step": 62490 }, { "epoch": 10.62383137854836, "grad_norm": 14.68750286102295, "learning_rate": 1.56269477024194e-05, "loss": 1.3348, "step": 62500 }, { "epoch": 10.625531191568928, "grad_norm": 17.82771110534668, "learning_rate": 1.562411468071845e-05, "loss": 1.1988, "step": 62510 }, { "epoch": 10.627231004589495, "grad_norm": 15.914223670959473, "learning_rate": 1.562128165901751e-05, "loss": 0.9995, "step": 62520 }, { "epoch": 10.628930817610064, "grad_norm": 16.870155334472656, "learning_rate": 1.5618448637316562e-05, "loss": 1.1962, "step": 62530 }, { "epoch": 10.63063063063063, "grad_norm": 11.9865083694458, "learning_rate": 1.5615615615615616e-05, "loss": 1.2025, "step": 62540 }, { "epoch": 10.632330443651199, "grad_norm": 19.88804817199707, "learning_rate": 1.561278259391467e-05, "loss": 1.2907, "step": 62550 }, { "epoch": 10.634030256671766, "grad_norm": 19.793453216552734, "learning_rate": 1.5609949572213723e-05, "loss": 1.386, "step": 62560 }, { "epoch": 10.635730069692334, "grad_norm": 17.101150512695312, "learning_rate": 1.5607116550512776e-05, "loss": 1.1948, "step": 62570 }, { "epoch": 10.637429882712901, "grad_norm": 19.287353515625, "learning_rate": 1.5604283528811833e-05, "loss": 1.0788, "step": 62580 }, { "epoch": 10.63912969573347, "grad_norm": 13.726941108703613, "learning_rate": 1.5601450507110883e-05, "loss": 1.4841, "step": 62590 }, { "epoch": 10.640829508754036, "grad_norm": 20.41042709350586, "learning_rate": 1.559861748540994e-05, "loss": 1.0497, "step": 62600 }, { "epoch": 10.642529321774605, "grad_norm": 14.548630714416504, "learning_rate": 1.5595784463708994e-05, "loss": 1.1401, "step": 62610 }, { "epoch": 10.644229134795172, "grad_norm": 12.412419319152832, "learning_rate": 1.5592951442008047e-05, "loss": 1.1443, "step": 62620 }, { "epoch": 10.64592894781574, "grad_norm": 12.20893383026123, "learning_rate": 1.55901184203071e-05, "loss": 1.269, "step": 62630 }, { "epoch": 10.647628760836309, "grad_norm": 12.907522201538086, "learning_rate": 1.5587285398606154e-05, "loss": 1.0644, "step": 62640 }, { "epoch": 10.649328573856875, "grad_norm": 15.330998420715332, "learning_rate": 1.5584452376905208e-05, "loss": 1.2121, "step": 62650 }, { "epoch": 10.651028386877444, "grad_norm": 16.669357299804688, "learning_rate": 1.558161935520426e-05, "loss": 0.9833, "step": 62660 }, { "epoch": 10.65272819989801, "grad_norm": 13.300407409667969, "learning_rate": 1.5578786333503315e-05, "loss": 1.2224, "step": 62670 }, { "epoch": 10.65442801291858, "grad_norm": 18.696311950683594, "learning_rate": 1.5575953311802368e-05, "loss": 1.3177, "step": 62680 }, { "epoch": 10.656127825939146, "grad_norm": 16.18581771850586, "learning_rate": 1.5573120290101425e-05, "loss": 1.2186, "step": 62690 }, { "epoch": 10.657827638959715, "grad_norm": 16.251707077026367, "learning_rate": 1.5570287268400475e-05, "loss": 1.2346, "step": 62700 }, { "epoch": 10.659527451980281, "grad_norm": 13.573160171508789, "learning_rate": 1.556745424669953e-05, "loss": 1.3419, "step": 62710 }, { "epoch": 10.66122726500085, "grad_norm": 18.669570922851562, "learning_rate": 1.5564621224998586e-05, "loss": 1.0819, "step": 62720 }, { "epoch": 10.662927078021418, "grad_norm": 14.05413818359375, "learning_rate": 1.556178820329764e-05, "loss": 1.1972, "step": 62730 }, { "epoch": 10.664626891041985, "grad_norm": 13.392998695373535, "learning_rate": 1.555895518159669e-05, "loss": 1.1052, "step": 62740 }, { "epoch": 10.666326704062554, "grad_norm": 13.408005714416504, "learning_rate": 1.5556122159895746e-05, "loss": 1.1598, "step": 62750 }, { "epoch": 10.66802651708312, "grad_norm": 15.175870895385742, "learning_rate": 1.55532891381948e-05, "loss": 1.1776, "step": 62760 }, { "epoch": 10.669726330103689, "grad_norm": 16.6702880859375, "learning_rate": 1.5550456116493853e-05, "loss": 1.3566, "step": 62770 }, { "epoch": 10.671426143124256, "grad_norm": 16.350624084472656, "learning_rate": 1.5547623094792907e-05, "loss": 1.2222, "step": 62780 }, { "epoch": 10.673125956144824, "grad_norm": 18.54759979248047, "learning_rate": 1.554479007309196e-05, "loss": 1.4015, "step": 62790 }, { "epoch": 10.674825769165391, "grad_norm": 15.06496524810791, "learning_rate": 1.5541957051391014e-05, "loss": 1.0873, "step": 62800 }, { "epoch": 10.67652558218596, "grad_norm": 14.270665168762207, "learning_rate": 1.553912402969007e-05, "loss": 1.3549, "step": 62810 }, { "epoch": 10.678225395206526, "grad_norm": 12.226114273071289, "learning_rate": 1.553629100798912e-05, "loss": 1.1516, "step": 62820 }, { "epoch": 10.679925208227095, "grad_norm": 18.609329223632812, "learning_rate": 1.5533457986288174e-05, "loss": 1.4387, "step": 62830 }, { "epoch": 10.681625021247664, "grad_norm": 24.145702362060547, "learning_rate": 1.553062496458723e-05, "loss": 1.1653, "step": 62840 }, { "epoch": 10.68332483426823, "grad_norm": 16.1776065826416, "learning_rate": 1.552779194288628e-05, "loss": 1.2116, "step": 62850 }, { "epoch": 10.685024647288799, "grad_norm": 12.357538223266602, "learning_rate": 1.5524958921185335e-05, "loss": 1.2148, "step": 62860 }, { "epoch": 10.686724460309366, "grad_norm": 23.893770217895508, "learning_rate": 1.5522125899484392e-05, "loss": 1.0989, "step": 62870 }, { "epoch": 10.688424273329934, "grad_norm": 16.754459381103516, "learning_rate": 1.5519292877783445e-05, "loss": 1.535, "step": 62880 }, { "epoch": 10.690124086350501, "grad_norm": 13.4688138961792, "learning_rate": 1.5516459856082496e-05, "loss": 1.0964, "step": 62890 }, { "epoch": 10.69182389937107, "grad_norm": 16.10501480102539, "learning_rate": 1.5513626834381552e-05, "loss": 1.338, "step": 62900 }, { "epoch": 10.693523712391636, "grad_norm": 21.029043197631836, "learning_rate": 1.5510793812680606e-05, "loss": 1.2084, "step": 62910 }, { "epoch": 10.695223525412205, "grad_norm": 17.516921997070312, "learning_rate": 1.550796079097966e-05, "loss": 1.0378, "step": 62920 }, { "epoch": 10.696923338432772, "grad_norm": 17.20132064819336, "learning_rate": 1.5505127769278713e-05, "loss": 1.294, "step": 62930 }, { "epoch": 10.69862315145334, "grad_norm": 24.88850975036621, "learning_rate": 1.5502294747577767e-05, "loss": 1.1208, "step": 62940 }, { "epoch": 10.700322964473909, "grad_norm": 14.795145988464355, "learning_rate": 1.549946172587682e-05, "loss": 1.2946, "step": 62950 }, { "epoch": 10.702022777494475, "grad_norm": 14.77291488647461, "learning_rate": 1.5496628704175877e-05, "loss": 1.249, "step": 62960 }, { "epoch": 10.703722590515044, "grad_norm": 12.367280006408691, "learning_rate": 1.5493795682474927e-05, "loss": 1.3106, "step": 62970 }, { "epoch": 10.70542240353561, "grad_norm": 15.269190788269043, "learning_rate": 1.549096266077398e-05, "loss": 1.1739, "step": 62980 }, { "epoch": 10.70712221655618, "grad_norm": 16.434642791748047, "learning_rate": 1.5488129639073038e-05, "loss": 1.2323, "step": 62990 }, { "epoch": 10.708822029576746, "grad_norm": 17.50869369506836, "learning_rate": 1.548529661737209e-05, "loss": 1.0374, "step": 63000 }, { "epoch": 10.710521842597315, "grad_norm": 14.649151802062988, "learning_rate": 1.548246359567114e-05, "loss": 1.4869, "step": 63010 }, { "epoch": 10.712221655617881, "grad_norm": 30.802082061767578, "learning_rate": 1.5479630573970198e-05, "loss": 1.2055, "step": 63020 }, { "epoch": 10.71392146863845, "grad_norm": 26.64105987548828, "learning_rate": 1.547679755226925e-05, "loss": 1.162, "step": 63030 }, { "epoch": 10.715621281659018, "grad_norm": 14.975321769714355, "learning_rate": 1.5473964530568302e-05, "loss": 1.4423, "step": 63040 }, { "epoch": 10.717321094679585, "grad_norm": 13.882574081420898, "learning_rate": 1.547113150886736e-05, "loss": 1.2209, "step": 63050 }, { "epoch": 10.719020907700154, "grad_norm": 22.781349182128906, "learning_rate": 1.5468298487166412e-05, "loss": 1.2871, "step": 63060 }, { "epoch": 10.72072072072072, "grad_norm": 15.123802185058594, "learning_rate": 1.5465465465465466e-05, "loss": 1.242, "step": 63070 }, { "epoch": 10.722420533741289, "grad_norm": 12.122029304504395, "learning_rate": 1.546263244376452e-05, "loss": 1.2818, "step": 63080 }, { "epoch": 10.724120346761856, "grad_norm": 14.051079750061035, "learning_rate": 1.5459799422063573e-05, "loss": 1.2582, "step": 63090 }, { "epoch": 10.725820159782424, "grad_norm": 16.125629425048828, "learning_rate": 1.5456966400362626e-05, "loss": 1.1696, "step": 63100 }, { "epoch": 10.727519972802991, "grad_norm": 13.222824096679688, "learning_rate": 1.5454133378661683e-05, "loss": 1.1957, "step": 63110 }, { "epoch": 10.72921978582356, "grad_norm": 13.638066291809082, "learning_rate": 1.5451300356960733e-05, "loss": 1.1783, "step": 63120 }, { "epoch": 10.730919598844126, "grad_norm": 23.460020065307617, "learning_rate": 1.5448467335259787e-05, "loss": 1.1632, "step": 63130 }, { "epoch": 10.732619411864695, "grad_norm": 53.595306396484375, "learning_rate": 1.5445634313558844e-05, "loss": 1.2094, "step": 63140 }, { "epoch": 10.734319224885262, "grad_norm": 16.445127487182617, "learning_rate": 1.5442801291857897e-05, "loss": 1.3862, "step": 63150 }, { "epoch": 10.73601903790583, "grad_norm": 12.392659187316895, "learning_rate": 1.5439968270156947e-05, "loss": 1.0298, "step": 63160 }, { "epoch": 10.737718850926399, "grad_norm": 27.062376022338867, "learning_rate": 1.5437135248456004e-05, "loss": 1.3027, "step": 63170 }, { "epoch": 10.739418663946966, "grad_norm": 14.634697914123535, "learning_rate": 1.5434302226755058e-05, "loss": 1.2353, "step": 63180 }, { "epoch": 10.741118476967534, "grad_norm": 14.532214164733887, "learning_rate": 1.5431469205054108e-05, "loss": 1.3059, "step": 63190 }, { "epoch": 10.7428182899881, "grad_norm": 15.051984786987305, "learning_rate": 1.5428636183353165e-05, "loss": 1.2453, "step": 63200 }, { "epoch": 10.74451810300867, "grad_norm": 15.670448303222656, "learning_rate": 1.542580316165222e-05, "loss": 1.1123, "step": 63210 }, { "epoch": 10.746217916029236, "grad_norm": 17.421403884887695, "learning_rate": 1.5422970139951272e-05, "loss": 1.3641, "step": 63220 }, { "epoch": 10.747917729049805, "grad_norm": 18.915706634521484, "learning_rate": 1.5420137118250325e-05, "loss": 1.3933, "step": 63230 }, { "epoch": 10.749617542070371, "grad_norm": 11.997319221496582, "learning_rate": 1.541730409654938e-05, "loss": 1.3297, "step": 63240 }, { "epoch": 10.75131735509094, "grad_norm": 15.284042358398438, "learning_rate": 1.5414471074848433e-05, "loss": 1.4016, "step": 63250 }, { "epoch": 10.753017168111509, "grad_norm": 13.045676231384277, "learning_rate": 1.541163805314749e-05, "loss": 1.3732, "step": 63260 }, { "epoch": 10.754716981132075, "grad_norm": 21.7224063873291, "learning_rate": 1.540880503144654e-05, "loss": 1.0884, "step": 63270 }, { "epoch": 10.756416794152644, "grad_norm": 26.92788314819336, "learning_rate": 1.5405972009745593e-05, "loss": 1.1892, "step": 63280 }, { "epoch": 10.75811660717321, "grad_norm": 15.079318046569824, "learning_rate": 1.540313898804465e-05, "loss": 1.0781, "step": 63290 }, { "epoch": 10.75981642019378, "grad_norm": 19.775047302246094, "learning_rate": 1.5400305966343704e-05, "loss": 1.282, "step": 63300 }, { "epoch": 10.761516233214346, "grad_norm": 13.982219696044922, "learning_rate": 1.5397472944642754e-05, "loss": 1.3878, "step": 63310 }, { "epoch": 10.763216046234914, "grad_norm": 13.978723526000977, "learning_rate": 1.539463992294181e-05, "loss": 1.2997, "step": 63320 }, { "epoch": 10.764915859255481, "grad_norm": 13.97456169128418, "learning_rate": 1.5391806901240864e-05, "loss": 1.1393, "step": 63330 }, { "epoch": 10.76661567227605, "grad_norm": 16.108436584472656, "learning_rate": 1.5388973879539918e-05, "loss": 1.251, "step": 63340 }, { "epoch": 10.768315485296618, "grad_norm": 16.385488510131836, "learning_rate": 1.538614085783897e-05, "loss": 1.1218, "step": 63350 }, { "epoch": 10.770015298317185, "grad_norm": 15.083240509033203, "learning_rate": 1.5383307836138025e-05, "loss": 1.161, "step": 63360 }, { "epoch": 10.771715111337754, "grad_norm": 14.032764434814453, "learning_rate": 1.538047481443708e-05, "loss": 1.252, "step": 63370 }, { "epoch": 10.77341492435832, "grad_norm": 16.28240394592285, "learning_rate": 1.5377641792736132e-05, "loss": 1.2544, "step": 63380 }, { "epoch": 10.775114737378889, "grad_norm": 14.420799255371094, "learning_rate": 1.5374808771035185e-05, "loss": 1.1479, "step": 63390 }, { "epoch": 10.776814550399456, "grad_norm": 19.971752166748047, "learning_rate": 1.5371975749334242e-05, "loss": 1.265, "step": 63400 }, { "epoch": 10.778514363420024, "grad_norm": 14.802401542663574, "learning_rate": 1.5369142727633296e-05, "loss": 0.9046, "step": 63410 }, { "epoch": 10.780214176440591, "grad_norm": 14.180659294128418, "learning_rate": 1.5366309705932346e-05, "loss": 1.4619, "step": 63420 }, { "epoch": 10.78191398946116, "grad_norm": 16.2987003326416, "learning_rate": 1.5363476684231403e-05, "loss": 1.1283, "step": 63430 }, { "epoch": 10.783613802481726, "grad_norm": 15.165412902832031, "learning_rate": 1.5360643662530456e-05, "loss": 1.2148, "step": 63440 }, { "epoch": 10.785313615502295, "grad_norm": 14.122323989868164, "learning_rate": 1.535781064082951e-05, "loss": 1.1424, "step": 63450 }, { "epoch": 10.787013428522862, "grad_norm": 17.414478302001953, "learning_rate": 1.5354977619128563e-05, "loss": 1.2733, "step": 63460 }, { "epoch": 10.78871324154343, "grad_norm": 14.419354438781738, "learning_rate": 1.5352144597427617e-05, "loss": 1.2125, "step": 63470 }, { "epoch": 10.790413054563999, "grad_norm": 15.076556205749512, "learning_rate": 1.534931157572667e-05, "loss": 1.2896, "step": 63480 }, { "epoch": 10.792112867584565, "grad_norm": 17.311559677124023, "learning_rate": 1.5346478554025727e-05, "loss": 1.2741, "step": 63490 }, { "epoch": 10.793812680605134, "grad_norm": 11.39295482635498, "learning_rate": 1.5343645532324777e-05, "loss": 1.402, "step": 63500 }, { "epoch": 10.7955124936257, "grad_norm": 30.641637802124023, "learning_rate": 1.534081251062383e-05, "loss": 1.3003, "step": 63510 }, { "epoch": 10.79721230664627, "grad_norm": 15.100865364074707, "learning_rate": 1.5337979488922888e-05, "loss": 1.1802, "step": 63520 }, { "epoch": 10.798912119666836, "grad_norm": 19.009727478027344, "learning_rate": 1.5335146467221938e-05, "loss": 1.3545, "step": 63530 }, { "epoch": 10.800611932687405, "grad_norm": 15.257614135742188, "learning_rate": 1.533231344552099e-05, "loss": 1.3476, "step": 63540 }, { "epoch": 10.802311745707971, "grad_norm": 16.416608810424805, "learning_rate": 1.532948042382005e-05, "loss": 1.1379, "step": 63550 }, { "epoch": 10.80401155872854, "grad_norm": 15.363361358642578, "learning_rate": 1.5326647402119102e-05, "loss": 1.238, "step": 63560 }, { "epoch": 10.805711371749108, "grad_norm": 15.624592781066895, "learning_rate": 1.5323814380418152e-05, "loss": 1.1887, "step": 63570 }, { "epoch": 10.807411184769675, "grad_norm": 14.27706527709961, "learning_rate": 1.532098135871721e-05, "loss": 1.2049, "step": 63580 }, { "epoch": 10.809110997790244, "grad_norm": 15.011954307556152, "learning_rate": 1.5318148337016262e-05, "loss": 1.2275, "step": 63590 }, { "epoch": 10.81081081081081, "grad_norm": 13.573762893676758, "learning_rate": 1.5315315315315316e-05, "loss": 1.1226, "step": 63600 }, { "epoch": 10.812510623831379, "grad_norm": 13.785839080810547, "learning_rate": 1.531248229361437e-05, "loss": 1.3346, "step": 63610 }, { "epoch": 10.814210436851946, "grad_norm": 14.266877174377441, "learning_rate": 1.5309649271913423e-05, "loss": 1.3048, "step": 63620 }, { "epoch": 10.815910249872514, "grad_norm": 13.401729583740234, "learning_rate": 1.5306816250212477e-05, "loss": 1.2451, "step": 63630 }, { "epoch": 10.817610062893081, "grad_norm": 12.707308769226074, "learning_rate": 1.5303983228511533e-05, "loss": 1.4469, "step": 63640 }, { "epoch": 10.81930987591365, "grad_norm": 22.211774826049805, "learning_rate": 1.5301150206810584e-05, "loss": 1.1839, "step": 63650 }, { "epoch": 10.821009688934216, "grad_norm": 14.508798599243164, "learning_rate": 1.5298317185109637e-05, "loss": 1.1354, "step": 63660 }, { "epoch": 10.822709501954785, "grad_norm": 19.896394729614258, "learning_rate": 1.5295484163408694e-05, "loss": 1.2063, "step": 63670 }, { "epoch": 10.824409314975354, "grad_norm": 13.525336265563965, "learning_rate": 1.5292651141707748e-05, "loss": 1.1865, "step": 63680 }, { "epoch": 10.82610912799592, "grad_norm": 12.524323463439941, "learning_rate": 1.5289818120006798e-05, "loss": 1.2952, "step": 63690 }, { "epoch": 10.827808941016489, "grad_norm": 18.400157928466797, "learning_rate": 1.5286985098305855e-05, "loss": 0.8684, "step": 63700 }, { "epoch": 10.829508754037056, "grad_norm": 26.755380630493164, "learning_rate": 1.5284152076604908e-05, "loss": 1.3913, "step": 63710 }, { "epoch": 10.831208567057624, "grad_norm": 13.664006233215332, "learning_rate": 1.5281319054903958e-05, "loss": 1.2395, "step": 63720 }, { "epoch": 10.832908380078191, "grad_norm": 18.306415557861328, "learning_rate": 1.5278486033203015e-05, "loss": 1.1304, "step": 63730 }, { "epoch": 10.83460819309876, "grad_norm": 15.237955093383789, "learning_rate": 1.527565301150207e-05, "loss": 1.1978, "step": 63740 }, { "epoch": 10.836308006119326, "grad_norm": 15.527846336364746, "learning_rate": 1.5272819989801122e-05, "loss": 1.1502, "step": 63750 }, { "epoch": 10.838007819139895, "grad_norm": 15.833697319030762, "learning_rate": 1.5269986968100176e-05, "loss": 1.2431, "step": 63760 }, { "epoch": 10.839707632160462, "grad_norm": 11.446257591247559, "learning_rate": 1.526715394639923e-05, "loss": 1.0367, "step": 63770 }, { "epoch": 10.84140744518103, "grad_norm": 13.060930252075195, "learning_rate": 1.5264320924698283e-05, "loss": 1.2261, "step": 63780 }, { "epoch": 10.843107258201599, "grad_norm": 17.607439041137695, "learning_rate": 1.526148790299734e-05, "loss": 1.15, "step": 63790 }, { "epoch": 10.844807071222165, "grad_norm": 12.979783058166504, "learning_rate": 1.525865488129639e-05, "loss": 1.3997, "step": 63800 }, { "epoch": 10.846506884242734, "grad_norm": 15.833968162536621, "learning_rate": 1.5255821859595445e-05, "loss": 1.1503, "step": 63810 }, { "epoch": 10.8482066972633, "grad_norm": 17.124338150024414, "learning_rate": 1.5252988837894499e-05, "loss": 1.1733, "step": 63820 }, { "epoch": 10.84990651028387, "grad_norm": 11.360281944274902, "learning_rate": 1.5250155816193554e-05, "loss": 1.2434, "step": 63830 }, { "epoch": 10.851606323304436, "grad_norm": 11.722728729248047, "learning_rate": 1.5247322794492606e-05, "loss": 1.2157, "step": 63840 }, { "epoch": 10.853306136325005, "grad_norm": 19.984088897705078, "learning_rate": 1.5244489772791659e-05, "loss": 1.3767, "step": 63850 }, { "epoch": 10.855005949345571, "grad_norm": 12.211321830749512, "learning_rate": 1.5241656751090714e-05, "loss": 1.2289, "step": 63860 }, { "epoch": 10.85670576236614, "grad_norm": 11.04782772064209, "learning_rate": 1.5238823729389766e-05, "loss": 1.0938, "step": 63870 }, { "epoch": 10.858405575386708, "grad_norm": 16.57399559020996, "learning_rate": 1.5235990707688821e-05, "loss": 1.3039, "step": 63880 }, { "epoch": 10.860105388407275, "grad_norm": 12.442853927612305, "learning_rate": 1.5233157685987875e-05, "loss": 1.3297, "step": 63890 }, { "epoch": 10.861805201427844, "grad_norm": 13.6427640914917, "learning_rate": 1.523032466428693e-05, "loss": 1.2679, "step": 63900 }, { "epoch": 10.86350501444841, "grad_norm": 16.067285537719727, "learning_rate": 1.5227491642585982e-05, "loss": 1.2939, "step": 63910 }, { "epoch": 10.865204827468979, "grad_norm": 10.35709285736084, "learning_rate": 1.5224658620885035e-05, "loss": 1.1747, "step": 63920 }, { "epoch": 10.866904640489546, "grad_norm": 16.704273223876953, "learning_rate": 1.522182559918409e-05, "loss": 1.2229, "step": 63930 }, { "epoch": 10.868604453510114, "grad_norm": 14.137442588806152, "learning_rate": 1.5218992577483146e-05, "loss": 1.2939, "step": 63940 }, { "epoch": 10.870304266530681, "grad_norm": 16.228744506835938, "learning_rate": 1.5216159555782196e-05, "loss": 1.2368, "step": 63950 }, { "epoch": 10.87200407955125, "grad_norm": 10.984009742736816, "learning_rate": 1.5213326534081251e-05, "loss": 1.2385, "step": 63960 }, { "epoch": 10.873703892571816, "grad_norm": 16.924251556396484, "learning_rate": 1.5210493512380306e-05, "loss": 1.3724, "step": 63970 }, { "epoch": 10.875403705592385, "grad_norm": 16.37944984436035, "learning_rate": 1.520766049067936e-05, "loss": 1.4895, "step": 63980 }, { "epoch": 10.877103518612952, "grad_norm": 25.052734375, "learning_rate": 1.5204827468978412e-05, "loss": 1.2201, "step": 63990 }, { "epoch": 10.87880333163352, "grad_norm": 15.328725814819336, "learning_rate": 1.5201994447277467e-05, "loss": 1.4137, "step": 64000 }, { "epoch": 10.880503144654089, "grad_norm": 14.090420722961426, "learning_rate": 1.519916142557652e-05, "loss": 1.2021, "step": 64010 }, { "epoch": 10.882202957674656, "grad_norm": 19.320201873779297, "learning_rate": 1.5196328403875576e-05, "loss": 1.396, "step": 64020 }, { "epoch": 10.883902770695224, "grad_norm": 13.660804748535156, "learning_rate": 1.5193495382174628e-05, "loss": 1.1236, "step": 64030 }, { "epoch": 10.88560258371579, "grad_norm": 19.36783790588379, "learning_rate": 1.5190662360473681e-05, "loss": 1.2304, "step": 64040 }, { "epoch": 10.88730239673636, "grad_norm": 16.271106719970703, "learning_rate": 1.5187829338772736e-05, "loss": 1.1718, "step": 64050 }, { "epoch": 10.889002209756926, "grad_norm": 19.169862747192383, "learning_rate": 1.5184996317071788e-05, "loss": 1.2018, "step": 64060 }, { "epoch": 10.890702022777495, "grad_norm": 12.991097450256348, "learning_rate": 1.5182163295370842e-05, "loss": 1.0958, "step": 64070 }, { "epoch": 10.892401835798061, "grad_norm": 14.244346618652344, "learning_rate": 1.5179330273669897e-05, "loss": 1.2417, "step": 64080 }, { "epoch": 10.89410164881863, "grad_norm": 14.547636985778809, "learning_rate": 1.5176497251968952e-05, "loss": 1.3082, "step": 64090 }, { "epoch": 10.895801461839199, "grad_norm": 14.150053977966309, "learning_rate": 1.5173664230268002e-05, "loss": 1.1947, "step": 64100 }, { "epoch": 10.897501274859765, "grad_norm": 13.725189208984375, "learning_rate": 1.5170831208567057e-05, "loss": 1.3322, "step": 64110 }, { "epoch": 10.899201087880334, "grad_norm": 15.215027809143066, "learning_rate": 1.5167998186866113e-05, "loss": 1.3026, "step": 64120 }, { "epoch": 10.9009009009009, "grad_norm": 17.816200256347656, "learning_rate": 1.5165165165165166e-05, "loss": 1.1718, "step": 64130 }, { "epoch": 10.90260071392147, "grad_norm": 16.47544288635254, "learning_rate": 1.5162332143464218e-05, "loss": 1.106, "step": 64140 }, { "epoch": 10.904300526942036, "grad_norm": 12.66788387298584, "learning_rate": 1.5159499121763273e-05, "loss": 1.3319, "step": 64150 }, { "epoch": 10.906000339962604, "grad_norm": 13.80542278289795, "learning_rate": 1.5156666100062327e-05, "loss": 1.2135, "step": 64160 }, { "epoch": 10.907700152983171, "grad_norm": 16.274993896484375, "learning_rate": 1.5153833078361382e-05, "loss": 1.33, "step": 64170 }, { "epoch": 10.90939996600374, "grad_norm": 17.478307723999023, "learning_rate": 1.5151000056660434e-05, "loss": 0.9713, "step": 64180 }, { "epoch": 10.911099779024306, "grad_norm": 10.605969429016113, "learning_rate": 1.5148167034959487e-05, "loss": 1.1377, "step": 64190 }, { "epoch": 10.912799592044875, "grad_norm": 12.672956466674805, "learning_rate": 1.5145334013258543e-05, "loss": 1.2344, "step": 64200 }, { "epoch": 10.914499405065444, "grad_norm": 16.740787506103516, "learning_rate": 1.5142500991557598e-05, "loss": 1.1002, "step": 64210 }, { "epoch": 10.91619921808601, "grad_norm": 14.679986953735352, "learning_rate": 1.5139667969856648e-05, "loss": 1.1708, "step": 64220 }, { "epoch": 10.917899031106579, "grad_norm": 12.292978286743164, "learning_rate": 1.5136834948155703e-05, "loss": 1.3238, "step": 64230 }, { "epoch": 10.919598844127146, "grad_norm": 21.72861671447754, "learning_rate": 1.5134001926454758e-05, "loss": 1.2065, "step": 64240 }, { "epoch": 10.921298657147714, "grad_norm": 17.41633415222168, "learning_rate": 1.513116890475381e-05, "loss": 1.1659, "step": 64250 }, { "epoch": 10.922998470168281, "grad_norm": 14.324262619018555, "learning_rate": 1.5128335883052864e-05, "loss": 1.4499, "step": 64260 }, { "epoch": 10.92469828318885, "grad_norm": 16.688661575317383, "learning_rate": 1.5125502861351919e-05, "loss": 1.026, "step": 64270 }, { "epoch": 10.926398096209416, "grad_norm": 19.384037017822266, "learning_rate": 1.5122669839650974e-05, "loss": 1.1352, "step": 64280 }, { "epoch": 10.928097909229985, "grad_norm": 14.171420097351074, "learning_rate": 1.5119836817950024e-05, "loss": 1.2048, "step": 64290 }, { "epoch": 10.929797722250552, "grad_norm": 12.601399421691895, "learning_rate": 1.511700379624908e-05, "loss": 1.3046, "step": 64300 }, { "epoch": 10.93149753527112, "grad_norm": 13.998566627502441, "learning_rate": 1.5114170774548135e-05, "loss": 0.9532, "step": 64310 }, { "epoch": 10.933197348291689, "grad_norm": 14.071196556091309, "learning_rate": 1.5111337752847188e-05, "loss": 1.2285, "step": 64320 }, { "epoch": 10.934897161312255, "grad_norm": 17.135356903076172, "learning_rate": 1.510850473114624e-05, "loss": 1.3078, "step": 64330 }, { "epoch": 10.936596974332824, "grad_norm": 14.299782752990723, "learning_rate": 1.5105671709445295e-05, "loss": 1.0558, "step": 64340 }, { "epoch": 10.93829678735339, "grad_norm": 11.560652732849121, "learning_rate": 1.5102838687744349e-05, "loss": 1.2489, "step": 64350 }, { "epoch": 10.93999660037396, "grad_norm": 16.0532283782959, "learning_rate": 1.5100005666043404e-05, "loss": 1.0864, "step": 64360 }, { "epoch": 10.941696413394526, "grad_norm": 15.673355102539062, "learning_rate": 1.5097172644342456e-05, "loss": 1.105, "step": 64370 }, { "epoch": 10.943396226415095, "grad_norm": 14.536930084228516, "learning_rate": 1.509433962264151e-05, "loss": 1.2424, "step": 64380 }, { "epoch": 10.945096039435661, "grad_norm": 11.670296669006348, "learning_rate": 1.5091506600940565e-05, "loss": 1.2473, "step": 64390 }, { "epoch": 10.94679585245623, "grad_norm": 17.82801628112793, "learning_rate": 1.5088673579239616e-05, "loss": 1.3262, "step": 64400 }, { "epoch": 10.948495665476798, "grad_norm": 16.397485733032227, "learning_rate": 1.508584055753867e-05, "loss": 1.1988, "step": 64410 }, { "epoch": 10.950195478497365, "grad_norm": 13.253890037536621, "learning_rate": 1.5083007535837725e-05, "loss": 1.2457, "step": 64420 }, { "epoch": 10.951895291517934, "grad_norm": 15.05161190032959, "learning_rate": 1.508017451413678e-05, "loss": 1.1244, "step": 64430 }, { "epoch": 10.9535951045385, "grad_norm": 18.3514404296875, "learning_rate": 1.507734149243583e-05, "loss": 1.1529, "step": 64440 }, { "epoch": 10.955294917559069, "grad_norm": 14.653548240661621, "learning_rate": 1.5074508470734886e-05, "loss": 1.2923, "step": 64450 }, { "epoch": 10.956994730579636, "grad_norm": 13.4351167678833, "learning_rate": 1.5071675449033941e-05, "loss": 1.0322, "step": 64460 }, { "epoch": 10.958694543600204, "grad_norm": 17.280189514160156, "learning_rate": 1.5068842427332994e-05, "loss": 1.3265, "step": 64470 }, { "epoch": 10.960394356620771, "grad_norm": 9.388411521911621, "learning_rate": 1.5066009405632046e-05, "loss": 1.0872, "step": 64480 }, { "epoch": 10.96209416964134, "grad_norm": 16.797889709472656, "learning_rate": 1.5063176383931101e-05, "loss": 1.1046, "step": 64490 }, { "epoch": 10.963793982661906, "grad_norm": 14.016112327575684, "learning_rate": 1.5060343362230155e-05, "loss": 1.1946, "step": 64500 }, { "epoch": 10.965493795682475, "grad_norm": 20.36228370666504, "learning_rate": 1.505751034052921e-05, "loss": 1.3074, "step": 64510 }, { "epoch": 10.967193608703043, "grad_norm": 15.969520568847656, "learning_rate": 1.5054677318828262e-05, "loss": 1.2016, "step": 64520 }, { "epoch": 10.96889342172361, "grad_norm": 11.353555679321289, "learning_rate": 1.5051844297127316e-05, "loss": 1.083, "step": 64530 }, { "epoch": 10.970593234744179, "grad_norm": 15.546686172485352, "learning_rate": 1.504901127542637e-05, "loss": 1.1069, "step": 64540 }, { "epoch": 10.972293047764746, "grad_norm": 13.424158096313477, "learning_rate": 1.5046178253725426e-05, "loss": 1.3531, "step": 64550 }, { "epoch": 10.973992860785314, "grad_norm": 19.262603759765625, "learning_rate": 1.5043345232024476e-05, "loss": 1.3512, "step": 64560 }, { "epoch": 10.97569267380588, "grad_norm": 13.167738914489746, "learning_rate": 1.5040512210323531e-05, "loss": 1.3409, "step": 64570 }, { "epoch": 10.97739248682645, "grad_norm": 25.382678985595703, "learning_rate": 1.5037679188622587e-05, "loss": 1.3028, "step": 64580 }, { "epoch": 10.979092299847016, "grad_norm": 13.374361991882324, "learning_rate": 1.5034846166921638e-05, "loss": 1.2162, "step": 64590 }, { "epoch": 10.980792112867585, "grad_norm": 21.64017105102539, "learning_rate": 1.5032013145220692e-05, "loss": 1.2354, "step": 64600 }, { "epoch": 10.982491925888151, "grad_norm": 11.348897933959961, "learning_rate": 1.5029180123519747e-05, "loss": 1.2679, "step": 64610 }, { "epoch": 10.98419173890872, "grad_norm": 15.297874450683594, "learning_rate": 1.5026347101818802e-05, "loss": 1.3537, "step": 64620 }, { "epoch": 10.985891551929289, "grad_norm": 21.840730667114258, "learning_rate": 1.5023514080117852e-05, "loss": 0.9957, "step": 64630 }, { "epoch": 10.987591364949855, "grad_norm": 12.440322875976562, "learning_rate": 1.5020681058416908e-05, "loss": 1.3362, "step": 64640 }, { "epoch": 10.989291177970424, "grad_norm": 13.574811935424805, "learning_rate": 1.5017848036715963e-05, "loss": 1.0327, "step": 64650 }, { "epoch": 10.99099099099099, "grad_norm": 15.084627151489258, "learning_rate": 1.5015015015015016e-05, "loss": 1.1277, "step": 64660 }, { "epoch": 10.99269080401156, "grad_norm": 15.081889152526855, "learning_rate": 1.5012181993314068e-05, "loss": 1.0972, "step": 64670 }, { "epoch": 10.994390617032126, "grad_norm": 12.716523170471191, "learning_rate": 1.5009348971613123e-05, "loss": 1.2014, "step": 64680 }, { "epoch": 10.996090430052694, "grad_norm": 14.631271362304688, "learning_rate": 1.5006515949912177e-05, "loss": 1.2736, "step": 64690 }, { "epoch": 10.997790243073261, "grad_norm": 13.230523109436035, "learning_rate": 1.5003682928211232e-05, "loss": 1.2357, "step": 64700 }, { "epoch": 10.99949005609383, "grad_norm": 14.501029968261719, "learning_rate": 1.5000849906510284e-05, "loss": 1.2846, "step": 64710 }, { "epoch": 11.0, "eval_cer": 1.0, "eval_loss": 2.5167789459228516, "eval_runtime": 1958.9151, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.24, "step": 64713 }, { "epoch": 11.001189869114397, "grad_norm": 13.231935501098633, "learning_rate": 1.4998016884809338e-05, "loss": 1.0526, "step": 64720 }, { "epoch": 11.002889682134965, "grad_norm": 13.948974609375, "learning_rate": 1.4995183863108393e-05, "loss": 0.7985, "step": 64730 }, { "epoch": 11.004589495155534, "grad_norm": 16.678556442260742, "learning_rate": 1.4992350841407446e-05, "loss": 0.8852, "step": 64740 }, { "epoch": 11.0062893081761, "grad_norm": 13.291299819946289, "learning_rate": 1.4989517819706498e-05, "loss": 1.0783, "step": 64750 }, { "epoch": 11.007989121196669, "grad_norm": 17.265914916992188, "learning_rate": 1.4986684798005553e-05, "loss": 1.0682, "step": 64760 }, { "epoch": 11.009688934217236, "grad_norm": 13.056120872497559, "learning_rate": 1.4983851776304607e-05, "loss": 0.9419, "step": 64770 }, { "epoch": 11.011388747237804, "grad_norm": 18.51735496520996, "learning_rate": 1.498101875460366e-05, "loss": 1.1354, "step": 64780 }, { "epoch": 11.013088560258371, "grad_norm": 17.151260375976562, "learning_rate": 1.4978185732902714e-05, "loss": 1.1143, "step": 64790 }, { "epoch": 11.01478837327894, "grad_norm": 10.611526489257812, "learning_rate": 1.4975352711201769e-05, "loss": 1.3681, "step": 64800 }, { "epoch": 11.016488186299506, "grad_norm": 13.483161926269531, "learning_rate": 1.4972519689500821e-05, "loss": 1.2152, "step": 64810 }, { "epoch": 11.018187999320075, "grad_norm": 17.989036560058594, "learning_rate": 1.4969686667799876e-05, "loss": 1.1033, "step": 64820 }, { "epoch": 11.019887812340643, "grad_norm": 16.03192710876465, "learning_rate": 1.496685364609893e-05, "loss": 1.041, "step": 64830 }, { "epoch": 11.02158762536121, "grad_norm": 11.834376335144043, "learning_rate": 1.4964020624397983e-05, "loss": 1.2079, "step": 64840 }, { "epoch": 11.023287438381779, "grad_norm": 13.06043529510498, "learning_rate": 1.4961187602697037e-05, "loss": 1.2271, "step": 64850 }, { "epoch": 11.024987251402345, "grad_norm": 17.13562774658203, "learning_rate": 1.4958354580996092e-05, "loss": 1.1881, "step": 64860 }, { "epoch": 11.026687064422914, "grad_norm": 13.073183059692383, "learning_rate": 1.4955521559295144e-05, "loss": 0.8566, "step": 64870 }, { "epoch": 11.02838687744348, "grad_norm": 18.08119773864746, "learning_rate": 1.4952688537594199e-05, "loss": 1.1522, "step": 64880 }, { "epoch": 11.03008669046405, "grad_norm": 13.572280883789062, "learning_rate": 1.4949855515893252e-05, "loss": 0.9214, "step": 64890 }, { "epoch": 11.031786503484616, "grad_norm": 11.757237434387207, "learning_rate": 1.4947022494192306e-05, "loss": 0.9925, "step": 64900 }, { "epoch": 11.033486316505185, "grad_norm": 16.352588653564453, "learning_rate": 1.494418947249136e-05, "loss": 0.9097, "step": 64910 }, { "epoch": 11.035186129525751, "grad_norm": 22.264812469482422, "learning_rate": 1.4941356450790413e-05, "loss": 1.0594, "step": 64920 }, { "epoch": 11.03688594254632, "grad_norm": 24.075469970703125, "learning_rate": 1.4938523429089467e-05, "loss": 1.0714, "step": 64930 }, { "epoch": 11.038585755566888, "grad_norm": 15.1624116897583, "learning_rate": 1.493569040738852e-05, "loss": 1.1025, "step": 64940 }, { "epoch": 11.040285568587455, "grad_norm": 17.94485855102539, "learning_rate": 1.4932857385687575e-05, "loss": 1.1314, "step": 64950 }, { "epoch": 11.041985381608024, "grad_norm": 15.72050666809082, "learning_rate": 1.4930024363986627e-05, "loss": 0.9825, "step": 64960 }, { "epoch": 11.04368519462859, "grad_norm": 12.59854507446289, "learning_rate": 1.4927191342285682e-05, "loss": 1.1387, "step": 64970 }, { "epoch": 11.045385007649159, "grad_norm": 21.920732498168945, "learning_rate": 1.4924358320584736e-05, "loss": 1.3185, "step": 64980 }, { "epoch": 11.047084820669726, "grad_norm": 13.437482833862305, "learning_rate": 1.4921525298883791e-05, "loss": 1.1743, "step": 64990 }, { "epoch": 11.048784633690294, "grad_norm": 21.389062881469727, "learning_rate": 1.4918692277182843e-05, "loss": 0.9768, "step": 65000 }, { "epoch": 11.050484446710861, "grad_norm": 16.09063720703125, "learning_rate": 1.4915859255481898e-05, "loss": 1.0018, "step": 65010 }, { "epoch": 11.05218425973143, "grad_norm": 11.884265899658203, "learning_rate": 1.4913026233780952e-05, "loss": 0.9182, "step": 65020 }, { "epoch": 11.053884072751996, "grad_norm": 18.275209426879883, "learning_rate": 1.4910193212080005e-05, "loss": 1.2718, "step": 65030 }, { "epoch": 11.055583885772565, "grad_norm": 17.615198135375977, "learning_rate": 1.4907360190379059e-05, "loss": 1.0049, "step": 65040 }, { "epoch": 11.057283698793134, "grad_norm": 13.943708419799805, "learning_rate": 1.4904527168678114e-05, "loss": 1.0278, "step": 65050 }, { "epoch": 11.0589835118137, "grad_norm": 14.56594181060791, "learning_rate": 1.4901694146977166e-05, "loss": 1.1622, "step": 65060 }, { "epoch": 11.060683324834269, "grad_norm": 16.69588851928711, "learning_rate": 1.4898861125276221e-05, "loss": 1.213, "step": 65070 }, { "epoch": 11.062383137854836, "grad_norm": 12.589947700500488, "learning_rate": 1.4896028103575274e-05, "loss": 0.8986, "step": 65080 }, { "epoch": 11.064082950875404, "grad_norm": 15.81735610961914, "learning_rate": 1.4893195081874326e-05, "loss": 0.9806, "step": 65090 }, { "epoch": 11.065782763895971, "grad_norm": 13.392987251281738, "learning_rate": 1.4890362060173381e-05, "loss": 1.285, "step": 65100 }, { "epoch": 11.06748257691654, "grad_norm": 21.633594512939453, "learning_rate": 1.4887529038472435e-05, "loss": 0.9826, "step": 65110 }, { "epoch": 11.069182389937106, "grad_norm": 10.9719820022583, "learning_rate": 1.4884696016771489e-05, "loss": 1.2, "step": 65120 }, { "epoch": 11.070882202957675, "grad_norm": 15.562994003295898, "learning_rate": 1.4881862995070542e-05, "loss": 1.0271, "step": 65130 }, { "epoch": 11.072582015978242, "grad_norm": 63.95988082885742, "learning_rate": 1.4879029973369597e-05, "loss": 0.979, "step": 65140 }, { "epoch": 11.07428182899881, "grad_norm": 14.287161827087402, "learning_rate": 1.4876196951668649e-05, "loss": 1.1143, "step": 65150 }, { "epoch": 11.075981642019379, "grad_norm": 15.64543628692627, "learning_rate": 1.4873363929967704e-05, "loss": 1.0303, "step": 65160 }, { "epoch": 11.077681455039945, "grad_norm": 18.314189910888672, "learning_rate": 1.4870530908266758e-05, "loss": 1.0212, "step": 65170 }, { "epoch": 11.079381268060514, "grad_norm": 14.53089427947998, "learning_rate": 1.4867697886565811e-05, "loss": 0.9828, "step": 65180 }, { "epoch": 11.08108108108108, "grad_norm": 15.612746238708496, "learning_rate": 1.4864864864864865e-05, "loss": 1.0284, "step": 65190 }, { "epoch": 11.08278089410165, "grad_norm": 13.615309715270996, "learning_rate": 1.486203184316392e-05, "loss": 1.0998, "step": 65200 }, { "epoch": 11.084480707122216, "grad_norm": 14.601909637451172, "learning_rate": 1.4859198821462972e-05, "loss": 0.9729, "step": 65210 }, { "epoch": 11.086180520142785, "grad_norm": 14.335680961608887, "learning_rate": 1.4856365799762027e-05, "loss": 1.0085, "step": 65220 }, { "epoch": 11.087880333163351, "grad_norm": 17.019622802734375, "learning_rate": 1.485353277806108e-05, "loss": 0.8776, "step": 65230 }, { "epoch": 11.08958014618392, "grad_norm": 13.026845932006836, "learning_rate": 1.4850699756360134e-05, "loss": 1.0958, "step": 65240 }, { "epoch": 11.091279959204488, "grad_norm": 17.701597213745117, "learning_rate": 1.4847866734659188e-05, "loss": 0.9457, "step": 65250 }, { "epoch": 11.092979772225055, "grad_norm": 13.416596412658691, "learning_rate": 1.4845033712958241e-05, "loss": 1.2693, "step": 65260 }, { "epoch": 11.094679585245624, "grad_norm": 13.468774795532227, "learning_rate": 1.4842200691257295e-05, "loss": 1.0802, "step": 65270 }, { "epoch": 11.09637939826619, "grad_norm": 10.823847770690918, "learning_rate": 1.4839367669556348e-05, "loss": 1.047, "step": 65280 }, { "epoch": 11.098079211286759, "grad_norm": 15.4097261428833, "learning_rate": 1.4836534647855403e-05, "loss": 1.097, "step": 65290 }, { "epoch": 11.099779024307326, "grad_norm": 11.163959503173828, "learning_rate": 1.4833701626154455e-05, "loss": 1.0942, "step": 65300 }, { "epoch": 11.101478837327894, "grad_norm": 11.356634140014648, "learning_rate": 1.483086860445351e-05, "loss": 1.4076, "step": 65310 }, { "epoch": 11.103178650348461, "grad_norm": 14.570417404174805, "learning_rate": 1.4828035582752564e-05, "loss": 1.2354, "step": 65320 }, { "epoch": 11.10487846336903, "grad_norm": 13.511971473693848, "learning_rate": 1.4825202561051618e-05, "loss": 1.0827, "step": 65330 }, { "epoch": 11.106578276389596, "grad_norm": 16.471349716186523, "learning_rate": 1.4822369539350671e-05, "loss": 1.0967, "step": 65340 }, { "epoch": 11.108278089410165, "grad_norm": 13.488531112670898, "learning_rate": 1.4819536517649726e-05, "loss": 1.1366, "step": 65350 }, { "epoch": 11.109977902430733, "grad_norm": 16.533706665039062, "learning_rate": 1.481670349594878e-05, "loss": 1.0877, "step": 65360 }, { "epoch": 11.1116777154513, "grad_norm": 16.883970260620117, "learning_rate": 1.4813870474247833e-05, "loss": 1.2281, "step": 65370 }, { "epoch": 11.113377528471869, "grad_norm": 16.491104125976562, "learning_rate": 1.4811037452546887e-05, "loss": 1.1687, "step": 65380 }, { "epoch": 11.115077341492436, "grad_norm": 16.716304779052734, "learning_rate": 1.4808204430845942e-05, "loss": 1.1046, "step": 65390 }, { "epoch": 11.116777154513004, "grad_norm": 40.542869567871094, "learning_rate": 1.4805371409144994e-05, "loss": 1.1178, "step": 65400 }, { "epoch": 11.11847696753357, "grad_norm": 19.78620719909668, "learning_rate": 1.4802538387444049e-05, "loss": 1.1252, "step": 65410 }, { "epoch": 11.12017678055414, "grad_norm": 13.470500946044922, "learning_rate": 1.4799705365743103e-05, "loss": 1.0833, "step": 65420 }, { "epoch": 11.121876593574706, "grad_norm": 47.536651611328125, "learning_rate": 1.4796872344042154e-05, "loss": 1.0707, "step": 65430 }, { "epoch": 11.123576406595275, "grad_norm": 14.716536521911621, "learning_rate": 1.479403932234121e-05, "loss": 1.1305, "step": 65440 }, { "epoch": 11.125276219615841, "grad_norm": 12.459942817687988, "learning_rate": 1.4791206300640263e-05, "loss": 1.1834, "step": 65450 }, { "epoch": 11.12697603263641, "grad_norm": 14.4895601272583, "learning_rate": 1.4788373278939317e-05, "loss": 1.1963, "step": 65460 }, { "epoch": 11.128675845656979, "grad_norm": 14.855887413024902, "learning_rate": 1.478554025723837e-05, "loss": 0.8985, "step": 65470 }, { "epoch": 11.130375658677545, "grad_norm": 16.185152053833008, "learning_rate": 1.4782707235537425e-05, "loss": 1.0794, "step": 65480 }, { "epoch": 11.132075471698114, "grad_norm": 18.561931610107422, "learning_rate": 1.4779874213836477e-05, "loss": 1.2682, "step": 65490 }, { "epoch": 11.13377528471868, "grad_norm": 17.10219383239746, "learning_rate": 1.4777041192135533e-05, "loss": 1.0225, "step": 65500 }, { "epoch": 11.13547509773925, "grad_norm": 22.30681800842285, "learning_rate": 1.4774208170434586e-05, "loss": 0.9644, "step": 65510 }, { "epoch": 11.137174910759816, "grad_norm": 12.500802040100098, "learning_rate": 1.477137514873364e-05, "loss": 0.9918, "step": 65520 }, { "epoch": 11.138874723780384, "grad_norm": 11.821484565734863, "learning_rate": 1.4768542127032693e-05, "loss": 1.0375, "step": 65530 }, { "epoch": 11.140574536800951, "grad_norm": 12.167767524719238, "learning_rate": 1.4765709105331748e-05, "loss": 1.205, "step": 65540 }, { "epoch": 11.14227434982152, "grad_norm": 17.06200408935547, "learning_rate": 1.47628760836308e-05, "loss": 1.2551, "step": 65550 }, { "epoch": 11.143974162842087, "grad_norm": 14.879817008972168, "learning_rate": 1.4760043061929855e-05, "loss": 1.4732, "step": 65560 }, { "epoch": 11.145673975862655, "grad_norm": 14.070919036865234, "learning_rate": 1.4757210040228909e-05, "loss": 1.0136, "step": 65570 }, { "epoch": 11.147373788883224, "grad_norm": 16.293588638305664, "learning_rate": 1.4754377018527962e-05, "loss": 1.1885, "step": 65580 }, { "epoch": 11.14907360190379, "grad_norm": 12.6941499710083, "learning_rate": 1.4751543996827016e-05, "loss": 1.2057, "step": 65590 }, { "epoch": 11.150773414924359, "grad_norm": 14.91800594329834, "learning_rate": 1.474871097512607e-05, "loss": 1.1289, "step": 65600 }, { "epoch": 11.152473227944926, "grad_norm": 17.090682983398438, "learning_rate": 1.4745877953425123e-05, "loss": 1.0756, "step": 65610 }, { "epoch": 11.154173040965494, "grad_norm": 21.431182861328125, "learning_rate": 1.4743044931724176e-05, "loss": 1.2304, "step": 65620 }, { "epoch": 11.155872853986061, "grad_norm": 12.503521919250488, "learning_rate": 1.4740211910023232e-05, "loss": 1.1866, "step": 65630 }, { "epoch": 11.15757266700663, "grad_norm": 13.719529151916504, "learning_rate": 1.4737378888322284e-05, "loss": 1.0634, "step": 65640 }, { "epoch": 11.159272480027196, "grad_norm": 16.47857666015625, "learning_rate": 1.4734545866621339e-05, "loss": 1.1349, "step": 65650 }, { "epoch": 11.160972293047765, "grad_norm": 14.20975112915039, "learning_rate": 1.4731712844920392e-05, "loss": 1.2648, "step": 65660 }, { "epoch": 11.162672106068332, "grad_norm": 12.780620574951172, "learning_rate": 1.4728879823219446e-05, "loss": 1.0708, "step": 65670 }, { "epoch": 11.1643719190889, "grad_norm": 12.292299270629883, "learning_rate": 1.47260468015185e-05, "loss": 0.9589, "step": 65680 }, { "epoch": 11.166071732109469, "grad_norm": 12.022910118103027, "learning_rate": 1.4723213779817555e-05, "loss": 1.1014, "step": 65690 }, { "epoch": 11.167771545130035, "grad_norm": 14.882856369018555, "learning_rate": 1.4720380758116608e-05, "loss": 1.1994, "step": 65700 }, { "epoch": 11.169471358150604, "grad_norm": 14.468420028686523, "learning_rate": 1.4717547736415662e-05, "loss": 1.3166, "step": 65710 }, { "epoch": 11.17117117117117, "grad_norm": 10.829750061035156, "learning_rate": 1.4714714714714715e-05, "loss": 1.1329, "step": 65720 }, { "epoch": 11.17287098419174, "grad_norm": 21.88589096069336, "learning_rate": 1.471188169301377e-05, "loss": 1.0915, "step": 65730 }, { "epoch": 11.174570797212306, "grad_norm": 17.792024612426758, "learning_rate": 1.4709048671312822e-05, "loss": 1.2322, "step": 65740 }, { "epoch": 11.176270610232875, "grad_norm": 14.676535606384277, "learning_rate": 1.4706215649611877e-05, "loss": 1.0386, "step": 65750 }, { "epoch": 11.177970423253441, "grad_norm": 16.827869415283203, "learning_rate": 1.4703382627910931e-05, "loss": 0.9747, "step": 65760 }, { "epoch": 11.17967023627401, "grad_norm": 11.265304565429688, "learning_rate": 1.4700549606209983e-05, "loss": 1.1158, "step": 65770 }, { "epoch": 11.181370049294578, "grad_norm": 13.24641227722168, "learning_rate": 1.4697716584509038e-05, "loss": 0.9951, "step": 65780 }, { "epoch": 11.183069862315145, "grad_norm": 13.87015151977539, "learning_rate": 1.4694883562808091e-05, "loss": 1.1982, "step": 65790 }, { "epoch": 11.184769675335714, "grad_norm": 18.544958114624023, "learning_rate": 1.4692050541107145e-05, "loss": 1.1557, "step": 65800 }, { "epoch": 11.18646948835628, "grad_norm": 12.747547149658203, "learning_rate": 1.4689217519406198e-05, "loss": 1.2138, "step": 65810 }, { "epoch": 11.188169301376849, "grad_norm": 12.609434127807617, "learning_rate": 1.4686384497705254e-05, "loss": 1.1444, "step": 65820 }, { "epoch": 11.189869114397416, "grad_norm": 14.125272750854492, "learning_rate": 1.4683551476004306e-05, "loss": 0.9342, "step": 65830 }, { "epoch": 11.191568927417984, "grad_norm": 14.2117280960083, "learning_rate": 1.468071845430336e-05, "loss": 1.0165, "step": 65840 }, { "epoch": 11.193268740438551, "grad_norm": 14.004487991333008, "learning_rate": 1.4677885432602414e-05, "loss": 1.1764, "step": 65850 }, { "epoch": 11.19496855345912, "grad_norm": 11.10302448272705, "learning_rate": 1.4675052410901468e-05, "loss": 1.0484, "step": 65860 }, { "epoch": 11.196668366479686, "grad_norm": 14.560961723327637, "learning_rate": 1.4672219389200521e-05, "loss": 1.1952, "step": 65870 }, { "epoch": 11.198368179500255, "grad_norm": 12.944113731384277, "learning_rate": 1.4669386367499577e-05, "loss": 1.0777, "step": 65880 }, { "epoch": 11.200067992520824, "grad_norm": 15.625896453857422, "learning_rate": 1.4666553345798628e-05, "loss": 1.1719, "step": 65890 }, { "epoch": 11.20176780554139, "grad_norm": 12.85148811340332, "learning_rate": 1.4663720324097684e-05, "loss": 1.0413, "step": 65900 }, { "epoch": 11.203467618561959, "grad_norm": 15.35339641571045, "learning_rate": 1.4660887302396737e-05, "loss": 1.1857, "step": 65910 }, { "epoch": 11.205167431582526, "grad_norm": 20.192962646484375, "learning_rate": 1.465805428069579e-05, "loss": 1.287, "step": 65920 }, { "epoch": 11.206867244603094, "grad_norm": 11.771049499511719, "learning_rate": 1.4655221258994844e-05, "loss": 1.4116, "step": 65930 }, { "epoch": 11.208567057623661, "grad_norm": 17.3460636138916, "learning_rate": 1.46523882372939e-05, "loss": 1.1595, "step": 65940 }, { "epoch": 11.21026687064423, "grad_norm": 16.08847427368164, "learning_rate": 1.4649555215592951e-05, "loss": 1.2966, "step": 65950 }, { "epoch": 11.211966683664796, "grad_norm": 15.786919593811035, "learning_rate": 1.4646722193892005e-05, "loss": 0.9955, "step": 65960 }, { "epoch": 11.213666496685365, "grad_norm": 23.106582641601562, "learning_rate": 1.464388917219106e-05, "loss": 0.8004, "step": 65970 }, { "epoch": 11.215366309705932, "grad_norm": 17.170852661132812, "learning_rate": 1.4641056150490112e-05, "loss": 0.9632, "step": 65980 }, { "epoch": 11.2170661227265, "grad_norm": 20.19025421142578, "learning_rate": 1.4638223128789167e-05, "loss": 1.0677, "step": 65990 }, { "epoch": 11.218765935747069, "grad_norm": 12.17910099029541, "learning_rate": 1.463539010708822e-05, "loss": 1.1241, "step": 66000 }, { "epoch": 11.220465748767635, "grad_norm": 20.363529205322266, "learning_rate": 1.4632557085387274e-05, "loss": 1.0251, "step": 66010 }, { "epoch": 11.222165561788204, "grad_norm": 13.7134370803833, "learning_rate": 1.4629724063686328e-05, "loss": 0.9945, "step": 66020 }, { "epoch": 11.22386537480877, "grad_norm": 16.27011489868164, "learning_rate": 1.4626891041985383e-05, "loss": 1.1572, "step": 66030 }, { "epoch": 11.22556518782934, "grad_norm": 12.69196891784668, "learning_rate": 1.4624058020284435e-05, "loss": 0.9517, "step": 66040 }, { "epoch": 11.227265000849906, "grad_norm": 11.569623947143555, "learning_rate": 1.462122499858349e-05, "loss": 1.1562, "step": 66050 }, { "epoch": 11.228964813870475, "grad_norm": 10.435831069946289, "learning_rate": 1.4618391976882543e-05, "loss": 1.4216, "step": 66060 }, { "epoch": 11.230664626891041, "grad_norm": 27.740633010864258, "learning_rate": 1.4615558955181599e-05, "loss": 1.2249, "step": 66070 }, { "epoch": 11.23236443991161, "grad_norm": 13.000011444091797, "learning_rate": 1.461272593348065e-05, "loss": 1.1226, "step": 66080 }, { "epoch": 11.234064252932177, "grad_norm": 17.5559139251709, "learning_rate": 1.4609892911779706e-05, "loss": 1.0073, "step": 66090 }, { "epoch": 11.235764065952745, "grad_norm": 15.76318073272705, "learning_rate": 1.4607059890078759e-05, "loss": 1.0818, "step": 66100 }, { "epoch": 11.237463878973314, "grad_norm": 17.593271255493164, "learning_rate": 1.4604226868377813e-05, "loss": 1.1037, "step": 66110 }, { "epoch": 11.23916369199388, "grad_norm": 14.760811805725098, "learning_rate": 1.4601393846676866e-05, "loss": 1.1813, "step": 66120 }, { "epoch": 11.240863505014449, "grad_norm": 13.611536026000977, "learning_rate": 1.459856082497592e-05, "loss": 1.1863, "step": 66130 }, { "epoch": 11.242563318035016, "grad_norm": 26.888477325439453, "learning_rate": 1.4595727803274973e-05, "loss": 1.0927, "step": 66140 }, { "epoch": 11.244263131055584, "grad_norm": 15.791674613952637, "learning_rate": 1.4592894781574027e-05, "loss": 0.9604, "step": 66150 }, { "epoch": 11.245962944076151, "grad_norm": 14.792728424072266, "learning_rate": 1.4590061759873082e-05, "loss": 1.1874, "step": 66160 }, { "epoch": 11.24766275709672, "grad_norm": 15.165592193603516, "learning_rate": 1.4587228738172134e-05, "loss": 1.1838, "step": 66170 }, { "epoch": 11.249362570117286, "grad_norm": 11.904696464538574, "learning_rate": 1.4584395716471189e-05, "loss": 1.0619, "step": 66180 }, { "epoch": 11.251062383137855, "grad_norm": 13.295988082885742, "learning_rate": 1.4581562694770242e-05, "loss": 1.1226, "step": 66190 }, { "epoch": 11.252762196158422, "grad_norm": 19.04061508178711, "learning_rate": 1.4578729673069296e-05, "loss": 1.0056, "step": 66200 }, { "epoch": 11.25446200917899, "grad_norm": 16.08214569091797, "learning_rate": 1.457589665136835e-05, "loss": 0.9942, "step": 66210 }, { "epoch": 11.256161822199559, "grad_norm": 10.698518753051758, "learning_rate": 1.4573063629667405e-05, "loss": 1.1815, "step": 66220 }, { "epoch": 11.257861635220126, "grad_norm": 13.743361473083496, "learning_rate": 1.4570230607966457e-05, "loss": 1.1474, "step": 66230 }, { "epoch": 11.259561448240694, "grad_norm": 22.371305465698242, "learning_rate": 1.4567397586265512e-05, "loss": 1.0864, "step": 66240 }, { "epoch": 11.26126126126126, "grad_norm": 12.940874099731445, "learning_rate": 1.4564564564564565e-05, "loss": 1.2449, "step": 66250 }, { "epoch": 11.26296107428183, "grad_norm": 17.362552642822266, "learning_rate": 1.4561731542863619e-05, "loss": 1.0581, "step": 66260 }, { "epoch": 11.264660887302396, "grad_norm": 19.503437042236328, "learning_rate": 1.4558898521162672e-05, "loss": 1.1253, "step": 66270 }, { "epoch": 11.266360700322965, "grad_norm": 12.45335865020752, "learning_rate": 1.4556065499461728e-05, "loss": 1.3578, "step": 66280 }, { "epoch": 11.268060513343531, "grad_norm": 15.455267906188965, "learning_rate": 1.455323247776078e-05, "loss": 1.0618, "step": 66290 }, { "epoch": 11.2697603263641, "grad_norm": 18.169389724731445, "learning_rate": 1.4550399456059833e-05, "loss": 1.0915, "step": 66300 }, { "epoch": 11.271460139384669, "grad_norm": 12.760442733764648, "learning_rate": 1.4547566434358888e-05, "loss": 1.0753, "step": 66310 }, { "epoch": 11.273159952405235, "grad_norm": 16.092086791992188, "learning_rate": 1.454473341265794e-05, "loss": 1.0149, "step": 66320 }, { "epoch": 11.274859765425804, "grad_norm": 16.406227111816406, "learning_rate": 1.4541900390956995e-05, "loss": 1.1515, "step": 66330 }, { "epoch": 11.27655957844637, "grad_norm": 18.50731658935547, "learning_rate": 1.4539067369256049e-05, "loss": 1.0475, "step": 66340 }, { "epoch": 11.27825939146694, "grad_norm": 15.373685836791992, "learning_rate": 1.4536234347555102e-05, "loss": 1.1885, "step": 66350 }, { "epoch": 11.279959204487506, "grad_norm": 16.647367477416992, "learning_rate": 1.4533401325854156e-05, "loss": 1.1078, "step": 66360 }, { "epoch": 11.281659017508074, "grad_norm": 11.319483757019043, "learning_rate": 1.4530568304153211e-05, "loss": 0.9324, "step": 66370 }, { "epoch": 11.283358830528641, "grad_norm": 14.580302238464355, "learning_rate": 1.4527735282452263e-05, "loss": 1.1278, "step": 66380 }, { "epoch": 11.28505864354921, "grad_norm": 58.96070861816406, "learning_rate": 1.4524902260751318e-05, "loss": 1.0879, "step": 66390 }, { "epoch": 11.286758456569776, "grad_norm": 14.79178524017334, "learning_rate": 1.4522069239050372e-05, "loss": 0.9691, "step": 66400 }, { "epoch": 11.288458269590345, "grad_norm": 23.416271209716797, "learning_rate": 1.4519236217349425e-05, "loss": 1.1418, "step": 66410 }, { "epoch": 11.290158082610914, "grad_norm": 17.222326278686523, "learning_rate": 1.4516403195648479e-05, "loss": 1.2713, "step": 66420 }, { "epoch": 11.29185789563148, "grad_norm": 67.56549072265625, "learning_rate": 1.4513570173947534e-05, "loss": 1.4048, "step": 66430 }, { "epoch": 11.293557708652049, "grad_norm": 20.1592960357666, "learning_rate": 1.4510737152246587e-05, "loss": 1.3743, "step": 66440 }, { "epoch": 11.295257521672616, "grad_norm": 15.723198890686035, "learning_rate": 1.450790413054564e-05, "loss": 1.2697, "step": 66450 }, { "epoch": 11.296957334693184, "grad_norm": 13.576674461364746, "learning_rate": 1.4505071108844694e-05, "loss": 1.1752, "step": 66460 }, { "epoch": 11.298657147713751, "grad_norm": 13.352335929870605, "learning_rate": 1.4502238087143748e-05, "loss": 1.0641, "step": 66470 }, { "epoch": 11.30035696073432, "grad_norm": 20.40577507019043, "learning_rate": 1.4499405065442801e-05, "loss": 1.1376, "step": 66480 }, { "epoch": 11.302056773754886, "grad_norm": 11.352638244628906, "learning_rate": 1.4496572043741855e-05, "loss": 1.2364, "step": 66490 }, { "epoch": 11.303756586775455, "grad_norm": 11.663904190063477, "learning_rate": 1.449373902204091e-05, "loss": 1.1818, "step": 66500 }, { "epoch": 11.305456399796022, "grad_norm": 13.33594799041748, "learning_rate": 1.4490906000339962e-05, "loss": 0.9622, "step": 66510 }, { "epoch": 11.30715621281659, "grad_norm": 14.790743827819824, "learning_rate": 1.4488072978639017e-05, "loss": 1.0146, "step": 66520 }, { "epoch": 11.308856025837159, "grad_norm": 14.227194786071777, "learning_rate": 1.448523995693807e-05, "loss": 0.9874, "step": 66530 }, { "epoch": 11.310555838857725, "grad_norm": 14.81507396697998, "learning_rate": 1.4482406935237124e-05, "loss": 1.1569, "step": 66540 }, { "epoch": 11.312255651878294, "grad_norm": 14.147908210754395, "learning_rate": 1.4479573913536178e-05, "loss": 1.0826, "step": 66550 }, { "epoch": 11.31395546489886, "grad_norm": 15.0164794921875, "learning_rate": 1.4476740891835233e-05, "loss": 1.1745, "step": 66560 }, { "epoch": 11.31565527791943, "grad_norm": 13.079915046691895, "learning_rate": 1.4473907870134285e-05, "loss": 1.1032, "step": 66570 }, { "epoch": 11.317355090939996, "grad_norm": 15.86263370513916, "learning_rate": 1.447107484843334e-05, "loss": 1.1095, "step": 66580 }, { "epoch": 11.319054903960565, "grad_norm": 23.638965606689453, "learning_rate": 1.4468241826732394e-05, "loss": 1.1129, "step": 66590 }, { "epoch": 11.320754716981131, "grad_norm": 19.610271453857422, "learning_rate": 1.4465408805031447e-05, "loss": 0.9578, "step": 66600 }, { "epoch": 11.3224545300017, "grad_norm": 13.02903938293457, "learning_rate": 1.44625757833305e-05, "loss": 1.2864, "step": 66610 }, { "epoch": 11.324154343022268, "grad_norm": 14.944393157958984, "learning_rate": 1.4459742761629556e-05, "loss": 0.997, "step": 66620 }, { "epoch": 11.325854156042835, "grad_norm": 35.26960372924805, "learning_rate": 1.4456909739928608e-05, "loss": 1.0243, "step": 66630 }, { "epoch": 11.327553969063404, "grad_norm": 26.636777877807617, "learning_rate": 1.4454076718227661e-05, "loss": 1.2895, "step": 66640 }, { "epoch": 11.32925378208397, "grad_norm": 19.294410705566406, "learning_rate": 1.4451243696526716e-05, "loss": 1.0997, "step": 66650 }, { "epoch": 11.330953595104539, "grad_norm": 16.258371353149414, "learning_rate": 1.4448410674825768e-05, "loss": 1.1791, "step": 66660 }, { "epoch": 11.332653408125106, "grad_norm": 15.077066421508789, "learning_rate": 1.4445577653124823e-05, "loss": 1.0508, "step": 66670 }, { "epoch": 11.334353221145674, "grad_norm": 15.028902053833008, "learning_rate": 1.4442744631423877e-05, "loss": 1.135, "step": 66680 }, { "epoch": 11.336053034166241, "grad_norm": 17.26889991760254, "learning_rate": 1.443991160972293e-05, "loss": 1.1093, "step": 66690 }, { "epoch": 11.33775284718681, "grad_norm": 19.9445743560791, "learning_rate": 1.4437078588021984e-05, "loss": 1.16, "step": 66700 }, { "epoch": 11.339452660207376, "grad_norm": 15.87692642211914, "learning_rate": 1.443424556632104e-05, "loss": 0.9479, "step": 66710 }, { "epoch": 11.341152473227945, "grad_norm": 18.67932891845703, "learning_rate": 1.4431412544620091e-05, "loss": 0.9592, "step": 66720 }, { "epoch": 11.342852286248513, "grad_norm": 15.239215850830078, "learning_rate": 1.4428579522919146e-05, "loss": 1.1491, "step": 66730 }, { "epoch": 11.34455209926908, "grad_norm": 15.584325790405273, "learning_rate": 1.44257465012182e-05, "loss": 1.123, "step": 66740 }, { "epoch": 11.346251912289649, "grad_norm": 34.99394607543945, "learning_rate": 1.4422913479517253e-05, "loss": 1.0235, "step": 66750 }, { "epoch": 11.347951725310216, "grad_norm": 17.473772048950195, "learning_rate": 1.4420080457816307e-05, "loss": 1.1959, "step": 66760 }, { "epoch": 11.349651538330784, "grad_norm": 20.959455490112305, "learning_rate": 1.4417247436115362e-05, "loss": 1.1333, "step": 66770 }, { "epoch": 11.35135135135135, "grad_norm": 16.773893356323242, "learning_rate": 1.4414414414414414e-05, "loss": 0.9289, "step": 66780 }, { "epoch": 11.35305116437192, "grad_norm": 14.148619651794434, "learning_rate": 1.4411581392713469e-05, "loss": 1.0774, "step": 66790 }, { "epoch": 11.354750977392486, "grad_norm": 18.45094108581543, "learning_rate": 1.4408748371012523e-05, "loss": 1.3062, "step": 66800 }, { "epoch": 11.356450790413055, "grad_norm": 12.417492866516113, "learning_rate": 1.4405915349311576e-05, "loss": 1.0721, "step": 66810 }, { "epoch": 11.358150603433621, "grad_norm": 16.89645767211914, "learning_rate": 1.440308232761063e-05, "loss": 1.0811, "step": 66820 }, { "epoch": 11.35985041645419, "grad_norm": 13.684344291687012, "learning_rate": 1.4400249305909683e-05, "loss": 1.2592, "step": 66830 }, { "epoch": 11.361550229474759, "grad_norm": 13.441336631774902, "learning_rate": 1.4397416284208738e-05, "loss": 0.8464, "step": 66840 }, { "epoch": 11.363250042495325, "grad_norm": 11.261781692504883, "learning_rate": 1.439458326250779e-05, "loss": 1.2062, "step": 66850 }, { "epoch": 11.364949855515894, "grad_norm": 14.843968391418457, "learning_rate": 1.4391750240806845e-05, "loss": 1.0454, "step": 66860 }, { "epoch": 11.36664966853646, "grad_norm": 15.39452075958252, "learning_rate": 1.4388917219105899e-05, "loss": 1.1628, "step": 66870 }, { "epoch": 11.36834948155703, "grad_norm": 14.649762153625488, "learning_rate": 1.4386084197404952e-05, "loss": 0.8784, "step": 66880 }, { "epoch": 11.370049294577596, "grad_norm": 11.577861785888672, "learning_rate": 1.4383251175704006e-05, "loss": 0.9762, "step": 66890 }, { "epoch": 11.371749107598164, "grad_norm": 18.051395416259766, "learning_rate": 1.4380418154003061e-05, "loss": 1.0825, "step": 66900 }, { "epoch": 11.373448920618731, "grad_norm": 20.384994506835938, "learning_rate": 1.4377585132302113e-05, "loss": 1.158, "step": 66910 }, { "epoch": 11.3751487336393, "grad_norm": 12.771352767944336, "learning_rate": 1.4374752110601168e-05, "loss": 1.0885, "step": 66920 }, { "epoch": 11.376848546659867, "grad_norm": 14.190561294555664, "learning_rate": 1.4371919088900222e-05, "loss": 0.9219, "step": 66930 }, { "epoch": 11.378548359680435, "grad_norm": 18.88695526123047, "learning_rate": 1.4369086067199275e-05, "loss": 1.0923, "step": 66940 }, { "epoch": 11.380248172701004, "grad_norm": 17.180410385131836, "learning_rate": 1.4366253045498329e-05, "loss": 1.38, "step": 66950 }, { "epoch": 11.38194798572157, "grad_norm": 12.359585762023926, "learning_rate": 1.4363420023797384e-05, "loss": 1.326, "step": 66960 }, { "epoch": 11.383647798742139, "grad_norm": 14.124945640563965, "learning_rate": 1.4360587002096436e-05, "loss": 1.1247, "step": 66970 }, { "epoch": 11.385347611762706, "grad_norm": 12.694958686828613, "learning_rate": 1.435775398039549e-05, "loss": 1.0476, "step": 66980 }, { "epoch": 11.387047424783274, "grad_norm": 13.772104263305664, "learning_rate": 1.4354920958694545e-05, "loss": 1.2976, "step": 66990 }, { "epoch": 11.388747237803841, "grad_norm": 13.514242172241211, "learning_rate": 1.4352087936993596e-05, "loss": 1.0555, "step": 67000 }, { "epoch": 11.39044705082441, "grad_norm": 25.228515625, "learning_rate": 1.4349254915292652e-05, "loss": 1.1476, "step": 67010 }, { "epoch": 11.392146863844976, "grad_norm": 13.13554859161377, "learning_rate": 1.4346421893591705e-05, "loss": 1.1953, "step": 67020 }, { "epoch": 11.393846676865545, "grad_norm": 14.699444770812988, "learning_rate": 1.4343588871890759e-05, "loss": 1.007, "step": 67030 }, { "epoch": 11.395546489886112, "grad_norm": 19.73367691040039, "learning_rate": 1.4340755850189812e-05, "loss": 1.0992, "step": 67040 }, { "epoch": 11.39724630290668, "grad_norm": 14.642021179199219, "learning_rate": 1.4337922828488867e-05, "loss": 1.1696, "step": 67050 }, { "epoch": 11.398946115927249, "grad_norm": 16.036170959472656, "learning_rate": 1.433508980678792e-05, "loss": 1.1757, "step": 67060 }, { "epoch": 11.400645928947815, "grad_norm": 11.691751480102539, "learning_rate": 1.4332256785086974e-05, "loss": 1.1378, "step": 67070 }, { "epoch": 11.402345741968384, "grad_norm": 18.002256393432617, "learning_rate": 1.4329423763386028e-05, "loss": 1.0271, "step": 67080 }, { "epoch": 11.40404555498895, "grad_norm": 10.248108863830566, "learning_rate": 1.4326590741685081e-05, "loss": 1.0009, "step": 67090 }, { "epoch": 11.40574536800952, "grad_norm": 17.39632225036621, "learning_rate": 1.4323757719984135e-05, "loss": 1.0916, "step": 67100 }, { "epoch": 11.407445181030086, "grad_norm": 12.022477149963379, "learning_rate": 1.432092469828319e-05, "loss": 1.1201, "step": 67110 }, { "epoch": 11.409144994050655, "grad_norm": 12.678213119506836, "learning_rate": 1.4318091676582242e-05, "loss": 1.1116, "step": 67120 }, { "epoch": 11.410844807071221, "grad_norm": 9.293185234069824, "learning_rate": 1.4315258654881297e-05, "loss": 1.0053, "step": 67130 }, { "epoch": 11.41254462009179, "grad_norm": 15.041818618774414, "learning_rate": 1.431242563318035e-05, "loss": 1.2682, "step": 67140 }, { "epoch": 11.414244433112358, "grad_norm": 15.264311790466309, "learning_rate": 1.4309592611479404e-05, "loss": 1.2453, "step": 67150 }, { "epoch": 11.415944246132925, "grad_norm": 15.256512641906738, "learning_rate": 1.4306759589778458e-05, "loss": 1.1243, "step": 67160 }, { "epoch": 11.417644059153494, "grad_norm": 16.11127471923828, "learning_rate": 1.4303926568077511e-05, "loss": 1.0152, "step": 67170 }, { "epoch": 11.41934387217406, "grad_norm": 15.627561569213867, "learning_rate": 1.4301093546376567e-05, "loss": 1.0084, "step": 67180 }, { "epoch": 11.421043685194629, "grad_norm": 14.16364574432373, "learning_rate": 1.4298260524675618e-05, "loss": 1.0589, "step": 67190 }, { "epoch": 11.422743498215196, "grad_norm": 21.952165603637695, "learning_rate": 1.4295427502974674e-05, "loss": 0.9371, "step": 67200 }, { "epoch": 11.424443311235764, "grad_norm": 15.949357986450195, "learning_rate": 1.4292594481273727e-05, "loss": 0.9827, "step": 67210 }, { "epoch": 11.426143124256331, "grad_norm": 13.138808250427246, "learning_rate": 1.428976145957278e-05, "loss": 1.0358, "step": 67220 }, { "epoch": 11.4278429372769, "grad_norm": 13.036332130432129, "learning_rate": 1.4286928437871834e-05, "loss": 1.1667, "step": 67230 }, { "epoch": 11.429542750297466, "grad_norm": 24.84844398498535, "learning_rate": 1.428409541617089e-05, "loss": 1.0455, "step": 67240 }, { "epoch": 11.431242563318035, "grad_norm": 19.6259822845459, "learning_rate": 1.4281262394469941e-05, "loss": 1.1621, "step": 67250 }, { "epoch": 11.432942376338604, "grad_norm": 16.09113883972168, "learning_rate": 1.4278429372768996e-05, "loss": 1.1146, "step": 67260 }, { "epoch": 11.43464218935917, "grad_norm": 16.895727157592773, "learning_rate": 1.427559635106805e-05, "loss": 0.9471, "step": 67270 }, { "epoch": 11.436342002379739, "grad_norm": 15.747809410095215, "learning_rate": 1.4272763329367103e-05, "loss": 1.2067, "step": 67280 }, { "epoch": 11.438041815400306, "grad_norm": 18.83017921447754, "learning_rate": 1.4269930307666157e-05, "loss": 1.1263, "step": 67290 }, { "epoch": 11.439741628420874, "grad_norm": 13.28258991241455, "learning_rate": 1.4267097285965212e-05, "loss": 1.2148, "step": 67300 }, { "epoch": 11.441441441441441, "grad_norm": 10.971362113952637, "learning_rate": 1.4264264264264264e-05, "loss": 1.0129, "step": 67310 }, { "epoch": 11.44314125446201, "grad_norm": 12.815669059753418, "learning_rate": 1.426143124256332e-05, "loss": 1.2198, "step": 67320 }, { "epoch": 11.444841067482576, "grad_norm": 19.321929931640625, "learning_rate": 1.4258598220862373e-05, "loss": 1.0894, "step": 67330 }, { "epoch": 11.446540880503145, "grad_norm": 34.80332565307617, "learning_rate": 1.4255765199161425e-05, "loss": 1.0427, "step": 67340 }, { "epoch": 11.448240693523712, "grad_norm": 16.20203971862793, "learning_rate": 1.425293217746048e-05, "loss": 1.0718, "step": 67350 }, { "epoch": 11.44994050654428, "grad_norm": 18.077577590942383, "learning_rate": 1.4250099155759533e-05, "loss": 1.1563, "step": 67360 }, { "epoch": 11.451640319564849, "grad_norm": 16.279706954956055, "learning_rate": 1.4247266134058587e-05, "loss": 1.1296, "step": 67370 }, { "epoch": 11.453340132585415, "grad_norm": 12.536958694458008, "learning_rate": 1.424443311235764e-05, "loss": 1.317, "step": 67380 }, { "epoch": 11.455039945605984, "grad_norm": 15.828106880187988, "learning_rate": 1.4241600090656696e-05, "loss": 1.0482, "step": 67390 }, { "epoch": 11.45673975862655, "grad_norm": 12.708332061767578, "learning_rate": 1.4238767068955747e-05, "loss": 0.8415, "step": 67400 }, { "epoch": 11.45843957164712, "grad_norm": 18.519338607788086, "learning_rate": 1.4235934047254803e-05, "loss": 1.1071, "step": 67410 }, { "epoch": 11.460139384667686, "grad_norm": 16.41606330871582, "learning_rate": 1.4233101025553856e-05, "loss": 1.3158, "step": 67420 }, { "epoch": 11.461839197688255, "grad_norm": 13.190191268920898, "learning_rate": 1.423026800385291e-05, "loss": 1.2056, "step": 67430 }, { "epoch": 11.463539010708821, "grad_norm": 14.096985816955566, "learning_rate": 1.4227434982151963e-05, "loss": 0.9364, "step": 67440 }, { "epoch": 11.46523882372939, "grad_norm": 13.547247886657715, "learning_rate": 1.4224601960451018e-05, "loss": 1.1495, "step": 67450 }, { "epoch": 11.466938636749958, "grad_norm": 20.383296966552734, "learning_rate": 1.422176893875007e-05, "loss": 1.0136, "step": 67460 }, { "epoch": 11.468638449770525, "grad_norm": 13.203871726989746, "learning_rate": 1.4218935917049125e-05, "loss": 1.1193, "step": 67470 }, { "epoch": 11.470338262791094, "grad_norm": 16.285545349121094, "learning_rate": 1.4216102895348179e-05, "loss": 1.0643, "step": 67480 }, { "epoch": 11.47203807581166, "grad_norm": 8.297026634216309, "learning_rate": 1.4213269873647233e-05, "loss": 1.0296, "step": 67490 }, { "epoch": 11.473737888832229, "grad_norm": 15.764192581176758, "learning_rate": 1.4210436851946286e-05, "loss": 0.9176, "step": 67500 }, { "epoch": 11.475437701852796, "grad_norm": 14.781749725341797, "learning_rate": 1.420760383024534e-05, "loss": 1.0589, "step": 67510 }, { "epoch": 11.477137514873364, "grad_norm": 9.976351737976074, "learning_rate": 1.4204770808544393e-05, "loss": 1.3956, "step": 67520 }, { "epoch": 11.478837327893931, "grad_norm": 15.009916305541992, "learning_rate": 1.4201937786843447e-05, "loss": 1.1691, "step": 67530 }, { "epoch": 11.4805371409145, "grad_norm": 15.177306175231934, "learning_rate": 1.4199104765142502e-05, "loss": 1.158, "step": 67540 }, { "epoch": 11.482236953935066, "grad_norm": 12.784672737121582, "learning_rate": 1.4196271743441555e-05, "loss": 1.1576, "step": 67550 }, { "epoch": 11.483936766955635, "grad_norm": 26.806241989135742, "learning_rate": 1.4193438721740609e-05, "loss": 1.0554, "step": 67560 }, { "epoch": 11.485636579976203, "grad_norm": 12.333436965942383, "learning_rate": 1.4190605700039662e-05, "loss": 1.2548, "step": 67570 }, { "epoch": 11.48733639299677, "grad_norm": 17.650196075439453, "learning_rate": 1.4187772678338718e-05, "loss": 1.3029, "step": 67580 }, { "epoch": 11.489036206017339, "grad_norm": 15.5239839553833, "learning_rate": 1.418493965663777e-05, "loss": 1.0733, "step": 67590 }, { "epoch": 11.490736019037906, "grad_norm": 19.26141929626465, "learning_rate": 1.4182106634936825e-05, "loss": 1.1646, "step": 67600 }, { "epoch": 11.492435832058474, "grad_norm": 22.47256088256836, "learning_rate": 1.4179273613235878e-05, "loss": 0.9893, "step": 67610 }, { "epoch": 11.49413564507904, "grad_norm": 17.113019943237305, "learning_rate": 1.4176440591534932e-05, "loss": 1.1751, "step": 67620 }, { "epoch": 11.49583545809961, "grad_norm": 11.798075675964355, "learning_rate": 1.4173607569833985e-05, "loss": 1.0913, "step": 67630 }, { "epoch": 11.497535271120176, "grad_norm": 17.1517333984375, "learning_rate": 1.417077454813304e-05, "loss": 1.0702, "step": 67640 }, { "epoch": 11.499235084140745, "grad_norm": 20.273921966552734, "learning_rate": 1.4167941526432092e-05, "loss": 1.1683, "step": 67650 }, { "epoch": 11.500934897161311, "grad_norm": 27.331554412841797, "learning_rate": 1.4165108504731147e-05, "loss": 1.255, "step": 67660 }, { "epoch": 11.50263471018188, "grad_norm": 14.921266555786133, "learning_rate": 1.4162275483030201e-05, "loss": 1.2696, "step": 67670 }, { "epoch": 11.504334523202449, "grad_norm": 16.774551391601562, "learning_rate": 1.4159442461329253e-05, "loss": 1.1401, "step": 67680 }, { "epoch": 11.506034336223015, "grad_norm": 15.742674827575684, "learning_rate": 1.4156609439628308e-05, "loss": 0.901, "step": 67690 }, { "epoch": 11.507734149243584, "grad_norm": 14.210145950317383, "learning_rate": 1.4153776417927362e-05, "loss": 1.1831, "step": 67700 }, { "epoch": 11.50943396226415, "grad_norm": 16.520566940307617, "learning_rate": 1.4150943396226415e-05, "loss": 1.0589, "step": 67710 }, { "epoch": 11.51113377528472, "grad_norm": 17.236894607543945, "learning_rate": 1.4148110374525469e-05, "loss": 1.2698, "step": 67720 }, { "epoch": 11.512833588305286, "grad_norm": 19.749792098999023, "learning_rate": 1.4145277352824524e-05, "loss": 1.1021, "step": 67730 }, { "epoch": 11.514533401325854, "grad_norm": 15.521490097045898, "learning_rate": 1.4142444331123576e-05, "loss": 1.0755, "step": 67740 }, { "epoch": 11.516233214346421, "grad_norm": 12.350042343139648, "learning_rate": 1.4139611309422631e-05, "loss": 1.1736, "step": 67750 }, { "epoch": 11.51793302736699, "grad_norm": 14.761316299438477, "learning_rate": 1.4136778287721684e-05, "loss": 1.1126, "step": 67760 }, { "epoch": 11.519632840387558, "grad_norm": 16.312339782714844, "learning_rate": 1.4133945266020738e-05, "loss": 1.1021, "step": 67770 }, { "epoch": 11.521332653408125, "grad_norm": 22.32285499572754, "learning_rate": 1.4131112244319791e-05, "loss": 1.2793, "step": 67780 }, { "epoch": 11.523032466428694, "grad_norm": 17.985435485839844, "learning_rate": 1.4128279222618847e-05, "loss": 1.1496, "step": 67790 }, { "epoch": 11.52473227944926, "grad_norm": 14.164604187011719, "learning_rate": 1.4125446200917898e-05, "loss": 1.0443, "step": 67800 }, { "epoch": 11.526432092469829, "grad_norm": 16.08772850036621, "learning_rate": 1.4122613179216954e-05, "loss": 1.3317, "step": 67810 }, { "epoch": 11.528131905490396, "grad_norm": 15.167533874511719, "learning_rate": 1.4119780157516007e-05, "loss": 1.1724, "step": 67820 }, { "epoch": 11.529831718510964, "grad_norm": 13.772418975830078, "learning_rate": 1.411694713581506e-05, "loss": 1.1983, "step": 67830 }, { "epoch": 11.531531531531531, "grad_norm": 12.39172077178955, "learning_rate": 1.4114114114114114e-05, "loss": 1.0976, "step": 67840 }, { "epoch": 11.5332313445521, "grad_norm": 15.206934928894043, "learning_rate": 1.4111281092413168e-05, "loss": 1.0264, "step": 67850 }, { "epoch": 11.534931157572666, "grad_norm": 13.022808074951172, "learning_rate": 1.4108448070712221e-05, "loss": 0.9515, "step": 67860 }, { "epoch": 11.536630970593235, "grad_norm": 13.482563972473145, "learning_rate": 1.4105615049011275e-05, "loss": 1.082, "step": 67870 }, { "epoch": 11.538330783613802, "grad_norm": 10.791824340820312, "learning_rate": 1.410278202731033e-05, "loss": 1.281, "step": 67880 }, { "epoch": 11.54003059663437, "grad_norm": 13.736234664916992, "learning_rate": 1.4099949005609384e-05, "loss": 1.0153, "step": 67890 }, { "epoch": 11.541730409654939, "grad_norm": 17.594804763793945, "learning_rate": 1.4097115983908437e-05, "loss": 0.9833, "step": 67900 }, { "epoch": 11.543430222675505, "grad_norm": 13.058135032653809, "learning_rate": 1.409428296220749e-05, "loss": 1.0339, "step": 67910 }, { "epoch": 11.545130035696074, "grad_norm": 16.02937126159668, "learning_rate": 1.4091449940506546e-05, "loss": 1.0357, "step": 67920 }, { "epoch": 11.54682984871664, "grad_norm": 24.257400512695312, "learning_rate": 1.4088616918805598e-05, "loss": 1.0913, "step": 67930 }, { "epoch": 11.54852966173721, "grad_norm": 15.258853912353516, "learning_rate": 1.4085783897104653e-05, "loss": 1.3832, "step": 67940 }, { "epoch": 11.550229474757776, "grad_norm": 15.972997665405273, "learning_rate": 1.4082950875403706e-05, "loss": 1.1343, "step": 67950 }, { "epoch": 11.551929287778345, "grad_norm": 21.42906379699707, "learning_rate": 1.408011785370276e-05, "loss": 0.9304, "step": 67960 }, { "epoch": 11.553629100798911, "grad_norm": 14.383431434631348, "learning_rate": 1.4077284832001813e-05, "loss": 1.179, "step": 67970 }, { "epoch": 11.55532891381948, "grad_norm": 18.98795509338379, "learning_rate": 1.4074451810300869e-05, "loss": 0.9597, "step": 67980 }, { "epoch": 11.557028726840048, "grad_norm": 19.988014221191406, "learning_rate": 1.407161878859992e-05, "loss": 0.9957, "step": 67990 }, { "epoch": 11.558728539860615, "grad_norm": 18.259199142456055, "learning_rate": 1.4068785766898976e-05, "loss": 0.9779, "step": 68000 }, { "epoch": 11.560428352881184, "grad_norm": 17.95839500427246, "learning_rate": 1.406595274519803e-05, "loss": 1.0969, "step": 68010 }, { "epoch": 11.56212816590175, "grad_norm": 14.108869552612305, "learning_rate": 1.4063119723497081e-05, "loss": 0.9678, "step": 68020 }, { "epoch": 11.563827978922319, "grad_norm": 15.197473526000977, "learning_rate": 1.4060286701796136e-05, "loss": 0.9731, "step": 68030 }, { "epoch": 11.565527791942886, "grad_norm": 10.114274978637695, "learning_rate": 1.405745368009519e-05, "loss": 1.1249, "step": 68040 }, { "epoch": 11.567227604963454, "grad_norm": 14.348432540893555, "learning_rate": 1.4054620658394243e-05, "loss": 0.9919, "step": 68050 }, { "epoch": 11.568927417984021, "grad_norm": 12.649560928344727, "learning_rate": 1.4051787636693297e-05, "loss": 1.0073, "step": 68060 }, { "epoch": 11.57062723100459, "grad_norm": 10.022932052612305, "learning_rate": 1.4048954614992352e-05, "loss": 1.0527, "step": 68070 }, { "epoch": 11.572327044025156, "grad_norm": 13.007437705993652, "learning_rate": 1.4046121593291404e-05, "loss": 1.0965, "step": 68080 }, { "epoch": 11.574026857045725, "grad_norm": 16.05388069152832, "learning_rate": 1.4043288571590459e-05, "loss": 1.1449, "step": 68090 }, { "epoch": 11.575726670066294, "grad_norm": 12.553970336914062, "learning_rate": 1.4040455549889513e-05, "loss": 1.2001, "step": 68100 }, { "epoch": 11.57742648308686, "grad_norm": 18.714181900024414, "learning_rate": 1.4037622528188566e-05, "loss": 1.1058, "step": 68110 }, { "epoch": 11.579126296107429, "grad_norm": 13.729935646057129, "learning_rate": 1.403478950648762e-05, "loss": 1.021, "step": 68120 }, { "epoch": 11.580826109127996, "grad_norm": 12.926006317138672, "learning_rate": 1.4031956484786675e-05, "loss": 1.218, "step": 68130 }, { "epoch": 11.582525922148564, "grad_norm": 13.621049880981445, "learning_rate": 1.4029123463085727e-05, "loss": 1.1494, "step": 68140 }, { "epoch": 11.584225735169131, "grad_norm": 17.312898635864258, "learning_rate": 1.4026290441384782e-05, "loss": 0.9757, "step": 68150 }, { "epoch": 11.5859255481897, "grad_norm": 17.830402374267578, "learning_rate": 1.4023457419683835e-05, "loss": 1.1272, "step": 68160 }, { "epoch": 11.587625361210266, "grad_norm": 14.76999282836914, "learning_rate": 1.4020624397982889e-05, "loss": 1.046, "step": 68170 }, { "epoch": 11.589325174230835, "grad_norm": 14.96609878540039, "learning_rate": 1.4017791376281942e-05, "loss": 1.1894, "step": 68180 }, { "epoch": 11.591024987251402, "grad_norm": 14.455248832702637, "learning_rate": 1.4014958354580996e-05, "loss": 1.0615, "step": 68190 }, { "epoch": 11.59272480027197, "grad_norm": 17.387479782104492, "learning_rate": 1.401212533288005e-05, "loss": 1.2249, "step": 68200 }, { "epoch": 11.594424613292539, "grad_norm": 12.124617576599121, "learning_rate": 1.4009292311179103e-05, "loss": 1.2283, "step": 68210 }, { "epoch": 11.596124426313105, "grad_norm": 15.845399856567383, "learning_rate": 1.4006459289478158e-05, "loss": 1.1075, "step": 68220 }, { "epoch": 11.597824239333674, "grad_norm": 12.246623992919922, "learning_rate": 1.400362626777721e-05, "loss": 1.1532, "step": 68230 }, { "epoch": 11.59952405235424, "grad_norm": 14.42186450958252, "learning_rate": 1.4000793246076265e-05, "loss": 1.1154, "step": 68240 }, { "epoch": 11.60122386537481, "grad_norm": 13.909445762634277, "learning_rate": 1.3997960224375319e-05, "loss": 1.0114, "step": 68250 }, { "epoch": 11.602923678395376, "grad_norm": 12.261598587036133, "learning_rate": 1.3995127202674374e-05, "loss": 1.1261, "step": 68260 }, { "epoch": 11.604623491415945, "grad_norm": 18.08197021484375, "learning_rate": 1.3992294180973426e-05, "loss": 1.1259, "step": 68270 }, { "epoch": 11.606323304436511, "grad_norm": 14.327017784118652, "learning_rate": 1.3989461159272481e-05, "loss": 0.9521, "step": 68280 }, { "epoch": 11.60802311745708, "grad_norm": 14.055109977722168, "learning_rate": 1.3986628137571535e-05, "loss": 1.0846, "step": 68290 }, { "epoch": 11.609722930477648, "grad_norm": 19.361772537231445, "learning_rate": 1.3983795115870588e-05, "loss": 1.2617, "step": 68300 }, { "epoch": 11.611422743498215, "grad_norm": 14.50361156463623, "learning_rate": 1.3980962094169642e-05, "loss": 1.1485, "step": 68310 }, { "epoch": 11.613122556518784, "grad_norm": 14.977278709411621, "learning_rate": 1.3978129072468697e-05, "loss": 0.9788, "step": 68320 }, { "epoch": 11.61482236953935, "grad_norm": 12.802002906799316, "learning_rate": 1.3975296050767749e-05, "loss": 1.3544, "step": 68330 }, { "epoch": 11.616522182559919, "grad_norm": 14.807400703430176, "learning_rate": 1.3972463029066804e-05, "loss": 1.0807, "step": 68340 }, { "epoch": 11.618221995580486, "grad_norm": 11.949710845947266, "learning_rate": 1.3969630007365857e-05, "loss": 1.1837, "step": 68350 }, { "epoch": 11.619921808601054, "grad_norm": 12.153538703918457, "learning_rate": 1.3966796985664911e-05, "loss": 1.0579, "step": 68360 }, { "epoch": 11.621621621621621, "grad_norm": 15.890074729919434, "learning_rate": 1.3963963963963964e-05, "loss": 1.085, "step": 68370 }, { "epoch": 11.62332143464219, "grad_norm": 15.028556823730469, "learning_rate": 1.3961130942263018e-05, "loss": 0.9941, "step": 68380 }, { "epoch": 11.625021247662756, "grad_norm": 12.493165969848633, "learning_rate": 1.3958297920562071e-05, "loss": 0.9424, "step": 68390 }, { "epoch": 11.626721060683325, "grad_norm": 12.335494995117188, "learning_rate": 1.3955464898861125e-05, "loss": 0.8373, "step": 68400 }, { "epoch": 11.628420873703892, "grad_norm": 12.051535606384277, "learning_rate": 1.395263187716018e-05, "loss": 1.2541, "step": 68410 }, { "epoch": 11.63012068672446, "grad_norm": 16.88907241821289, "learning_rate": 1.3949798855459232e-05, "loss": 1.0432, "step": 68420 }, { "epoch": 11.631820499745029, "grad_norm": 16.916532516479492, "learning_rate": 1.3946965833758287e-05, "loss": 1.117, "step": 68430 }, { "epoch": 11.633520312765596, "grad_norm": 18.783273696899414, "learning_rate": 1.394413281205734e-05, "loss": 1.137, "step": 68440 }, { "epoch": 11.635220125786164, "grad_norm": 13.696939468383789, "learning_rate": 1.3941299790356394e-05, "loss": 1.2392, "step": 68450 }, { "epoch": 11.63691993880673, "grad_norm": 18.229873657226562, "learning_rate": 1.3938466768655448e-05, "loss": 1.1846, "step": 68460 }, { "epoch": 11.6386197518273, "grad_norm": 9.681693077087402, "learning_rate": 1.3935633746954503e-05, "loss": 1.133, "step": 68470 }, { "epoch": 11.640319564847866, "grad_norm": 13.077827453613281, "learning_rate": 1.3932800725253555e-05, "loss": 1.0697, "step": 68480 }, { "epoch": 11.642019377868435, "grad_norm": 16.33731460571289, "learning_rate": 1.392996770355261e-05, "loss": 1.316, "step": 68490 }, { "epoch": 11.643719190889001, "grad_norm": 14.311423301696777, "learning_rate": 1.3927134681851664e-05, "loss": 0.9992, "step": 68500 }, { "epoch": 11.64541900390957, "grad_norm": 10.787352561950684, "learning_rate": 1.3924301660150717e-05, "loss": 1.1711, "step": 68510 }, { "epoch": 11.647118816930139, "grad_norm": 17.071224212646484, "learning_rate": 1.392146863844977e-05, "loss": 1.1979, "step": 68520 }, { "epoch": 11.648818629950705, "grad_norm": 17.9189510345459, "learning_rate": 1.3918635616748826e-05, "loss": 1.0499, "step": 68530 }, { "epoch": 11.650518442971274, "grad_norm": 13.29861831665039, "learning_rate": 1.3915802595047878e-05, "loss": 1.0392, "step": 68540 }, { "epoch": 11.65221825599184, "grad_norm": 17.04637908935547, "learning_rate": 1.3912969573346931e-05, "loss": 1.1542, "step": 68550 }, { "epoch": 11.65391806901241, "grad_norm": 15.440120697021484, "learning_rate": 1.3910136551645986e-05, "loss": 1.1692, "step": 68560 }, { "epoch": 11.655617882032976, "grad_norm": 15.805671691894531, "learning_rate": 1.3907303529945038e-05, "loss": 1.1494, "step": 68570 }, { "epoch": 11.657317695053544, "grad_norm": 13.42346477508545, "learning_rate": 1.3904470508244093e-05, "loss": 1.1334, "step": 68580 }, { "epoch": 11.659017508074111, "grad_norm": 10.897233009338379, "learning_rate": 1.3901637486543147e-05, "loss": 1.1704, "step": 68590 }, { "epoch": 11.66071732109468, "grad_norm": 12.622729301452637, "learning_rate": 1.38988044648422e-05, "loss": 1.2493, "step": 68600 }, { "epoch": 11.662417134115246, "grad_norm": 15.370452880859375, "learning_rate": 1.3895971443141254e-05, "loss": 1.2015, "step": 68610 }, { "epoch": 11.664116947135815, "grad_norm": 14.67577075958252, "learning_rate": 1.389313842144031e-05, "loss": 1.1152, "step": 68620 }, { "epoch": 11.665816760156384, "grad_norm": 12.918957710266113, "learning_rate": 1.3890305399739363e-05, "loss": 1.0204, "step": 68630 }, { "epoch": 11.66751657317695, "grad_norm": 12.549659729003906, "learning_rate": 1.3887472378038416e-05, "loss": 1.071, "step": 68640 }, { "epoch": 11.669216386197519, "grad_norm": 12.635700225830078, "learning_rate": 1.388463935633747e-05, "loss": 0.9921, "step": 68650 }, { "epoch": 11.670916199218086, "grad_norm": 18.48334312438965, "learning_rate": 1.3881806334636525e-05, "loss": 1.0609, "step": 68660 }, { "epoch": 11.672616012238654, "grad_norm": 13.686178207397461, "learning_rate": 1.3878973312935577e-05, "loss": 1.1404, "step": 68670 }, { "epoch": 11.674315825259221, "grad_norm": 13.4747953414917, "learning_rate": 1.3876140291234632e-05, "loss": 0.9632, "step": 68680 }, { "epoch": 11.67601563827979, "grad_norm": 15.031800270080566, "learning_rate": 1.3873307269533686e-05, "loss": 1.1451, "step": 68690 }, { "epoch": 11.677715451300356, "grad_norm": 17.127649307250977, "learning_rate": 1.3870474247832739e-05, "loss": 1.0451, "step": 68700 }, { "epoch": 11.679415264320925, "grad_norm": 15.604085922241211, "learning_rate": 1.3867641226131793e-05, "loss": 1.1279, "step": 68710 }, { "epoch": 11.681115077341492, "grad_norm": 12.6366605758667, "learning_rate": 1.3864808204430846e-05, "loss": 0.9508, "step": 68720 }, { "epoch": 11.68281489036206, "grad_norm": 27.410715103149414, "learning_rate": 1.38619751827299e-05, "loss": 1.3406, "step": 68730 }, { "epoch": 11.684514703382629, "grad_norm": 12.543612480163574, "learning_rate": 1.3859142161028953e-05, "loss": 1.0027, "step": 68740 }, { "epoch": 11.686214516403195, "grad_norm": 16.62647247314453, "learning_rate": 1.3856309139328008e-05, "loss": 1.1293, "step": 68750 }, { "epoch": 11.687914329423764, "grad_norm": 13.330418586730957, "learning_rate": 1.385347611762706e-05, "loss": 1.2254, "step": 68760 }, { "epoch": 11.68961414244433, "grad_norm": 17.740245819091797, "learning_rate": 1.3850643095926115e-05, "loss": 1.2127, "step": 68770 }, { "epoch": 11.6913139554649, "grad_norm": 16.575334548950195, "learning_rate": 1.3847810074225169e-05, "loss": 1.1924, "step": 68780 }, { "epoch": 11.693013768485466, "grad_norm": 19.946290969848633, "learning_rate": 1.3844977052524223e-05, "loss": 0.9482, "step": 68790 }, { "epoch": 11.694713581506035, "grad_norm": 11.52513599395752, "learning_rate": 1.3842144030823276e-05, "loss": 1.1141, "step": 68800 }, { "epoch": 11.696413394526601, "grad_norm": 11.779868125915527, "learning_rate": 1.3839311009122331e-05, "loss": 1.166, "step": 68810 }, { "epoch": 11.69811320754717, "grad_norm": 12.758903503417969, "learning_rate": 1.3836477987421383e-05, "loss": 1.2635, "step": 68820 }, { "epoch": 11.699813020567738, "grad_norm": 12.736146926879883, "learning_rate": 1.3833644965720438e-05, "loss": 1.1341, "step": 68830 }, { "epoch": 11.701512833588305, "grad_norm": 12.157357215881348, "learning_rate": 1.3830811944019492e-05, "loss": 1.1276, "step": 68840 }, { "epoch": 11.703212646608874, "grad_norm": 12.716503143310547, "learning_rate": 1.3827978922318545e-05, "loss": 1.2046, "step": 68850 }, { "epoch": 11.70491245962944, "grad_norm": 13.18493938446045, "learning_rate": 1.3825145900617599e-05, "loss": 1.1983, "step": 68860 }, { "epoch": 11.706612272650009, "grad_norm": 11.81243896484375, "learning_rate": 1.3822312878916654e-05, "loss": 1.2095, "step": 68870 }, { "epoch": 11.708312085670576, "grad_norm": 12.605844497680664, "learning_rate": 1.3819479857215706e-05, "loss": 1.0036, "step": 68880 }, { "epoch": 11.710011898691144, "grad_norm": 17.971086502075195, "learning_rate": 1.381664683551476e-05, "loss": 1.0366, "step": 68890 }, { "epoch": 11.711711711711711, "grad_norm": 14.19408893585205, "learning_rate": 1.3813813813813815e-05, "loss": 0.8912, "step": 68900 }, { "epoch": 11.71341152473228, "grad_norm": 15.58366584777832, "learning_rate": 1.3810980792112866e-05, "loss": 1.165, "step": 68910 }, { "epoch": 11.715111337752846, "grad_norm": 14.278524398803711, "learning_rate": 1.3808147770411922e-05, "loss": 1.1064, "step": 68920 }, { "epoch": 11.716811150773415, "grad_norm": 15.761635780334473, "learning_rate": 1.3805314748710975e-05, "loss": 1.2676, "step": 68930 }, { "epoch": 11.718510963793983, "grad_norm": 19.474546432495117, "learning_rate": 1.3802481727010029e-05, "loss": 1.0172, "step": 68940 }, { "epoch": 11.72021077681455, "grad_norm": 20.135425567626953, "learning_rate": 1.3799648705309082e-05, "loss": 0.9695, "step": 68950 }, { "epoch": 11.721910589835119, "grad_norm": 17.151399612426758, "learning_rate": 1.3796815683608137e-05, "loss": 1.0531, "step": 68960 }, { "epoch": 11.723610402855686, "grad_norm": 15.905046463012695, "learning_rate": 1.379398266190719e-05, "loss": 1.2144, "step": 68970 }, { "epoch": 11.725310215876254, "grad_norm": 12.772397994995117, "learning_rate": 1.3791149640206245e-05, "loss": 0.9354, "step": 68980 }, { "epoch": 11.72701002889682, "grad_norm": 15.694269180297852, "learning_rate": 1.3788316618505298e-05, "loss": 1.0623, "step": 68990 }, { "epoch": 11.72870984191739, "grad_norm": 11.3969144821167, "learning_rate": 1.3785483596804353e-05, "loss": 0.982, "step": 69000 }, { "epoch": 11.730409654937956, "grad_norm": 14.947098731994629, "learning_rate": 1.3782650575103405e-05, "loss": 1.0932, "step": 69010 }, { "epoch": 11.732109467958525, "grad_norm": 20.057600021362305, "learning_rate": 1.377981755340246e-05, "loss": 1.0432, "step": 69020 }, { "epoch": 11.733809280979091, "grad_norm": 18.30392074584961, "learning_rate": 1.3776984531701514e-05, "loss": 1.249, "step": 69030 }, { "epoch": 11.73550909399966, "grad_norm": 13.675483703613281, "learning_rate": 1.3774151510000567e-05, "loss": 1.1415, "step": 69040 }, { "epoch": 11.737208907020229, "grad_norm": 11.206977844238281, "learning_rate": 1.3771318488299621e-05, "loss": 1.449, "step": 69050 }, { "epoch": 11.738908720040795, "grad_norm": 19.671537399291992, "learning_rate": 1.3768485466598674e-05, "loss": 1.0615, "step": 69060 }, { "epoch": 11.740608533061364, "grad_norm": 25.266691207885742, "learning_rate": 1.3765652444897728e-05, "loss": 1.0505, "step": 69070 }, { "epoch": 11.74230834608193, "grad_norm": 20.21134376525879, "learning_rate": 1.3762819423196781e-05, "loss": 1.3531, "step": 69080 }, { "epoch": 11.7440081591025, "grad_norm": 28.437904357910156, "learning_rate": 1.3759986401495837e-05, "loss": 1.0603, "step": 69090 }, { "epoch": 11.745707972123066, "grad_norm": 11.16441535949707, "learning_rate": 1.3757153379794888e-05, "loss": 1.0268, "step": 69100 }, { "epoch": 11.747407785143634, "grad_norm": 15.41644287109375, "learning_rate": 1.3754320358093944e-05, "loss": 1.2272, "step": 69110 }, { "epoch": 11.749107598164201, "grad_norm": 23.00167465209961, "learning_rate": 1.3751487336392997e-05, "loss": 1.0664, "step": 69120 }, { "epoch": 11.75080741118477, "grad_norm": 15.690485000610352, "learning_rate": 1.374865431469205e-05, "loss": 1.272, "step": 69130 }, { "epoch": 11.752507224205338, "grad_norm": 14.6351957321167, "learning_rate": 1.3745821292991104e-05, "loss": 1.253, "step": 69140 }, { "epoch": 11.754207037225905, "grad_norm": 15.441112518310547, "learning_rate": 1.374298827129016e-05, "loss": 1.1281, "step": 69150 }, { "epoch": 11.755906850246474, "grad_norm": 15.950894355773926, "learning_rate": 1.3740155249589211e-05, "loss": 0.9556, "step": 69160 }, { "epoch": 11.75760666326704, "grad_norm": 27.854114532470703, "learning_rate": 1.3737322227888267e-05, "loss": 1.2455, "step": 69170 }, { "epoch": 11.759306476287609, "grad_norm": 13.812223434448242, "learning_rate": 1.373448920618732e-05, "loss": 1.0845, "step": 69180 }, { "epoch": 11.761006289308176, "grad_norm": 13.16334342956543, "learning_rate": 1.3731656184486374e-05, "loss": 1.1741, "step": 69190 }, { "epoch": 11.762706102328744, "grad_norm": 21.052204132080078, "learning_rate": 1.3728823162785427e-05, "loss": 1.0898, "step": 69200 }, { "epoch": 11.764405915349311, "grad_norm": 9.698080062866211, "learning_rate": 1.3725990141084482e-05, "loss": 1.1892, "step": 69210 }, { "epoch": 11.76610572836988, "grad_norm": 12.219132423400879, "learning_rate": 1.3723157119383534e-05, "loss": 1.2268, "step": 69220 }, { "epoch": 11.767805541390446, "grad_norm": 18.782577514648438, "learning_rate": 1.3720324097682588e-05, "loss": 1.1793, "step": 69230 }, { "epoch": 11.769505354411015, "grad_norm": 13.195927619934082, "learning_rate": 1.3717491075981643e-05, "loss": 1.3286, "step": 69240 }, { "epoch": 11.771205167431582, "grad_norm": 13.175337791442871, "learning_rate": 1.3714658054280695e-05, "loss": 1.2267, "step": 69250 }, { "epoch": 11.77290498045215, "grad_norm": 14.183133125305176, "learning_rate": 1.371182503257975e-05, "loss": 1.1156, "step": 69260 }, { "epoch": 11.774604793472719, "grad_norm": 26.586626052856445, "learning_rate": 1.3708992010878803e-05, "loss": 1.0459, "step": 69270 }, { "epoch": 11.776304606493285, "grad_norm": 17.8992977142334, "learning_rate": 1.3706158989177857e-05, "loss": 1.1463, "step": 69280 }, { "epoch": 11.778004419513854, "grad_norm": 32.33724594116211, "learning_rate": 1.370332596747691e-05, "loss": 0.9856, "step": 69290 }, { "epoch": 11.77970423253442, "grad_norm": 14.703025817871094, "learning_rate": 1.3700492945775966e-05, "loss": 1.1433, "step": 69300 }, { "epoch": 11.78140404555499, "grad_norm": 14.896038055419922, "learning_rate": 1.3697659924075018e-05, "loss": 1.2917, "step": 69310 }, { "epoch": 11.783103858575556, "grad_norm": 13.349268913269043, "learning_rate": 1.3694826902374073e-05, "loss": 1.2108, "step": 69320 }, { "epoch": 11.784803671596125, "grad_norm": 12.066725730895996, "learning_rate": 1.3691993880673126e-05, "loss": 1.1089, "step": 69330 }, { "epoch": 11.786503484616691, "grad_norm": 21.1227970123291, "learning_rate": 1.368916085897218e-05, "loss": 1.152, "step": 69340 }, { "epoch": 11.78820329763726, "grad_norm": 18.248611450195312, "learning_rate": 1.3686327837271233e-05, "loss": 0.9852, "step": 69350 }, { "epoch": 11.789903110657828, "grad_norm": 14.598196029663086, "learning_rate": 1.3683494815570289e-05, "loss": 1.0805, "step": 69360 }, { "epoch": 11.791602923678395, "grad_norm": 14.369505882263184, "learning_rate": 1.3680661793869342e-05, "loss": 1.0218, "step": 69370 }, { "epoch": 11.793302736698964, "grad_norm": 11.04245376586914, "learning_rate": 1.3677828772168396e-05, "loss": 1.1008, "step": 69380 }, { "epoch": 11.79500254971953, "grad_norm": 15.516544342041016, "learning_rate": 1.3674995750467449e-05, "loss": 1.0891, "step": 69390 }, { "epoch": 11.796702362740099, "grad_norm": 18.895309448242188, "learning_rate": 1.3672162728766504e-05, "loss": 1.0081, "step": 69400 }, { "epoch": 11.798402175760666, "grad_norm": 10.202739715576172, "learning_rate": 1.3669329707065556e-05, "loss": 1.016, "step": 69410 }, { "epoch": 11.800101988781234, "grad_norm": 15.742921829223633, "learning_rate": 1.366649668536461e-05, "loss": 1.1981, "step": 69420 }, { "epoch": 11.801801801801801, "grad_norm": 13.428587913513184, "learning_rate": 1.3663663663663665e-05, "loss": 1.1331, "step": 69430 }, { "epoch": 11.80350161482237, "grad_norm": 11.108297348022461, "learning_rate": 1.3660830641962717e-05, "loss": 1.1613, "step": 69440 }, { "epoch": 11.805201427842936, "grad_norm": 14.468043327331543, "learning_rate": 1.3657997620261772e-05, "loss": 1.1359, "step": 69450 }, { "epoch": 11.806901240863505, "grad_norm": 21.527063369750977, "learning_rate": 1.3655164598560825e-05, "loss": 1.0118, "step": 69460 }, { "epoch": 11.808601053884074, "grad_norm": 11.345060348510742, "learning_rate": 1.3652331576859879e-05, "loss": 1.1707, "step": 69470 }, { "epoch": 11.81030086690464, "grad_norm": 18.10538673400879, "learning_rate": 1.3649498555158932e-05, "loss": 0.979, "step": 69480 }, { "epoch": 11.812000679925209, "grad_norm": 19.247013092041016, "learning_rate": 1.3646665533457988e-05, "loss": 1.04, "step": 69490 }, { "epoch": 11.813700492945776, "grad_norm": 29.31590461730957, "learning_rate": 1.364383251175704e-05, "loss": 1.3072, "step": 69500 }, { "epoch": 11.815400305966344, "grad_norm": 17.617618560791016, "learning_rate": 1.3640999490056095e-05, "loss": 0.8489, "step": 69510 }, { "epoch": 11.817100118986911, "grad_norm": 12.355382919311523, "learning_rate": 1.3638166468355148e-05, "loss": 1.2473, "step": 69520 }, { "epoch": 11.81879993200748, "grad_norm": 11.046104431152344, "learning_rate": 1.3635333446654202e-05, "loss": 1.0731, "step": 69530 }, { "epoch": 11.820499745028046, "grad_norm": 16.043922424316406, "learning_rate": 1.3632500424953255e-05, "loss": 1.0532, "step": 69540 }, { "epoch": 11.822199558048615, "grad_norm": 20.062744140625, "learning_rate": 1.362966740325231e-05, "loss": 1.2536, "step": 69550 }, { "epoch": 11.823899371069182, "grad_norm": 18.406248092651367, "learning_rate": 1.3626834381551362e-05, "loss": 1.3028, "step": 69560 }, { "epoch": 11.82559918408975, "grad_norm": 15.036301612854004, "learning_rate": 1.3624001359850418e-05, "loss": 1.0089, "step": 69570 }, { "epoch": 11.827298997110319, "grad_norm": 12.171120643615723, "learning_rate": 1.3621168338149471e-05, "loss": 0.9188, "step": 69580 }, { "epoch": 11.828998810130885, "grad_norm": 12.48166275024414, "learning_rate": 1.3618335316448523e-05, "loss": 1.1742, "step": 69590 }, { "epoch": 11.830698623151454, "grad_norm": 12.99587631225586, "learning_rate": 1.3615502294747578e-05, "loss": 0.9937, "step": 69600 }, { "epoch": 11.83239843617202, "grad_norm": 12.506189346313477, "learning_rate": 1.3612669273046632e-05, "loss": 1.2048, "step": 69610 }, { "epoch": 11.83409824919259, "grad_norm": 12.811975479125977, "learning_rate": 1.3609836251345685e-05, "loss": 0.8963, "step": 69620 }, { "epoch": 11.835798062213156, "grad_norm": 12.52173137664795, "learning_rate": 1.3607003229644739e-05, "loss": 1.0985, "step": 69630 }, { "epoch": 11.837497875233725, "grad_norm": 12.16456127166748, "learning_rate": 1.3604170207943794e-05, "loss": 1.1305, "step": 69640 }, { "epoch": 11.839197688254291, "grad_norm": 15.810622215270996, "learning_rate": 1.3601337186242846e-05, "loss": 0.9282, "step": 69650 }, { "epoch": 11.84089750127486, "grad_norm": 16.28163719177246, "learning_rate": 1.3598504164541901e-05, "loss": 1.0014, "step": 69660 }, { "epoch": 11.842597314295428, "grad_norm": 14.946602821350098, "learning_rate": 1.3595671142840954e-05, "loss": 1.1383, "step": 69670 }, { "epoch": 11.844297127315995, "grad_norm": 14.575627326965332, "learning_rate": 1.3592838121140008e-05, "loss": 1.0966, "step": 69680 }, { "epoch": 11.845996940336564, "grad_norm": 19.98332405090332, "learning_rate": 1.3590005099439062e-05, "loss": 1.1251, "step": 69690 }, { "epoch": 11.84769675335713, "grad_norm": 16.353500366210938, "learning_rate": 1.3587172077738117e-05, "loss": 1.3884, "step": 69700 }, { "epoch": 11.849396566377699, "grad_norm": 19.5145263671875, "learning_rate": 1.358433905603717e-05, "loss": 0.976, "step": 69710 }, { "epoch": 11.851096379398266, "grad_norm": 30.336380004882812, "learning_rate": 1.3581506034336224e-05, "loss": 1.0073, "step": 69720 }, { "epoch": 11.852796192418834, "grad_norm": 10.595820426940918, "learning_rate": 1.3578673012635277e-05, "loss": 1.2334, "step": 69730 }, { "epoch": 11.854496005439401, "grad_norm": 17.82472801208496, "learning_rate": 1.3575839990934333e-05, "loss": 1.0432, "step": 69740 }, { "epoch": 11.85619581845997, "grad_norm": 17.141380310058594, "learning_rate": 1.3573006969233384e-05, "loss": 1.1381, "step": 69750 }, { "epoch": 11.857895631480536, "grad_norm": 27.146495819091797, "learning_rate": 1.3570173947532438e-05, "loss": 0.9872, "step": 69760 }, { "epoch": 11.859595444501105, "grad_norm": 14.200074195861816, "learning_rate": 1.3567340925831493e-05, "loss": 1.1495, "step": 69770 }, { "epoch": 11.861295257521672, "grad_norm": 18.91799545288086, "learning_rate": 1.3564507904130545e-05, "loss": 1.2211, "step": 69780 }, { "epoch": 11.86299507054224, "grad_norm": 24.46826934814453, "learning_rate": 1.35616748824296e-05, "loss": 0.94, "step": 69790 }, { "epoch": 11.864694883562809, "grad_norm": 15.095091819763184, "learning_rate": 1.3558841860728654e-05, "loss": 1.1363, "step": 69800 }, { "epoch": 11.866394696583376, "grad_norm": 13.334989547729492, "learning_rate": 1.3556008839027707e-05, "loss": 1.1159, "step": 69810 }, { "epoch": 11.868094509603944, "grad_norm": 13.776881217956543, "learning_rate": 1.355317581732676e-05, "loss": 1.1595, "step": 69820 }, { "epoch": 11.86979432262451, "grad_norm": 16.244007110595703, "learning_rate": 1.3550342795625816e-05, "loss": 1.2012, "step": 69830 }, { "epoch": 11.87149413564508, "grad_norm": 16.896556854248047, "learning_rate": 1.3547509773924868e-05, "loss": 1.2481, "step": 69840 }, { "epoch": 11.873193948665646, "grad_norm": 14.641180038452148, "learning_rate": 1.3544676752223923e-05, "loss": 1.0645, "step": 69850 }, { "epoch": 11.874893761686215, "grad_norm": 17.603105545043945, "learning_rate": 1.3541843730522976e-05, "loss": 1.104, "step": 69860 }, { "epoch": 11.876593574706781, "grad_norm": 12.109023094177246, "learning_rate": 1.353901070882203e-05, "loss": 1.0241, "step": 69870 }, { "epoch": 11.87829338772735, "grad_norm": 16.475200653076172, "learning_rate": 1.3536177687121084e-05, "loss": 1.0861, "step": 69880 }, { "epoch": 11.879993200747919, "grad_norm": 16.894981384277344, "learning_rate": 1.3533344665420139e-05, "loss": 1.2399, "step": 69890 }, { "epoch": 11.881693013768485, "grad_norm": 18.3775634765625, "learning_rate": 1.353051164371919e-05, "loss": 1.3874, "step": 69900 }, { "epoch": 11.883392826789054, "grad_norm": 14.006726264953613, "learning_rate": 1.3527678622018246e-05, "loss": 1.2489, "step": 69910 }, { "epoch": 11.88509263980962, "grad_norm": 16.595720291137695, "learning_rate": 1.35248456003173e-05, "loss": 1.1402, "step": 69920 }, { "epoch": 11.88679245283019, "grad_norm": 13.405865669250488, "learning_rate": 1.3522012578616351e-05, "loss": 1.0475, "step": 69930 }, { "epoch": 11.888492265850756, "grad_norm": 19.82172203063965, "learning_rate": 1.3519179556915406e-05, "loss": 1.0931, "step": 69940 }, { "epoch": 11.890192078871324, "grad_norm": 12.843315124511719, "learning_rate": 1.351634653521446e-05, "loss": 1.1019, "step": 69950 }, { "epoch": 11.891891891891891, "grad_norm": 17.323902130126953, "learning_rate": 1.3513513513513513e-05, "loss": 0.8859, "step": 69960 }, { "epoch": 11.89359170491246, "grad_norm": 12.265050888061523, "learning_rate": 1.3510680491812567e-05, "loss": 1.0606, "step": 69970 }, { "epoch": 11.895291517933028, "grad_norm": 19.29819107055664, "learning_rate": 1.3507847470111622e-05, "loss": 1.0133, "step": 69980 }, { "epoch": 11.896991330953595, "grad_norm": 13.608529090881348, "learning_rate": 1.3505014448410674e-05, "loss": 1.0538, "step": 69990 }, { "epoch": 11.898691143974164, "grad_norm": 15.839487075805664, "learning_rate": 1.350218142670973e-05, "loss": 1.2544, "step": 70000 }, { "epoch": 11.90039095699473, "grad_norm": 23.6589412689209, "learning_rate": 1.3499348405008783e-05, "loss": 0.9805, "step": 70010 }, { "epoch": 11.902090770015299, "grad_norm": 15.948975563049316, "learning_rate": 1.3496515383307836e-05, "loss": 0.9982, "step": 70020 }, { "epoch": 11.903790583035866, "grad_norm": 15.735613822937012, "learning_rate": 1.349368236160689e-05, "loss": 1.3736, "step": 70030 }, { "epoch": 11.905490396056434, "grad_norm": 12.436710357666016, "learning_rate": 1.3490849339905945e-05, "loss": 1.2183, "step": 70040 }, { "epoch": 11.907190209077001, "grad_norm": 14.274085998535156, "learning_rate": 1.3488016318204997e-05, "loss": 1.0773, "step": 70050 }, { "epoch": 11.90889002209757, "grad_norm": 12.439334869384766, "learning_rate": 1.3485183296504052e-05, "loss": 1.1697, "step": 70060 }, { "epoch": 11.910589835118136, "grad_norm": 19.59959602355957, "learning_rate": 1.3482350274803106e-05, "loss": 1.0975, "step": 70070 }, { "epoch": 11.912289648138705, "grad_norm": 15.43077564239502, "learning_rate": 1.3479517253102159e-05, "loss": 1.0561, "step": 70080 }, { "epoch": 11.913989461159272, "grad_norm": 17.082807540893555, "learning_rate": 1.3476684231401213e-05, "loss": 1.3338, "step": 70090 }, { "epoch": 11.91568927417984, "grad_norm": 32.90380096435547, "learning_rate": 1.3473851209700266e-05, "loss": 1.0474, "step": 70100 }, { "epoch": 11.917389087200409, "grad_norm": 12.974775314331055, "learning_rate": 1.3471018187999321e-05, "loss": 1.2536, "step": 70110 }, { "epoch": 11.919088900220975, "grad_norm": 16.992578506469727, "learning_rate": 1.3468185166298373e-05, "loss": 0.8917, "step": 70120 }, { "epoch": 11.920788713241544, "grad_norm": 12.967818260192871, "learning_rate": 1.3465352144597428e-05, "loss": 1.006, "step": 70130 }, { "epoch": 11.92248852626211, "grad_norm": 12.853816986083984, "learning_rate": 1.3462519122896482e-05, "loss": 1.0728, "step": 70140 }, { "epoch": 11.92418833928268, "grad_norm": 13.46440315246582, "learning_rate": 1.3459686101195535e-05, "loss": 1.1824, "step": 70150 }, { "epoch": 11.925888152303246, "grad_norm": 13.069982528686523, "learning_rate": 1.3456853079494589e-05, "loss": 1.0967, "step": 70160 }, { "epoch": 11.927587965323815, "grad_norm": 16.105178833007812, "learning_rate": 1.3454020057793644e-05, "loss": 1.285, "step": 70170 }, { "epoch": 11.929287778344381, "grad_norm": 19.626216888427734, "learning_rate": 1.3451187036092696e-05, "loss": 1.3058, "step": 70180 }, { "epoch": 11.93098759136495, "grad_norm": 16.565916061401367, "learning_rate": 1.3448354014391751e-05, "loss": 1.0868, "step": 70190 }, { "epoch": 11.932687404385518, "grad_norm": 12.008597373962402, "learning_rate": 1.3445520992690805e-05, "loss": 1.263, "step": 70200 }, { "epoch": 11.934387217406085, "grad_norm": 12.692599296569824, "learning_rate": 1.3442687970989858e-05, "loss": 1.0599, "step": 70210 }, { "epoch": 11.936087030426654, "grad_norm": 14.524142265319824, "learning_rate": 1.3439854949288912e-05, "loss": 1.0049, "step": 70220 }, { "epoch": 11.93778684344722, "grad_norm": 11.811577796936035, "learning_rate": 1.3437021927587967e-05, "loss": 1.0193, "step": 70230 }, { "epoch": 11.939486656467789, "grad_norm": 17.234451293945312, "learning_rate": 1.3434188905887019e-05, "loss": 1.0888, "step": 70240 }, { "epoch": 11.941186469488356, "grad_norm": 16.963220596313477, "learning_rate": 1.3431355884186074e-05, "loss": 0.8591, "step": 70250 }, { "epoch": 11.942886282508924, "grad_norm": 14.724427223205566, "learning_rate": 1.3428522862485128e-05, "loss": 0.9523, "step": 70260 }, { "epoch": 11.944586095529491, "grad_norm": 14.808043479919434, "learning_rate": 1.342568984078418e-05, "loss": 1.1313, "step": 70270 }, { "epoch": 11.94628590855006, "grad_norm": 19.92992401123047, "learning_rate": 1.3422856819083235e-05, "loss": 1.0297, "step": 70280 }, { "epoch": 11.947985721570626, "grad_norm": 14.963335990905762, "learning_rate": 1.3420023797382288e-05, "loss": 1.1881, "step": 70290 }, { "epoch": 11.949685534591195, "grad_norm": 17.651050567626953, "learning_rate": 1.3417190775681342e-05, "loss": 1.2043, "step": 70300 }, { "epoch": 11.951385347611764, "grad_norm": 19.60648536682129, "learning_rate": 1.3414357753980395e-05, "loss": 1.1236, "step": 70310 }, { "epoch": 11.95308516063233, "grad_norm": 11.83277702331543, "learning_rate": 1.341152473227945e-05, "loss": 0.9973, "step": 70320 }, { "epoch": 11.954784973652899, "grad_norm": 15.953845977783203, "learning_rate": 1.3408691710578502e-05, "loss": 0.9158, "step": 70330 }, { "epoch": 11.956484786673466, "grad_norm": 17.77422332763672, "learning_rate": 1.3405858688877557e-05, "loss": 1.0488, "step": 70340 }, { "epoch": 11.958184599694034, "grad_norm": 16.814464569091797, "learning_rate": 1.3403025667176611e-05, "loss": 1.2717, "step": 70350 }, { "epoch": 11.959884412714601, "grad_norm": 14.801413536071777, "learning_rate": 1.3400192645475664e-05, "loss": 1.1385, "step": 70360 }, { "epoch": 11.96158422573517, "grad_norm": 39.60513687133789, "learning_rate": 1.3397359623774718e-05, "loss": 1.2659, "step": 70370 }, { "epoch": 11.963284038755736, "grad_norm": 12.414729118347168, "learning_rate": 1.3394526602073773e-05, "loss": 1.1463, "step": 70380 }, { "epoch": 11.964983851776305, "grad_norm": 13.166606903076172, "learning_rate": 1.3391693580372825e-05, "loss": 1.1282, "step": 70390 }, { "epoch": 11.966683664796872, "grad_norm": 18.050233840942383, "learning_rate": 1.338886055867188e-05, "loss": 1.0221, "step": 70400 }, { "epoch": 11.96838347781744, "grad_norm": 17.099990844726562, "learning_rate": 1.3386027536970934e-05, "loss": 1.1353, "step": 70410 }, { "epoch": 11.970083290838009, "grad_norm": 15.266036033630371, "learning_rate": 1.3383194515269987e-05, "loss": 1.0119, "step": 70420 }, { "epoch": 11.971783103858575, "grad_norm": 24.74404525756836, "learning_rate": 1.338036149356904e-05, "loss": 1.2431, "step": 70430 }, { "epoch": 11.973482916879144, "grad_norm": 14.526795387268066, "learning_rate": 1.3377528471868094e-05, "loss": 0.9547, "step": 70440 }, { "epoch": 11.97518272989971, "grad_norm": 15.170853614807129, "learning_rate": 1.337469545016715e-05, "loss": 1.1464, "step": 70450 }, { "epoch": 11.97688254292028, "grad_norm": 12.405691146850586, "learning_rate": 1.3371862428466201e-05, "loss": 1.2644, "step": 70460 }, { "epoch": 11.978582355940846, "grad_norm": 13.059246063232422, "learning_rate": 1.3369029406765257e-05, "loss": 1.1994, "step": 70470 }, { "epoch": 11.980282168961415, "grad_norm": 17.07392692565918, "learning_rate": 1.336619638506431e-05, "loss": 1.2368, "step": 70480 }, { "epoch": 11.981981981981981, "grad_norm": 17.0518741607666, "learning_rate": 1.3363363363363364e-05, "loss": 1.2174, "step": 70490 }, { "epoch": 11.98368179500255, "grad_norm": 12.56690502166748, "learning_rate": 1.3360530341662417e-05, "loss": 1.2916, "step": 70500 }, { "epoch": 11.985381608023118, "grad_norm": 13.487627029418945, "learning_rate": 1.3357697319961472e-05, "loss": 1.2343, "step": 70510 }, { "epoch": 11.987081421043685, "grad_norm": 14.160197257995605, "learning_rate": 1.3354864298260524e-05, "loss": 1.2288, "step": 70520 }, { "epoch": 11.988781234064254, "grad_norm": 12.276676177978516, "learning_rate": 1.335203127655958e-05, "loss": 1.1697, "step": 70530 }, { "epoch": 11.99048104708482, "grad_norm": 1662.2445068359375, "learning_rate": 1.3349198254858633e-05, "loss": 1.1387, "step": 70540 }, { "epoch": 11.992180860105389, "grad_norm": 14.04030990600586, "learning_rate": 1.3346365233157686e-05, "loss": 0.8387, "step": 70550 }, { "epoch": 11.993880673125956, "grad_norm": 13.998709678649902, "learning_rate": 1.334353221145674e-05, "loss": 1.1965, "step": 70560 }, { "epoch": 11.995580486146524, "grad_norm": 10.340587615966797, "learning_rate": 1.3340699189755795e-05, "loss": 1.1222, "step": 70570 }, { "epoch": 11.997280299167091, "grad_norm": 8.085679054260254, "learning_rate": 1.3337866168054847e-05, "loss": 1.1302, "step": 70580 }, { "epoch": 11.99898011218766, "grad_norm": 17.68060874938965, "learning_rate": 1.3335033146353902e-05, "loss": 1.0196, "step": 70590 }, { "epoch": 12.0, "eval_cer": 1.0, "eval_loss": 2.4513533115386963, "eval_runtime": 1997.7038, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.236, "step": 70596 }, { "epoch": 12.000679925208226, "grad_norm": 11.308221817016602, "learning_rate": 1.3332200124652956e-05, "loss": 0.9272, "step": 70600 }, { "epoch": 12.002379738228795, "grad_norm": 18.49628448486328, "learning_rate": 1.332936710295201e-05, "loss": 1.2542, "step": 70610 }, { "epoch": 12.004079551249363, "grad_norm": 14.104609489440918, "learning_rate": 1.3326534081251063e-05, "loss": 1.0352, "step": 70620 }, { "epoch": 12.00577936426993, "grad_norm": 13.67963981628418, "learning_rate": 1.3323701059550116e-05, "loss": 1.175, "step": 70630 }, { "epoch": 12.007479177290499, "grad_norm": 12.903705596923828, "learning_rate": 1.332086803784917e-05, "loss": 0.8477, "step": 70640 }, { "epoch": 12.009178990311066, "grad_norm": 15.058073997497559, "learning_rate": 1.3318035016148223e-05, "loss": 0.9478, "step": 70650 }, { "epoch": 12.010878803331634, "grad_norm": 13.789802551269531, "learning_rate": 1.3315201994447279e-05, "loss": 0.8464, "step": 70660 }, { "epoch": 12.0125786163522, "grad_norm": 15.680578231811523, "learning_rate": 1.331236897274633e-05, "loss": 0.8642, "step": 70670 }, { "epoch": 12.01427842937277, "grad_norm": 12.87254810333252, "learning_rate": 1.3309535951045386e-05, "loss": 1.0189, "step": 70680 }, { "epoch": 12.015978242393336, "grad_norm": 15.339861869812012, "learning_rate": 1.3306702929344439e-05, "loss": 0.956, "step": 70690 }, { "epoch": 12.017678055413905, "grad_norm": 12.260162353515625, "learning_rate": 1.3303869907643493e-05, "loss": 1.1823, "step": 70700 }, { "epoch": 12.019377868434471, "grad_norm": 15.740485191345215, "learning_rate": 1.3301036885942546e-05, "loss": 0.8333, "step": 70710 }, { "epoch": 12.02107768145504, "grad_norm": 15.94528865814209, "learning_rate": 1.3298203864241601e-05, "loss": 0.9395, "step": 70720 }, { "epoch": 12.022777494475609, "grad_norm": 13.297184944152832, "learning_rate": 1.3295370842540653e-05, "loss": 0.9309, "step": 70730 }, { "epoch": 12.024477307496175, "grad_norm": 16.89821434020996, "learning_rate": 1.3292537820839708e-05, "loss": 0.9667, "step": 70740 }, { "epoch": 12.026177120516744, "grad_norm": 24.25714874267578, "learning_rate": 1.3289704799138762e-05, "loss": 1.0675, "step": 70750 }, { "epoch": 12.02787693353731, "grad_norm": 13.861952781677246, "learning_rate": 1.3286871777437815e-05, "loss": 0.9033, "step": 70760 }, { "epoch": 12.02957674655788, "grad_norm": 20.64820098876953, "learning_rate": 1.3284038755736869e-05, "loss": 1.1291, "step": 70770 }, { "epoch": 12.031276559578446, "grad_norm": 16.62470817565918, "learning_rate": 1.3281205734035924e-05, "loss": 1.1652, "step": 70780 }, { "epoch": 12.032976372599014, "grad_norm": 14.746055603027344, "learning_rate": 1.3278372712334976e-05, "loss": 1.0142, "step": 70790 }, { "epoch": 12.034676185619581, "grad_norm": 9.531356811523438, "learning_rate": 1.327553969063403e-05, "loss": 0.9832, "step": 70800 }, { "epoch": 12.03637599864015, "grad_norm": 12.49056339263916, "learning_rate": 1.3272706668933085e-05, "loss": 1.0326, "step": 70810 }, { "epoch": 12.038075811660716, "grad_norm": 23.456798553466797, "learning_rate": 1.3269873647232138e-05, "loss": 0.8507, "step": 70820 }, { "epoch": 12.039775624681285, "grad_norm": 14.941399574279785, "learning_rate": 1.3267040625531192e-05, "loss": 1.0642, "step": 70830 }, { "epoch": 12.041475437701854, "grad_norm": 16.792505264282227, "learning_rate": 1.3264207603830245e-05, "loss": 0.9917, "step": 70840 }, { "epoch": 12.04317525072242, "grad_norm": 14.399917602539062, "learning_rate": 1.32613745821293e-05, "loss": 1.0687, "step": 70850 }, { "epoch": 12.044875063742989, "grad_norm": 10.543254852294922, "learning_rate": 1.3258541560428352e-05, "loss": 0.9834, "step": 70860 }, { "epoch": 12.046574876763556, "grad_norm": 13.739842414855957, "learning_rate": 1.3255708538727408e-05, "loss": 1.1598, "step": 70870 }, { "epoch": 12.048274689784124, "grad_norm": 12.36056137084961, "learning_rate": 1.3252875517026461e-05, "loss": 1.0151, "step": 70880 }, { "epoch": 12.049974502804691, "grad_norm": 11.83410930633545, "learning_rate": 1.3250042495325515e-05, "loss": 1.1678, "step": 70890 }, { "epoch": 12.05167431582526, "grad_norm": 14.278373718261719, "learning_rate": 1.3247209473624568e-05, "loss": 1.0051, "step": 70900 }, { "epoch": 12.053374128845826, "grad_norm": 16.928813934326172, "learning_rate": 1.3244376451923623e-05, "loss": 0.9882, "step": 70910 }, { "epoch": 12.055073941866395, "grad_norm": 13.121731758117676, "learning_rate": 1.3241543430222675e-05, "loss": 0.9801, "step": 70920 }, { "epoch": 12.056773754886962, "grad_norm": 15.663103103637695, "learning_rate": 1.323871040852173e-05, "loss": 0.9673, "step": 70930 }, { "epoch": 12.05847356790753, "grad_norm": 9.993942260742188, "learning_rate": 1.3235877386820784e-05, "loss": 1.0288, "step": 70940 }, { "epoch": 12.060173380928099, "grad_norm": 18.6983585357666, "learning_rate": 1.3233044365119837e-05, "loss": 0.9771, "step": 70950 }, { "epoch": 12.061873193948665, "grad_norm": 17.584245681762695, "learning_rate": 1.3230211343418891e-05, "loss": 1.0136, "step": 70960 }, { "epoch": 12.063573006969234, "grad_norm": 13.421331405639648, "learning_rate": 1.3227378321717944e-05, "loss": 0.8991, "step": 70970 }, { "epoch": 12.0652728199898, "grad_norm": 11.364997863769531, "learning_rate": 1.3224545300016998e-05, "loss": 0.9785, "step": 70980 }, { "epoch": 12.06697263301037, "grad_norm": 13.312518119812012, "learning_rate": 1.3221712278316052e-05, "loss": 0.9136, "step": 70990 }, { "epoch": 12.068672446030936, "grad_norm": 12.44146728515625, "learning_rate": 1.3218879256615107e-05, "loss": 1.0366, "step": 71000 }, { "epoch": 12.070372259051505, "grad_norm": 14.69394302368164, "learning_rate": 1.3216046234914159e-05, "loss": 0.9494, "step": 71010 }, { "epoch": 12.072072072072071, "grad_norm": 14.337352752685547, "learning_rate": 1.3213213213213214e-05, "loss": 0.9203, "step": 71020 }, { "epoch": 12.07377188509264, "grad_norm": 91.02469635009766, "learning_rate": 1.3210380191512267e-05, "loss": 0.9893, "step": 71030 }, { "epoch": 12.075471698113208, "grad_norm": 27.223417282104492, "learning_rate": 1.320754716981132e-05, "loss": 1.107, "step": 71040 }, { "epoch": 12.077171511133775, "grad_norm": 11.678031921386719, "learning_rate": 1.3204714148110374e-05, "loss": 0.8799, "step": 71050 }, { "epoch": 12.078871324154344, "grad_norm": 33.85340881347656, "learning_rate": 1.320188112640943e-05, "loss": 0.973, "step": 71060 }, { "epoch": 12.08057113717491, "grad_norm": 14.551752090454102, "learning_rate": 1.3199048104708481e-05, "loss": 0.9836, "step": 71070 }, { "epoch": 12.082270950195479, "grad_norm": 10.945669174194336, "learning_rate": 1.3196215083007537e-05, "loss": 0.7907, "step": 71080 }, { "epoch": 12.083970763216046, "grad_norm": 15.594341278076172, "learning_rate": 1.319338206130659e-05, "loss": 0.9719, "step": 71090 }, { "epoch": 12.085670576236614, "grad_norm": 14.761030197143555, "learning_rate": 1.3190549039605644e-05, "loss": 0.8788, "step": 71100 }, { "epoch": 12.087370389257181, "grad_norm": 14.105204582214355, "learning_rate": 1.3187716017904697e-05, "loss": 0.9863, "step": 71110 }, { "epoch": 12.08907020227775, "grad_norm": 15.314985275268555, "learning_rate": 1.3184882996203752e-05, "loss": 0.982, "step": 71120 }, { "epoch": 12.090770015298316, "grad_norm": 14.584630966186523, "learning_rate": 1.3182049974502804e-05, "loss": 0.9188, "step": 71130 }, { "epoch": 12.092469828318885, "grad_norm": 13.915105819702148, "learning_rate": 1.3179216952801858e-05, "loss": 0.8147, "step": 71140 }, { "epoch": 12.094169641339453, "grad_norm": 11.755346298217773, "learning_rate": 1.3176383931100913e-05, "loss": 0.984, "step": 71150 }, { "epoch": 12.09586945436002, "grad_norm": 33.640663146972656, "learning_rate": 1.3173550909399966e-05, "loss": 0.9044, "step": 71160 }, { "epoch": 12.097569267380589, "grad_norm": 16.288583755493164, "learning_rate": 1.317071788769902e-05, "loss": 0.9536, "step": 71170 }, { "epoch": 12.099269080401156, "grad_norm": 15.719586372375488, "learning_rate": 1.3167884865998074e-05, "loss": 1.0427, "step": 71180 }, { "epoch": 12.100968893421724, "grad_norm": 12.735761642456055, "learning_rate": 1.3165051844297129e-05, "loss": 0.9986, "step": 71190 }, { "epoch": 12.10266870644229, "grad_norm": 20.845754623413086, "learning_rate": 1.316221882259618e-05, "loss": 0.9995, "step": 71200 }, { "epoch": 12.10436851946286, "grad_norm": 18.533809661865234, "learning_rate": 1.3159385800895236e-05, "loss": 0.9696, "step": 71210 }, { "epoch": 12.106068332483426, "grad_norm": 16.23822593688965, "learning_rate": 1.315655277919429e-05, "loss": 1.0333, "step": 71220 }, { "epoch": 12.107768145503995, "grad_norm": 18.179235458374023, "learning_rate": 1.3153719757493343e-05, "loss": 1.2156, "step": 71230 }, { "epoch": 12.109467958524561, "grad_norm": 15.781384468078613, "learning_rate": 1.3150886735792396e-05, "loss": 0.8755, "step": 71240 }, { "epoch": 12.11116777154513, "grad_norm": 27.105607986450195, "learning_rate": 1.3148053714091452e-05, "loss": 1.2413, "step": 71250 }, { "epoch": 12.112867584565699, "grad_norm": 12.673589706420898, "learning_rate": 1.3145220692390503e-05, "loss": 0.9501, "step": 71260 }, { "epoch": 12.114567397586265, "grad_norm": 16.338838577270508, "learning_rate": 1.3142387670689559e-05, "loss": 1.1076, "step": 71270 }, { "epoch": 12.116267210606834, "grad_norm": 19.86359977722168, "learning_rate": 1.3139554648988612e-05, "loss": 1.1187, "step": 71280 }, { "epoch": 12.1179670236274, "grad_norm": 21.159971237182617, "learning_rate": 1.3136721627287666e-05, "loss": 1.2664, "step": 71290 }, { "epoch": 12.11966683664797, "grad_norm": 11.72671890258789, "learning_rate": 1.313388860558672e-05, "loss": 1.0122, "step": 71300 }, { "epoch": 12.121366649668536, "grad_norm": 12.852884292602539, "learning_rate": 1.3131055583885773e-05, "loss": 1.1289, "step": 71310 }, { "epoch": 12.123066462689104, "grad_norm": 10.957450866699219, "learning_rate": 1.3128222562184826e-05, "loss": 1.071, "step": 71320 }, { "epoch": 12.124766275709671, "grad_norm": 16.80079460144043, "learning_rate": 1.312538954048388e-05, "loss": 0.9578, "step": 71330 }, { "epoch": 12.12646608873024, "grad_norm": 14.160737991333008, "learning_rate": 1.3122556518782935e-05, "loss": 0.8251, "step": 71340 }, { "epoch": 12.128165901750807, "grad_norm": 15.08543872833252, "learning_rate": 1.3119723497081987e-05, "loss": 1.2472, "step": 71350 }, { "epoch": 12.129865714771375, "grad_norm": 12.435504913330078, "learning_rate": 1.3116890475381042e-05, "loss": 1.1596, "step": 71360 }, { "epoch": 12.131565527791944, "grad_norm": 14.964962005615234, "learning_rate": 1.3114057453680096e-05, "loss": 0.8278, "step": 71370 }, { "epoch": 12.13326534081251, "grad_norm": 16.531692504882812, "learning_rate": 1.3111224431979149e-05, "loss": 1.0544, "step": 71380 }, { "epoch": 12.134965153833079, "grad_norm": 14.407548904418945, "learning_rate": 1.3108391410278203e-05, "loss": 1.1541, "step": 71390 }, { "epoch": 12.136664966853646, "grad_norm": 11.780860900878906, "learning_rate": 1.3105558388577258e-05, "loss": 1.1279, "step": 71400 }, { "epoch": 12.138364779874214, "grad_norm": 15.034832954406738, "learning_rate": 1.310272536687631e-05, "loss": 1.2026, "step": 71410 }, { "epoch": 12.140064592894781, "grad_norm": 12.090458869934082, "learning_rate": 1.3099892345175365e-05, "loss": 1.1438, "step": 71420 }, { "epoch": 12.14176440591535, "grad_norm": 13.156399726867676, "learning_rate": 1.3097059323474418e-05, "loss": 0.9679, "step": 71430 }, { "epoch": 12.143464218935916, "grad_norm": 16.06421661376953, "learning_rate": 1.3094226301773472e-05, "loss": 1.006, "step": 71440 }, { "epoch": 12.145164031956485, "grad_norm": 17.31895637512207, "learning_rate": 1.3091393280072525e-05, "loss": 0.9874, "step": 71450 }, { "epoch": 12.146863844977052, "grad_norm": 12.895819664001465, "learning_rate": 1.308856025837158e-05, "loss": 0.9614, "step": 71460 }, { "epoch": 12.14856365799762, "grad_norm": 13.478409767150879, "learning_rate": 1.3085727236670632e-05, "loss": 1.0641, "step": 71470 }, { "epoch": 12.150263471018189, "grad_norm": 11.3074312210083, "learning_rate": 1.3082894214969686e-05, "loss": 1.0331, "step": 71480 }, { "epoch": 12.151963284038755, "grad_norm": 12.454940795898438, "learning_rate": 1.3080061193268741e-05, "loss": 1.0394, "step": 71490 }, { "epoch": 12.153663097059324, "grad_norm": 14.950180053710938, "learning_rate": 1.3077228171567793e-05, "loss": 1.265, "step": 71500 }, { "epoch": 12.15536291007989, "grad_norm": 10.689148902893066, "learning_rate": 1.3074395149866848e-05, "loss": 1.1436, "step": 71510 }, { "epoch": 12.15706272310046, "grad_norm": 14.27952766418457, "learning_rate": 1.3071562128165902e-05, "loss": 1.0121, "step": 71520 }, { "epoch": 12.158762536121026, "grad_norm": 26.273494720458984, "learning_rate": 1.3068729106464955e-05, "loss": 1.0049, "step": 71530 }, { "epoch": 12.160462349141595, "grad_norm": 12.016531944274902, "learning_rate": 1.3065896084764009e-05, "loss": 0.9999, "step": 71540 }, { "epoch": 12.162162162162161, "grad_norm": 17.735132217407227, "learning_rate": 1.3063063063063064e-05, "loss": 1.0299, "step": 71550 }, { "epoch": 12.16386197518273, "grad_norm": 16.440210342407227, "learning_rate": 1.3060230041362118e-05, "loss": 1.2058, "step": 71560 }, { "epoch": 12.165561788203298, "grad_norm": 15.076404571533203, "learning_rate": 1.3057397019661171e-05, "loss": 1.0444, "step": 71570 }, { "epoch": 12.167261601223865, "grad_norm": 13.551289558410645, "learning_rate": 1.3054563997960225e-05, "loss": 1.0453, "step": 71580 }, { "epoch": 12.168961414244434, "grad_norm": 12.433460235595703, "learning_rate": 1.305173097625928e-05, "loss": 0.8923, "step": 71590 }, { "epoch": 12.170661227265, "grad_norm": 16.303239822387695, "learning_rate": 1.3048897954558332e-05, "loss": 0.9511, "step": 71600 }, { "epoch": 12.172361040285569, "grad_norm": 19.3454647064209, "learning_rate": 1.3046064932857387e-05, "loss": 1.1951, "step": 71610 }, { "epoch": 12.174060853306136, "grad_norm": 16.18383026123047, "learning_rate": 1.304323191115644e-05, "loss": 0.9614, "step": 71620 }, { "epoch": 12.175760666326704, "grad_norm": 15.171202659606934, "learning_rate": 1.3040398889455494e-05, "loss": 0.9762, "step": 71630 }, { "epoch": 12.177460479347271, "grad_norm": 14.955760955810547, "learning_rate": 1.3037565867754547e-05, "loss": 1.055, "step": 71640 }, { "epoch": 12.17916029236784, "grad_norm": 11.207314491271973, "learning_rate": 1.3034732846053601e-05, "loss": 0.9472, "step": 71650 }, { "epoch": 12.180860105388406, "grad_norm": 17.053529739379883, "learning_rate": 1.3031899824352654e-05, "loss": 0.8321, "step": 71660 }, { "epoch": 12.182559918408975, "grad_norm": 11.58076000213623, "learning_rate": 1.3029066802651708e-05, "loss": 0.9228, "step": 71670 }, { "epoch": 12.184259731429544, "grad_norm": 13.133845329284668, "learning_rate": 1.3026233780950763e-05, "loss": 0.9755, "step": 71680 }, { "epoch": 12.18595954445011, "grad_norm": 12.059296607971191, "learning_rate": 1.3023400759249815e-05, "loss": 1.0717, "step": 71690 }, { "epoch": 12.187659357470679, "grad_norm": 23.265329360961914, "learning_rate": 1.302056773754887e-05, "loss": 1.1678, "step": 71700 }, { "epoch": 12.189359170491246, "grad_norm": 13.01193904876709, "learning_rate": 1.3017734715847924e-05, "loss": 1.0252, "step": 71710 }, { "epoch": 12.191058983511814, "grad_norm": 12.770431518554688, "learning_rate": 1.3014901694146977e-05, "loss": 0.9451, "step": 71720 }, { "epoch": 12.192758796532381, "grad_norm": 12.921329498291016, "learning_rate": 1.301206867244603e-05, "loss": 1.0626, "step": 71730 }, { "epoch": 12.19445860955295, "grad_norm": 13.445108413696289, "learning_rate": 1.3009235650745086e-05, "loss": 1.0927, "step": 71740 }, { "epoch": 12.196158422573516, "grad_norm": 13.47433853149414, "learning_rate": 1.3006402629044138e-05, "loss": 0.9805, "step": 71750 }, { "epoch": 12.197858235594085, "grad_norm": 11.364320755004883, "learning_rate": 1.3003569607343193e-05, "loss": 1.2131, "step": 71760 }, { "epoch": 12.199558048614652, "grad_norm": 15.156947135925293, "learning_rate": 1.3000736585642247e-05, "loss": 0.9903, "step": 71770 }, { "epoch": 12.20125786163522, "grad_norm": 24.854721069335938, "learning_rate": 1.29979035639413e-05, "loss": 1.0021, "step": 71780 }, { "epoch": 12.202957674655789, "grad_norm": 12.806279182434082, "learning_rate": 1.2995070542240354e-05, "loss": 1.0254, "step": 71790 }, { "epoch": 12.204657487676355, "grad_norm": 20.587512969970703, "learning_rate": 1.2992237520539409e-05, "loss": 0.9373, "step": 71800 }, { "epoch": 12.206357300696924, "grad_norm": 15.0407133102417, "learning_rate": 1.298940449883846e-05, "loss": 0.9841, "step": 71810 }, { "epoch": 12.20805711371749, "grad_norm": 12.811668395996094, "learning_rate": 1.2986571477137516e-05, "loss": 1.0692, "step": 71820 }, { "epoch": 12.20975692673806, "grad_norm": 12.863908767700195, "learning_rate": 1.298373845543657e-05, "loss": 1.0849, "step": 71830 }, { "epoch": 12.211456739758626, "grad_norm": 16.69502067565918, "learning_rate": 1.2980905433735621e-05, "loss": 1.0889, "step": 71840 }, { "epoch": 12.213156552779195, "grad_norm": 13.597264289855957, "learning_rate": 1.2978072412034676e-05, "loss": 0.8784, "step": 71850 }, { "epoch": 12.214856365799761, "grad_norm": 19.271194458007812, "learning_rate": 1.297523939033373e-05, "loss": 1.0303, "step": 71860 }, { "epoch": 12.21655617882033, "grad_norm": 15.061891555786133, "learning_rate": 1.2972406368632783e-05, "loss": 0.9866, "step": 71870 }, { "epoch": 12.218255991840898, "grad_norm": 12.828861236572266, "learning_rate": 1.2969573346931837e-05, "loss": 1.0357, "step": 71880 }, { "epoch": 12.219955804861465, "grad_norm": 11.525276184082031, "learning_rate": 1.2966740325230892e-05, "loss": 0.9194, "step": 71890 }, { "epoch": 12.221655617882034, "grad_norm": 19.34809684753418, "learning_rate": 1.2963907303529946e-05, "loss": 1.1689, "step": 71900 }, { "epoch": 12.2233554309026, "grad_norm": 15.898906707763672, "learning_rate": 1.2961074281829e-05, "loss": 1.0761, "step": 71910 }, { "epoch": 12.225055243923169, "grad_norm": 12.905197143554688, "learning_rate": 1.2958241260128053e-05, "loss": 1.19, "step": 71920 }, { "epoch": 12.226755056943736, "grad_norm": 16.552474975585938, "learning_rate": 1.2955408238427108e-05, "loss": 0.8274, "step": 71930 }, { "epoch": 12.228454869964304, "grad_norm": 35.613243103027344, "learning_rate": 1.295257521672616e-05, "loss": 0.8474, "step": 71940 }, { "epoch": 12.230154682984871, "grad_norm": 9.725595474243164, "learning_rate": 1.2949742195025215e-05, "loss": 1.1804, "step": 71950 }, { "epoch": 12.23185449600544, "grad_norm": 18.90228843688965, "learning_rate": 1.2946909173324269e-05, "loss": 1.0061, "step": 71960 }, { "epoch": 12.233554309026006, "grad_norm": 16.29656982421875, "learning_rate": 1.2944076151623322e-05, "loss": 0.9321, "step": 71970 }, { "epoch": 12.235254122046575, "grad_norm": 14.292237281799316, "learning_rate": 1.2941243129922376e-05, "loss": 0.8785, "step": 71980 }, { "epoch": 12.236953935067143, "grad_norm": 14.630817413330078, "learning_rate": 1.293841010822143e-05, "loss": 1.0164, "step": 71990 }, { "epoch": 12.23865374808771, "grad_norm": 12.764840126037598, "learning_rate": 1.2935577086520483e-05, "loss": 1.0508, "step": 72000 }, { "epoch": 12.240353561108279, "grad_norm": 11.546404838562012, "learning_rate": 1.2932744064819536e-05, "loss": 0.9602, "step": 72010 }, { "epoch": 12.242053374128846, "grad_norm": 12.884907722473145, "learning_rate": 1.2929911043118591e-05, "loss": 1.0881, "step": 72020 }, { "epoch": 12.243753187149414, "grad_norm": 15.542961120605469, "learning_rate": 1.2927078021417643e-05, "loss": 0.9695, "step": 72030 }, { "epoch": 12.24545300016998, "grad_norm": 16.400421142578125, "learning_rate": 1.2924244999716698e-05, "loss": 0.9869, "step": 72040 }, { "epoch": 12.24715281319055, "grad_norm": 11.730104446411133, "learning_rate": 1.2921411978015752e-05, "loss": 1.0021, "step": 72050 }, { "epoch": 12.248852626211116, "grad_norm": 16.092988967895508, "learning_rate": 1.2918578956314805e-05, "loss": 0.9302, "step": 72060 }, { "epoch": 12.250552439231685, "grad_norm": 19.073822021484375, "learning_rate": 1.2915745934613859e-05, "loss": 0.9852, "step": 72070 }, { "epoch": 12.252252252252251, "grad_norm": 18.539588928222656, "learning_rate": 1.2912912912912914e-05, "loss": 1.1382, "step": 72080 }, { "epoch": 12.25395206527282, "grad_norm": 11.928035736083984, "learning_rate": 1.2910079891211966e-05, "loss": 0.9344, "step": 72090 }, { "epoch": 12.255651878293389, "grad_norm": 13.112313270568848, "learning_rate": 1.2907246869511021e-05, "loss": 1.0544, "step": 72100 }, { "epoch": 12.257351691313955, "grad_norm": 11.152213096618652, "learning_rate": 1.2904413847810075e-05, "loss": 1.1827, "step": 72110 }, { "epoch": 12.259051504334524, "grad_norm": 13.363117218017578, "learning_rate": 1.2901580826109128e-05, "loss": 0.9645, "step": 72120 }, { "epoch": 12.26075131735509, "grad_norm": 13.785894393920898, "learning_rate": 1.2898747804408182e-05, "loss": 1.027, "step": 72130 }, { "epoch": 12.26245113037566, "grad_norm": 16.147722244262695, "learning_rate": 1.2895914782707237e-05, "loss": 0.9464, "step": 72140 }, { "epoch": 12.264150943396226, "grad_norm": 21.029237747192383, "learning_rate": 1.2893081761006289e-05, "loss": 1.0138, "step": 72150 }, { "epoch": 12.265850756416794, "grad_norm": 19.16269874572754, "learning_rate": 1.2890248739305344e-05, "loss": 0.9795, "step": 72160 }, { "epoch": 12.267550569437361, "grad_norm": 13.66389274597168, "learning_rate": 1.2887415717604398e-05, "loss": 0.9844, "step": 72170 }, { "epoch": 12.26925038245793, "grad_norm": 15.304993629455566, "learning_rate": 1.288458269590345e-05, "loss": 1.0306, "step": 72180 }, { "epoch": 12.270950195478497, "grad_norm": 20.87747573852539, "learning_rate": 1.2881749674202505e-05, "loss": 1.0821, "step": 72190 }, { "epoch": 12.272650008499065, "grad_norm": 15.70749568939209, "learning_rate": 1.2878916652501558e-05, "loss": 1.1547, "step": 72200 }, { "epoch": 12.274349821519634, "grad_norm": 10.582924842834473, "learning_rate": 1.2876083630800612e-05, "loss": 0.8623, "step": 72210 }, { "epoch": 12.2760496345402, "grad_norm": 17.068714141845703, "learning_rate": 1.2873250609099665e-05, "loss": 1.1915, "step": 72220 }, { "epoch": 12.277749447560769, "grad_norm": 15.098653793334961, "learning_rate": 1.287041758739872e-05, "loss": 0.7988, "step": 72230 }, { "epoch": 12.279449260581336, "grad_norm": 17.04929542541504, "learning_rate": 1.2867584565697772e-05, "loss": 1.0725, "step": 72240 }, { "epoch": 12.281149073601904, "grad_norm": 16.579265594482422, "learning_rate": 1.2864751543996827e-05, "loss": 1.1266, "step": 72250 }, { "epoch": 12.282848886622471, "grad_norm": 18.92554473876953, "learning_rate": 1.2861918522295881e-05, "loss": 1.0358, "step": 72260 }, { "epoch": 12.28454869964304, "grad_norm": 19.836978912353516, "learning_rate": 1.2859085500594936e-05, "loss": 1.0293, "step": 72270 }, { "epoch": 12.286248512663606, "grad_norm": 20.890071868896484, "learning_rate": 1.2856252478893988e-05, "loss": 0.9832, "step": 72280 }, { "epoch": 12.287948325684175, "grad_norm": 15.004475593566895, "learning_rate": 1.2853419457193043e-05, "loss": 1.0117, "step": 72290 }, { "epoch": 12.289648138704742, "grad_norm": 15.229911804199219, "learning_rate": 1.2850586435492097e-05, "loss": 1.1461, "step": 72300 }, { "epoch": 12.29134795172531, "grad_norm": 30.171428680419922, "learning_rate": 1.284775341379115e-05, "loss": 0.9279, "step": 72310 }, { "epoch": 12.293047764745879, "grad_norm": 139.95289611816406, "learning_rate": 1.2844920392090204e-05, "loss": 0.8688, "step": 72320 }, { "epoch": 12.294747577766445, "grad_norm": 12.586435317993164, "learning_rate": 1.2842087370389259e-05, "loss": 0.9922, "step": 72330 }, { "epoch": 12.296447390787014, "grad_norm": 14.692996978759766, "learning_rate": 1.2839254348688311e-05, "loss": 1.0501, "step": 72340 }, { "epoch": 12.29814720380758, "grad_norm": 14.14269733428955, "learning_rate": 1.2836421326987364e-05, "loss": 1.0551, "step": 72350 }, { "epoch": 12.29984701682815, "grad_norm": 21.157533645629883, "learning_rate": 1.283358830528642e-05, "loss": 1.0558, "step": 72360 }, { "epoch": 12.301546829848716, "grad_norm": 11.925776481628418, "learning_rate": 1.2830755283585471e-05, "loss": 1.109, "step": 72370 }, { "epoch": 12.303246642869285, "grad_norm": 23.33827781677246, "learning_rate": 1.2827922261884527e-05, "loss": 1.0214, "step": 72380 }, { "epoch": 12.304946455889851, "grad_norm": 11.69457721710205, "learning_rate": 1.282508924018358e-05, "loss": 1.0204, "step": 72390 }, { "epoch": 12.30664626891042, "grad_norm": 16.659358978271484, "learning_rate": 1.2822256218482634e-05, "loss": 0.9912, "step": 72400 }, { "epoch": 12.308346081930988, "grad_norm": 17.37411880493164, "learning_rate": 1.2819423196781687e-05, "loss": 0.9074, "step": 72410 }, { "epoch": 12.310045894951555, "grad_norm": 18.120351791381836, "learning_rate": 1.2816590175080742e-05, "loss": 0.9938, "step": 72420 }, { "epoch": 12.311745707972124, "grad_norm": 13.85544490814209, "learning_rate": 1.2813757153379794e-05, "loss": 0.8585, "step": 72430 }, { "epoch": 12.31344552099269, "grad_norm": 12.278038024902344, "learning_rate": 1.281092413167885e-05, "loss": 1.0643, "step": 72440 }, { "epoch": 12.315145334013259, "grad_norm": 15.013713836669922, "learning_rate": 1.2808091109977903e-05, "loss": 0.9438, "step": 72450 }, { "epoch": 12.316845147033826, "grad_norm": 19.45812225341797, "learning_rate": 1.2805258088276957e-05, "loss": 0.8536, "step": 72460 }, { "epoch": 12.318544960054394, "grad_norm": 14.45763111114502, "learning_rate": 1.280242506657601e-05, "loss": 0.9182, "step": 72470 }, { "epoch": 12.320244773074961, "grad_norm": 14.388740539550781, "learning_rate": 1.2799592044875065e-05, "loss": 1.1839, "step": 72480 }, { "epoch": 12.32194458609553, "grad_norm": 16.09999656677246, "learning_rate": 1.2796759023174117e-05, "loss": 1.1, "step": 72490 }, { "epoch": 12.323644399116096, "grad_norm": 9.433077812194824, "learning_rate": 1.2793926001473172e-05, "loss": 0.9479, "step": 72500 }, { "epoch": 12.325344212136665, "grad_norm": 10.1449613571167, "learning_rate": 1.2791092979772226e-05, "loss": 0.9529, "step": 72510 }, { "epoch": 12.327044025157234, "grad_norm": 14.444766998291016, "learning_rate": 1.2788259958071278e-05, "loss": 1.089, "step": 72520 }, { "epoch": 12.3287438381778, "grad_norm": 36.305728912353516, "learning_rate": 1.2785426936370333e-05, "loss": 1.0379, "step": 72530 }, { "epoch": 12.330443651198369, "grad_norm": 21.90367317199707, "learning_rate": 1.2782593914669386e-05, "loss": 0.9426, "step": 72540 }, { "epoch": 12.332143464218936, "grad_norm": 10.626805305480957, "learning_rate": 1.277976089296844e-05, "loss": 1.0011, "step": 72550 }, { "epoch": 12.333843277239504, "grad_norm": 12.821106910705566, "learning_rate": 1.2776927871267493e-05, "loss": 0.9927, "step": 72560 }, { "epoch": 12.335543090260071, "grad_norm": 10.29810905456543, "learning_rate": 1.2774094849566549e-05, "loss": 1.0585, "step": 72570 }, { "epoch": 12.33724290328064, "grad_norm": 14.016542434692383, "learning_rate": 1.27712618278656e-05, "loss": 1.0098, "step": 72580 }, { "epoch": 12.338942716301206, "grad_norm": 19.452795028686523, "learning_rate": 1.2768428806164656e-05, "loss": 1.1467, "step": 72590 }, { "epoch": 12.340642529321775, "grad_norm": 15.372335433959961, "learning_rate": 1.276559578446371e-05, "loss": 0.9196, "step": 72600 }, { "epoch": 12.342342342342342, "grad_norm": 17.32657241821289, "learning_rate": 1.2762762762762763e-05, "loss": 0.9494, "step": 72610 }, { "epoch": 12.34404215536291, "grad_norm": 13.753336906433105, "learning_rate": 1.2759929741061816e-05, "loss": 1.0804, "step": 72620 }, { "epoch": 12.345741968383479, "grad_norm": 22.788177490234375, "learning_rate": 1.2757096719360871e-05, "loss": 1.0429, "step": 72630 }, { "epoch": 12.347441781404045, "grad_norm": 16.652774810791016, "learning_rate": 1.2754263697659925e-05, "loss": 1.0492, "step": 72640 }, { "epoch": 12.349141594424614, "grad_norm": 12.36865234375, "learning_rate": 1.2751430675958979e-05, "loss": 0.9277, "step": 72650 }, { "epoch": 12.35084140744518, "grad_norm": 15.49846076965332, "learning_rate": 1.2748597654258032e-05, "loss": 1.0314, "step": 72660 }, { "epoch": 12.35254122046575, "grad_norm": 10.796989440917969, "learning_rate": 1.2745764632557087e-05, "loss": 1.0223, "step": 72670 }, { "epoch": 12.354241033486316, "grad_norm": 34.596824645996094, "learning_rate": 1.2742931610856139e-05, "loss": 0.9197, "step": 72680 }, { "epoch": 12.355940846506885, "grad_norm": 11.870951652526855, "learning_rate": 1.2740098589155193e-05, "loss": 0.9849, "step": 72690 }, { "epoch": 12.357640659527451, "grad_norm": 26.37343978881836, "learning_rate": 1.2737265567454248e-05, "loss": 0.9187, "step": 72700 }, { "epoch": 12.35934047254802, "grad_norm": 19.783428192138672, "learning_rate": 1.27344325457533e-05, "loss": 1.1432, "step": 72710 }, { "epoch": 12.361040285568588, "grad_norm": 12.145027160644531, "learning_rate": 1.2731599524052355e-05, "loss": 1.1645, "step": 72720 }, { "epoch": 12.362740098589155, "grad_norm": 17.783157348632812, "learning_rate": 1.2728766502351408e-05, "loss": 1.018, "step": 72730 }, { "epoch": 12.364439911609724, "grad_norm": 12.242566108703613, "learning_rate": 1.2725933480650462e-05, "loss": 0.8731, "step": 72740 }, { "epoch": 12.36613972463029, "grad_norm": 12.637332916259766, "learning_rate": 1.2723100458949515e-05, "loss": 0.7536, "step": 72750 }, { "epoch": 12.367839537650859, "grad_norm": 13.929612159729004, "learning_rate": 1.272026743724857e-05, "loss": 1.2085, "step": 72760 }, { "epoch": 12.369539350671426, "grad_norm": 16.221284866333008, "learning_rate": 1.2717434415547622e-05, "loss": 0.99, "step": 72770 }, { "epoch": 12.371239163691994, "grad_norm": 22.279272079467773, "learning_rate": 1.2714601393846678e-05, "loss": 0.9021, "step": 72780 }, { "epoch": 12.372938976712561, "grad_norm": 18.423702239990234, "learning_rate": 1.2711768372145731e-05, "loss": 1.1594, "step": 72790 }, { "epoch": 12.37463878973313, "grad_norm": 13.005115509033203, "learning_rate": 1.2708935350444785e-05, "loss": 1.1588, "step": 72800 }, { "epoch": 12.376338602753696, "grad_norm": 11.930571556091309, "learning_rate": 1.2706102328743838e-05, "loss": 1.0494, "step": 72810 }, { "epoch": 12.378038415774265, "grad_norm": 28.311214447021484, "learning_rate": 1.2703269307042893e-05, "loss": 0.9466, "step": 72820 }, { "epoch": 12.379738228794832, "grad_norm": 15.381478309631348, "learning_rate": 1.2700436285341945e-05, "loss": 1.2741, "step": 72830 }, { "epoch": 12.3814380418154, "grad_norm": 23.11528205871582, "learning_rate": 1.2697603263641e-05, "loss": 0.779, "step": 72840 }, { "epoch": 12.383137854835969, "grad_norm": 12.92796516418457, "learning_rate": 1.2694770241940054e-05, "loss": 1.0736, "step": 72850 }, { "epoch": 12.384837667856536, "grad_norm": 20.749900817871094, "learning_rate": 1.2691937220239108e-05, "loss": 1.2195, "step": 72860 }, { "epoch": 12.386537480877104, "grad_norm": 14.081503868103027, "learning_rate": 1.2689104198538161e-05, "loss": 1.0923, "step": 72870 }, { "epoch": 12.38823729389767, "grad_norm": 12.599132537841797, "learning_rate": 1.2686271176837215e-05, "loss": 1.2768, "step": 72880 }, { "epoch": 12.38993710691824, "grad_norm": 18.688720703125, "learning_rate": 1.2683438155136268e-05, "loss": 0.8797, "step": 72890 }, { "epoch": 12.391636919938806, "grad_norm": 15.618633270263672, "learning_rate": 1.2680605133435322e-05, "loss": 1.2708, "step": 72900 }, { "epoch": 12.393336732959375, "grad_norm": 14.65711498260498, "learning_rate": 1.2677772111734377e-05, "loss": 1.1482, "step": 72910 }, { "epoch": 12.395036545979941, "grad_norm": 21.373199462890625, "learning_rate": 1.2674939090033429e-05, "loss": 1.0052, "step": 72920 }, { "epoch": 12.39673635900051, "grad_norm": 14.997023582458496, "learning_rate": 1.2672106068332484e-05, "loss": 0.9874, "step": 72930 }, { "epoch": 12.398436172021079, "grad_norm": 18.12722396850586, "learning_rate": 1.2669273046631537e-05, "loss": 1.1323, "step": 72940 }, { "epoch": 12.400135985041645, "grad_norm": 16.190250396728516, "learning_rate": 1.2666440024930591e-05, "loss": 1.0394, "step": 72950 }, { "epoch": 12.401835798062214, "grad_norm": 15.571874618530273, "learning_rate": 1.2663607003229644e-05, "loss": 1.0228, "step": 72960 }, { "epoch": 12.40353561108278, "grad_norm": 14.500103950500488, "learning_rate": 1.26607739815287e-05, "loss": 0.8145, "step": 72970 }, { "epoch": 12.40523542410335, "grad_norm": 16.17804527282715, "learning_rate": 1.2657940959827752e-05, "loss": 0.929, "step": 72980 }, { "epoch": 12.406935237123916, "grad_norm": 17.367145538330078, "learning_rate": 1.2655107938126807e-05, "loss": 0.9492, "step": 72990 }, { "epoch": 12.408635050144484, "grad_norm": 16.635086059570312, "learning_rate": 1.265227491642586e-05, "loss": 1.1339, "step": 73000 }, { "epoch": 12.410334863165051, "grad_norm": 18.67603302001953, "learning_rate": 1.2649441894724915e-05, "loss": 0.898, "step": 73010 }, { "epoch": 12.41203467618562, "grad_norm": 15.48856258392334, "learning_rate": 1.2646608873023967e-05, "loss": 0.9052, "step": 73020 }, { "epoch": 12.413734489206186, "grad_norm": 22.275455474853516, "learning_rate": 1.2643775851323023e-05, "loss": 1.1089, "step": 73030 }, { "epoch": 12.415434302226755, "grad_norm": 18.631990432739258, "learning_rate": 1.2640942829622076e-05, "loss": 0.9206, "step": 73040 }, { "epoch": 12.417134115247324, "grad_norm": 17.819284439086914, "learning_rate": 1.2638109807921128e-05, "loss": 1.1888, "step": 73050 }, { "epoch": 12.41883392826789, "grad_norm": 16.331012725830078, "learning_rate": 1.2635276786220183e-05, "loss": 1.0149, "step": 73060 }, { "epoch": 12.420533741288459, "grad_norm": 18.561206817626953, "learning_rate": 1.2632443764519237e-05, "loss": 1.0556, "step": 73070 }, { "epoch": 12.422233554309026, "grad_norm": 20.208843231201172, "learning_rate": 1.262961074281829e-05, "loss": 0.9341, "step": 73080 }, { "epoch": 12.423933367329594, "grad_norm": 14.607671737670898, "learning_rate": 1.2626777721117344e-05, "loss": 0.9619, "step": 73090 }, { "epoch": 12.425633180350161, "grad_norm": 15.281368255615234, "learning_rate": 1.2623944699416399e-05, "loss": 0.9671, "step": 73100 }, { "epoch": 12.42733299337073, "grad_norm": 15.972799301147461, "learning_rate": 1.262111167771545e-05, "loss": 0.9998, "step": 73110 }, { "epoch": 12.429032806391296, "grad_norm": 16.159107208251953, "learning_rate": 1.2618278656014506e-05, "loss": 0.9464, "step": 73120 }, { "epoch": 12.430732619411865, "grad_norm": 15.251547813415527, "learning_rate": 1.261544563431356e-05, "loss": 1.1084, "step": 73130 }, { "epoch": 12.432432432432432, "grad_norm": 25.55221939086914, "learning_rate": 1.2612612612612613e-05, "loss": 1.0059, "step": 73140 }, { "epoch": 12.434132245453, "grad_norm": 19.917354583740234, "learning_rate": 1.2609779590911666e-05, "loss": 0.9157, "step": 73150 }, { "epoch": 12.435832058473569, "grad_norm": 11.88599967956543, "learning_rate": 1.2606946569210722e-05, "loss": 1.0127, "step": 73160 }, { "epoch": 12.437531871494135, "grad_norm": 12.140617370605469, "learning_rate": 1.2604113547509774e-05, "loss": 0.8698, "step": 73170 }, { "epoch": 12.439231684514704, "grad_norm": 13.500761985778809, "learning_rate": 1.2601280525808829e-05, "loss": 0.9096, "step": 73180 }, { "epoch": 12.44093149753527, "grad_norm": 12.688373565673828, "learning_rate": 1.2598447504107882e-05, "loss": 1.0484, "step": 73190 }, { "epoch": 12.44263131055584, "grad_norm": 12.674068450927734, "learning_rate": 1.2595614482406936e-05, "loss": 0.9486, "step": 73200 }, { "epoch": 12.444331123576406, "grad_norm": 14.636157989501953, "learning_rate": 1.259278146070599e-05, "loss": 0.964, "step": 73210 }, { "epoch": 12.446030936596975, "grad_norm": 15.214919090270996, "learning_rate": 1.2589948439005043e-05, "loss": 1.0548, "step": 73220 }, { "epoch": 12.447730749617541, "grad_norm": 11.16472339630127, "learning_rate": 1.2587115417304096e-05, "loss": 1.0178, "step": 73230 }, { "epoch": 12.44943056263811, "grad_norm": 13.258532524108887, "learning_rate": 1.258428239560315e-05, "loss": 1.0483, "step": 73240 }, { "epoch": 12.451130375658678, "grad_norm": 15.59104061126709, "learning_rate": 1.2581449373902205e-05, "loss": 0.9368, "step": 73250 }, { "epoch": 12.452830188679245, "grad_norm": 19.883420944213867, "learning_rate": 1.2578616352201257e-05, "loss": 1.1897, "step": 73260 }, { "epoch": 12.454530001699814, "grad_norm": 16.139741897583008, "learning_rate": 1.2575783330500312e-05, "loss": 1.1335, "step": 73270 }, { "epoch": 12.45622981472038, "grad_norm": 16.048879623413086, "learning_rate": 1.2572950308799366e-05, "loss": 1.0336, "step": 73280 }, { "epoch": 12.457929627740949, "grad_norm": 9.15404224395752, "learning_rate": 1.257011728709842e-05, "loss": 1.1753, "step": 73290 }, { "epoch": 12.459629440761516, "grad_norm": 18.39584732055664, "learning_rate": 1.2567284265397473e-05, "loss": 0.8822, "step": 73300 }, { "epoch": 12.461329253782084, "grad_norm": 15.24767017364502, "learning_rate": 1.2564451243696528e-05, "loss": 1.0309, "step": 73310 }, { "epoch": 12.463029066802651, "grad_norm": 11.803357124328613, "learning_rate": 1.256161822199558e-05, "loss": 1.0358, "step": 73320 }, { "epoch": 12.46472887982322, "grad_norm": 11.475337028503418, "learning_rate": 1.2558785200294635e-05, "loss": 1.0715, "step": 73330 }, { "epoch": 12.466428692843786, "grad_norm": 10.659965515136719, "learning_rate": 1.2555952178593688e-05, "loss": 0.9091, "step": 73340 }, { "epoch": 12.468128505864355, "grad_norm": 16.42650604248047, "learning_rate": 1.2553119156892742e-05, "loss": 1.0051, "step": 73350 }, { "epoch": 12.469828318884923, "grad_norm": 14.644230842590332, "learning_rate": 1.2550286135191796e-05, "loss": 0.8998, "step": 73360 }, { "epoch": 12.47152813190549, "grad_norm": 14.933170318603516, "learning_rate": 1.254745311349085e-05, "loss": 1.0403, "step": 73370 }, { "epoch": 12.473227944926059, "grad_norm": 16.450538635253906, "learning_rate": 1.2544620091789904e-05, "loss": 1.0795, "step": 73380 }, { "epoch": 12.474927757946626, "grad_norm": 18.030519485473633, "learning_rate": 1.2541787070088956e-05, "loss": 1.0272, "step": 73390 }, { "epoch": 12.476627570967194, "grad_norm": 22.840002059936523, "learning_rate": 1.2538954048388011e-05, "loss": 0.9547, "step": 73400 }, { "epoch": 12.47832738398776, "grad_norm": 13.525665283203125, "learning_rate": 1.2536121026687065e-05, "loss": 1.0779, "step": 73410 }, { "epoch": 12.48002719700833, "grad_norm": 19.219446182250977, "learning_rate": 1.2533288004986118e-05, "loss": 0.8823, "step": 73420 }, { "epoch": 12.481727010028896, "grad_norm": 21.870637893676758, "learning_rate": 1.2530454983285172e-05, "loss": 1.0363, "step": 73430 }, { "epoch": 12.483426823049465, "grad_norm": 12.196649551391602, "learning_rate": 1.2527621961584227e-05, "loss": 1.2153, "step": 73440 }, { "epoch": 12.485126636070031, "grad_norm": 16.367904663085938, "learning_rate": 1.2524788939883279e-05, "loss": 0.8854, "step": 73450 }, { "epoch": 12.4868264490906, "grad_norm": 17.7204647064209, "learning_rate": 1.2521955918182334e-05, "loss": 1.0666, "step": 73460 }, { "epoch": 12.488526262111169, "grad_norm": 12.416131973266602, "learning_rate": 1.2519122896481388e-05, "loss": 1.1028, "step": 73470 }, { "epoch": 12.490226075131735, "grad_norm": 10.780573844909668, "learning_rate": 1.2516289874780441e-05, "loss": 0.9907, "step": 73480 }, { "epoch": 12.491925888152304, "grad_norm": 15.619773864746094, "learning_rate": 1.2513456853079495e-05, "loss": 0.9764, "step": 73490 }, { "epoch": 12.49362570117287, "grad_norm": 11.55026626586914, "learning_rate": 1.251062383137855e-05, "loss": 1.038, "step": 73500 }, { "epoch": 12.49532551419344, "grad_norm": 14.37516975402832, "learning_rate": 1.2507790809677602e-05, "loss": 1.0429, "step": 73510 }, { "epoch": 12.497025327214006, "grad_norm": 13.746585845947266, "learning_rate": 1.2504957787976657e-05, "loss": 1.2303, "step": 73520 }, { "epoch": 12.498725140234574, "grad_norm": 16.902278900146484, "learning_rate": 1.250212476627571e-05, "loss": 1.122, "step": 73530 }, { "epoch": 12.500424953255141, "grad_norm": 17.487016677856445, "learning_rate": 1.2499291744574764e-05, "loss": 0.9739, "step": 73540 }, { "epoch": 12.50212476627571, "grad_norm": 21.814197540283203, "learning_rate": 1.2496458722873818e-05, "loss": 0.9392, "step": 73550 }, { "epoch": 12.503824579296278, "grad_norm": 14.719728469848633, "learning_rate": 1.2493625701172871e-05, "loss": 1.043, "step": 73560 }, { "epoch": 12.505524392316845, "grad_norm": 32.433815002441406, "learning_rate": 1.2490792679471925e-05, "loss": 1.0218, "step": 73570 }, { "epoch": 12.507224205337414, "grad_norm": 14.903437614440918, "learning_rate": 1.2487959657770978e-05, "loss": 0.9143, "step": 73580 }, { "epoch": 12.50892401835798, "grad_norm": 16.595428466796875, "learning_rate": 1.2485126636070033e-05, "loss": 0.8752, "step": 73590 }, { "epoch": 12.510623831378549, "grad_norm": 14.083894729614258, "learning_rate": 1.2482293614369085e-05, "loss": 1.0646, "step": 73600 }, { "epoch": 12.512323644399116, "grad_norm": 20.944068908691406, "learning_rate": 1.247946059266814e-05, "loss": 1.3282, "step": 73610 }, { "epoch": 12.514023457419684, "grad_norm": 14.724822998046875, "learning_rate": 1.2476627570967194e-05, "loss": 1.0099, "step": 73620 }, { "epoch": 12.515723270440251, "grad_norm": 16.26312828063965, "learning_rate": 1.2473794549266247e-05, "loss": 1.0605, "step": 73630 }, { "epoch": 12.51742308346082, "grad_norm": 10.296588897705078, "learning_rate": 1.2470961527565301e-05, "loss": 0.8963, "step": 73640 }, { "epoch": 12.519122896481386, "grad_norm": 15.807109832763672, "learning_rate": 1.2468128505864356e-05, "loss": 1.1998, "step": 73650 }, { "epoch": 12.520822709501955, "grad_norm": 25.482410430908203, "learning_rate": 1.2465295484163408e-05, "loss": 1.0266, "step": 73660 }, { "epoch": 12.522522522522522, "grad_norm": 22.365821838378906, "learning_rate": 1.2462462462462463e-05, "loss": 0.8439, "step": 73670 }, { "epoch": 12.52422233554309, "grad_norm": 13.39316463470459, "learning_rate": 1.2459629440761517e-05, "loss": 0.9948, "step": 73680 }, { "epoch": 12.525922148563659, "grad_norm": 19.706907272338867, "learning_rate": 1.245679641906057e-05, "loss": 1.038, "step": 73690 }, { "epoch": 12.527621961584225, "grad_norm": 15.788958549499512, "learning_rate": 1.2453963397359624e-05, "loss": 0.9635, "step": 73700 }, { "epoch": 12.529321774604794, "grad_norm": 12.34305191040039, "learning_rate": 1.2451130375658679e-05, "loss": 1.0625, "step": 73710 }, { "epoch": 12.53102158762536, "grad_norm": 13.413646697998047, "learning_rate": 1.2448297353957732e-05, "loss": 1.1213, "step": 73720 }, { "epoch": 12.53272140064593, "grad_norm": 16.64726448059082, "learning_rate": 1.2445464332256784e-05, "loss": 1.0744, "step": 73730 }, { "epoch": 12.534421213666496, "grad_norm": 11.65947151184082, "learning_rate": 1.244263131055584e-05, "loss": 1.131, "step": 73740 }, { "epoch": 12.536121026687065, "grad_norm": 23.74558448791504, "learning_rate": 1.2439798288854893e-05, "loss": 0.9516, "step": 73750 }, { "epoch": 12.537820839707631, "grad_norm": 13.83263874053955, "learning_rate": 1.2436965267153947e-05, "loss": 1.1057, "step": 73760 }, { "epoch": 12.5395206527282, "grad_norm": 14.188746452331543, "learning_rate": 1.2434132245453e-05, "loss": 0.9699, "step": 73770 }, { "epoch": 12.541220465748768, "grad_norm": 8.252645492553711, "learning_rate": 1.2431299223752055e-05, "loss": 0.852, "step": 73780 }, { "epoch": 12.542920278769335, "grad_norm": 13.9044771194458, "learning_rate": 1.2428466202051107e-05, "loss": 0.7646, "step": 73790 }, { "epoch": 12.544620091789904, "grad_norm": 22.36651611328125, "learning_rate": 1.2425633180350162e-05, "loss": 1.0133, "step": 73800 }, { "epoch": 12.54631990481047, "grad_norm": 15.47368335723877, "learning_rate": 1.2422800158649216e-05, "loss": 1.0569, "step": 73810 }, { "epoch": 12.548019717831039, "grad_norm": 23.962207794189453, "learning_rate": 1.241996713694827e-05, "loss": 0.9945, "step": 73820 }, { "epoch": 12.549719530851606, "grad_norm": 21.947040557861328, "learning_rate": 1.2417134115247323e-05, "loss": 1.1697, "step": 73830 }, { "epoch": 12.551419343872174, "grad_norm": 14.491456031799316, "learning_rate": 1.2414301093546378e-05, "loss": 0.8539, "step": 73840 }, { "epoch": 12.553119156892741, "grad_norm": 16.088773727416992, "learning_rate": 1.241146807184543e-05, "loss": 1.0603, "step": 73850 }, { "epoch": 12.55481896991331, "grad_norm": 11.393144607543945, "learning_rate": 1.2408635050144485e-05, "loss": 0.8761, "step": 73860 }, { "epoch": 12.556518782933876, "grad_norm": 13.270913124084473, "learning_rate": 1.2405802028443539e-05, "loss": 0.9801, "step": 73870 }, { "epoch": 12.558218595954445, "grad_norm": 14.524109840393066, "learning_rate": 1.2402969006742592e-05, "loss": 0.8787, "step": 73880 }, { "epoch": 12.559918408975014, "grad_norm": 24.73788833618164, "learning_rate": 1.2400135985041646e-05, "loss": 0.9742, "step": 73890 }, { "epoch": 12.56161822199558, "grad_norm": 15.259163856506348, "learning_rate": 1.23973029633407e-05, "loss": 0.981, "step": 73900 }, { "epoch": 12.563318035016149, "grad_norm": 22.94682502746582, "learning_rate": 1.2394469941639753e-05, "loss": 1.1613, "step": 73910 }, { "epoch": 12.565017848036716, "grad_norm": 14.346970558166504, "learning_rate": 1.2391636919938806e-05, "loss": 0.8622, "step": 73920 }, { "epoch": 12.566717661057284, "grad_norm": 15.784242630004883, "learning_rate": 1.2388803898237861e-05, "loss": 1.0915, "step": 73930 }, { "epoch": 12.568417474077851, "grad_norm": 14.644271850585938, "learning_rate": 1.2385970876536913e-05, "loss": 0.9142, "step": 73940 }, { "epoch": 12.57011728709842, "grad_norm": 20.9244384765625, "learning_rate": 1.2383137854835969e-05, "loss": 1.1698, "step": 73950 }, { "epoch": 12.571817100118986, "grad_norm": 17.069684982299805, "learning_rate": 1.2380304833135022e-05, "loss": 1.1144, "step": 73960 }, { "epoch": 12.573516913139555, "grad_norm": 18.703386306762695, "learning_rate": 1.2377471811434076e-05, "loss": 0.958, "step": 73970 }, { "epoch": 12.575216726160122, "grad_norm": 18.903200149536133, "learning_rate": 1.2374638789733129e-05, "loss": 0.9062, "step": 73980 }, { "epoch": 12.57691653918069, "grad_norm": 13.170713424682617, "learning_rate": 1.2371805768032184e-05, "loss": 0.9696, "step": 73990 }, { "epoch": 12.578616352201259, "grad_norm": 12.799047470092773, "learning_rate": 1.2368972746331236e-05, "loss": 0.9614, "step": 74000 }, { "epoch": 12.580316165221825, "grad_norm": 37.68075180053711, "learning_rate": 1.2366139724630291e-05, "loss": 1.002, "step": 74010 }, { "epoch": 12.582015978242394, "grad_norm": 9.930187225341797, "learning_rate": 1.2363306702929345e-05, "loss": 0.9369, "step": 74020 }, { "epoch": 12.58371579126296, "grad_norm": 13.862937927246094, "learning_rate": 1.2360473681228398e-05, "loss": 1.0315, "step": 74030 }, { "epoch": 12.58541560428353, "grad_norm": 15.253826141357422, "learning_rate": 1.2357640659527452e-05, "loss": 0.9982, "step": 74040 }, { "epoch": 12.587115417304096, "grad_norm": 11.445209503173828, "learning_rate": 1.2354807637826507e-05, "loss": 1.0813, "step": 74050 }, { "epoch": 12.588815230324665, "grad_norm": 17.70821762084961, "learning_rate": 1.2351974616125559e-05, "loss": 1.1668, "step": 74060 }, { "epoch": 12.590515043345231, "grad_norm": 15.924484252929688, "learning_rate": 1.2349141594424614e-05, "loss": 0.9908, "step": 74070 }, { "epoch": 12.5922148563658, "grad_norm": 19.43361473083496, "learning_rate": 1.2346308572723668e-05, "loss": 1.0227, "step": 74080 }, { "epoch": 12.593914669386368, "grad_norm": 16.810832977294922, "learning_rate": 1.2343475551022721e-05, "loss": 1.0461, "step": 74090 }, { "epoch": 12.595614482406935, "grad_norm": 15.621959686279297, "learning_rate": 1.2340642529321775e-05, "loss": 1.0758, "step": 74100 }, { "epoch": 12.597314295427504, "grad_norm": 13.164169311523438, "learning_rate": 1.2337809507620828e-05, "loss": 0.7892, "step": 74110 }, { "epoch": 12.59901410844807, "grad_norm": 11.170845031738281, "learning_rate": 1.2334976485919883e-05, "loss": 0.8815, "step": 74120 }, { "epoch": 12.600713921468639, "grad_norm": 17.749866485595703, "learning_rate": 1.2332143464218935e-05, "loss": 1.0443, "step": 74130 }, { "epoch": 12.602413734489206, "grad_norm": 13.28198528289795, "learning_rate": 1.232931044251799e-05, "loss": 0.9184, "step": 74140 }, { "epoch": 12.604113547509774, "grad_norm": 17.832141876220703, "learning_rate": 1.2326477420817044e-05, "loss": 1.0836, "step": 74150 }, { "epoch": 12.605813360530341, "grad_norm": 18.571552276611328, "learning_rate": 1.2323644399116098e-05, "loss": 1.0175, "step": 74160 }, { "epoch": 12.60751317355091, "grad_norm": 19.98784065246582, "learning_rate": 1.2320811377415151e-05, "loss": 1.0241, "step": 74170 }, { "epoch": 12.609212986571476, "grad_norm": 14.857078552246094, "learning_rate": 1.2317978355714206e-05, "loss": 0.7917, "step": 74180 }, { "epoch": 12.610912799592045, "grad_norm": 13.209774017333984, "learning_rate": 1.2315145334013258e-05, "loss": 0.9451, "step": 74190 }, { "epoch": 12.612612612612612, "grad_norm": 15.040987014770508, "learning_rate": 1.2312312312312313e-05, "loss": 1.0337, "step": 74200 }, { "epoch": 12.61431242563318, "grad_norm": 17.933374404907227, "learning_rate": 1.2309479290611367e-05, "loss": 0.8836, "step": 74210 }, { "epoch": 12.616012238653749, "grad_norm": 13.277966499328613, "learning_rate": 1.230664626891042e-05, "loss": 1.1188, "step": 74220 }, { "epoch": 12.617712051674316, "grad_norm": 14.626134872436523, "learning_rate": 1.2303813247209474e-05, "loss": 1.0151, "step": 74230 }, { "epoch": 12.619411864694884, "grad_norm": 13.918839454650879, "learning_rate": 1.2300980225508529e-05, "loss": 1.0323, "step": 74240 }, { "epoch": 12.62111167771545, "grad_norm": 13.222956657409668, "learning_rate": 1.2298147203807581e-05, "loss": 1.0208, "step": 74250 }, { "epoch": 12.62281149073602, "grad_norm": 13.43243408203125, "learning_rate": 1.2295314182106634e-05, "loss": 0.9109, "step": 74260 }, { "epoch": 12.624511303756586, "grad_norm": 15.158943176269531, "learning_rate": 1.229248116040569e-05, "loss": 1.0199, "step": 74270 }, { "epoch": 12.626211116777155, "grad_norm": 14.831951141357422, "learning_rate": 1.2289648138704742e-05, "loss": 0.9395, "step": 74280 }, { "epoch": 12.627910929797721, "grad_norm": 16.165224075317383, "learning_rate": 1.2286815117003797e-05, "loss": 0.7952, "step": 74290 }, { "epoch": 12.62961074281829, "grad_norm": 17.767784118652344, "learning_rate": 1.228398209530285e-05, "loss": 1.0135, "step": 74300 }, { "epoch": 12.631310555838859, "grad_norm": 14.93272590637207, "learning_rate": 1.2281149073601904e-05, "loss": 1.0558, "step": 74310 }, { "epoch": 12.633010368859425, "grad_norm": 15.09469985961914, "learning_rate": 1.2278316051900957e-05, "loss": 1.0114, "step": 74320 }, { "epoch": 12.634710181879994, "grad_norm": 17.69672966003418, "learning_rate": 1.2275483030200013e-05, "loss": 1.2051, "step": 74330 }, { "epoch": 12.63640999490056, "grad_norm": 22.138111114501953, "learning_rate": 1.2272650008499064e-05, "loss": 1.059, "step": 74340 }, { "epoch": 12.63810980792113, "grad_norm": 16.64874839782715, "learning_rate": 1.226981698679812e-05, "loss": 0.9803, "step": 74350 }, { "epoch": 12.639809620941696, "grad_norm": 12.60263729095459, "learning_rate": 1.2266983965097173e-05, "loss": 1.1405, "step": 74360 }, { "epoch": 12.641509433962264, "grad_norm": 34.67531204223633, "learning_rate": 1.2264150943396227e-05, "loss": 0.8918, "step": 74370 }, { "epoch": 12.643209246982831, "grad_norm": 12.70238971710205, "learning_rate": 1.226131792169528e-05, "loss": 1.1175, "step": 74380 }, { "epoch": 12.6449090600034, "grad_norm": 14.594178199768066, "learning_rate": 1.2258484899994335e-05, "loss": 1.0147, "step": 74390 }, { "epoch": 12.646608873023968, "grad_norm": 11.99266242980957, "learning_rate": 1.2255651878293387e-05, "loss": 0.9726, "step": 74400 }, { "epoch": 12.648308686044535, "grad_norm": 15.75233268737793, "learning_rate": 1.2252818856592442e-05, "loss": 0.8086, "step": 74410 }, { "epoch": 12.650008499065104, "grad_norm": 15.32608699798584, "learning_rate": 1.2249985834891496e-05, "loss": 1.1917, "step": 74420 }, { "epoch": 12.65170831208567, "grad_norm": 23.768522262573242, "learning_rate": 1.2247152813190548e-05, "loss": 1.1486, "step": 74430 }, { "epoch": 12.653408125106239, "grad_norm": 23.98399543762207, "learning_rate": 1.2244319791489603e-05, "loss": 0.9881, "step": 74440 }, { "epoch": 12.655107938126806, "grad_norm": 12.959613800048828, "learning_rate": 1.2241486769788656e-05, "loss": 1.092, "step": 74450 }, { "epoch": 12.656807751147374, "grad_norm": 39.690486907958984, "learning_rate": 1.2238653748087712e-05, "loss": 0.9913, "step": 74460 }, { "epoch": 12.658507564167941, "grad_norm": 24.203088760375977, "learning_rate": 1.2235820726386764e-05, "loss": 0.92, "step": 74470 }, { "epoch": 12.66020737718851, "grad_norm": 11.491244316101074, "learning_rate": 1.2232987704685819e-05, "loss": 1.0684, "step": 74480 }, { "epoch": 12.661907190209076, "grad_norm": 25.87104606628418, "learning_rate": 1.2230154682984872e-05, "loss": 1.084, "step": 74490 }, { "epoch": 12.663607003229645, "grad_norm": 11.673575401306152, "learning_rate": 1.2227321661283926e-05, "loss": 1.0146, "step": 74500 }, { "epoch": 12.665306816250212, "grad_norm": 10.597000122070312, "learning_rate": 1.222448863958298e-05, "loss": 1.1781, "step": 74510 }, { "epoch": 12.66700662927078, "grad_norm": 16.31815528869629, "learning_rate": 1.2221655617882035e-05, "loss": 1.0029, "step": 74520 }, { "epoch": 12.668706442291349, "grad_norm": 14.258591651916504, "learning_rate": 1.2218822596181086e-05, "loss": 1.039, "step": 74530 }, { "epoch": 12.670406255311915, "grad_norm": 13.940574645996094, "learning_rate": 1.2215989574480142e-05, "loss": 0.9934, "step": 74540 }, { "epoch": 12.672106068332484, "grad_norm": 13.803890228271484, "learning_rate": 1.2213156552779195e-05, "loss": 1.0739, "step": 74550 }, { "epoch": 12.67380588135305, "grad_norm": 12.966364860534668, "learning_rate": 1.2210323531078249e-05, "loss": 0.9189, "step": 74560 }, { "epoch": 12.67550569437362, "grad_norm": 13.390076637268066, "learning_rate": 1.2207490509377302e-05, "loss": 1.0896, "step": 74570 }, { "epoch": 12.677205507394186, "grad_norm": 10.813632011413574, "learning_rate": 1.2204657487676357e-05, "loss": 1.0297, "step": 74580 }, { "epoch": 12.678905320414755, "grad_norm": 12.795689582824707, "learning_rate": 1.220182446597541e-05, "loss": 1.0875, "step": 74590 }, { "epoch": 12.680605133435321, "grad_norm": 13.355804443359375, "learning_rate": 1.2198991444274463e-05, "loss": 0.8269, "step": 74600 }, { "epoch": 12.68230494645589, "grad_norm": 14.256839752197266, "learning_rate": 1.2196158422573518e-05, "loss": 0.9174, "step": 74610 }, { "epoch": 12.684004759476458, "grad_norm": 13.310483932495117, "learning_rate": 1.219332540087257e-05, "loss": 1.1471, "step": 74620 }, { "epoch": 12.685704572497025, "grad_norm": 39.1812744140625, "learning_rate": 1.2190492379171625e-05, "loss": 0.9755, "step": 74630 }, { "epoch": 12.687404385517594, "grad_norm": 13.722918510437012, "learning_rate": 1.2187659357470678e-05, "loss": 0.8904, "step": 74640 }, { "epoch": 12.68910419853816, "grad_norm": 14.000957489013672, "learning_rate": 1.2184826335769732e-05, "loss": 0.9731, "step": 74650 }, { "epoch": 12.690804011558729, "grad_norm": 14.546090126037598, "learning_rate": 1.2181993314068786e-05, "loss": 1.2273, "step": 74660 }, { "epoch": 12.692503824579296, "grad_norm": 21.082439422607422, "learning_rate": 1.217916029236784e-05, "loss": 1.0576, "step": 74670 }, { "epoch": 12.694203637599864, "grad_norm": 28.328073501586914, "learning_rate": 1.2176327270666893e-05, "loss": 0.7968, "step": 74680 }, { "epoch": 12.695903450620431, "grad_norm": 13.766427040100098, "learning_rate": 1.2173494248965948e-05, "loss": 0.927, "step": 74690 }, { "epoch": 12.697603263641, "grad_norm": 17.358760833740234, "learning_rate": 1.2170661227265001e-05, "loss": 1.1682, "step": 74700 }, { "epoch": 12.699303076661566, "grad_norm": 11.998700141906738, "learning_rate": 1.2167828205564055e-05, "loss": 1.0686, "step": 74710 }, { "epoch": 12.701002889682135, "grad_norm": 15.871207237243652, "learning_rate": 1.2164995183863108e-05, "loss": 1.256, "step": 74720 }, { "epoch": 12.702702702702704, "grad_norm": 15.330760955810547, "learning_rate": 1.2162162162162164e-05, "loss": 0.8937, "step": 74730 }, { "epoch": 12.70440251572327, "grad_norm": 17.279830932617188, "learning_rate": 1.2159329140461215e-05, "loss": 0.8846, "step": 74740 }, { "epoch": 12.706102328743839, "grad_norm": 11.72743034362793, "learning_rate": 1.215649611876027e-05, "loss": 0.8928, "step": 74750 }, { "epoch": 12.707802141764406, "grad_norm": 14.814111709594727, "learning_rate": 1.2153663097059324e-05, "loss": 1.1513, "step": 74760 }, { "epoch": 12.709501954784974, "grad_norm": 15.96503734588623, "learning_rate": 1.2150830075358376e-05, "loss": 0.9424, "step": 74770 }, { "epoch": 12.711201767805541, "grad_norm": 16.279644012451172, "learning_rate": 1.2147997053657431e-05, "loss": 0.89, "step": 74780 }, { "epoch": 12.71290158082611, "grad_norm": 18.306453704833984, "learning_rate": 1.2145164031956485e-05, "loss": 1.1772, "step": 74790 }, { "epoch": 12.714601393846676, "grad_norm": 22.86690330505371, "learning_rate": 1.2142331010255538e-05, "loss": 0.9949, "step": 74800 }, { "epoch": 12.716301206867245, "grad_norm": 15.000184059143066, "learning_rate": 1.2139497988554592e-05, "loss": 1.0426, "step": 74810 }, { "epoch": 12.718001019887812, "grad_norm": 13.093199729919434, "learning_rate": 1.2136664966853647e-05, "loss": 0.8594, "step": 74820 }, { "epoch": 12.71970083290838, "grad_norm": 17.166439056396484, "learning_rate": 1.21338319451527e-05, "loss": 0.9347, "step": 74830 }, { "epoch": 12.721400645928949, "grad_norm": 16.466289520263672, "learning_rate": 1.2130998923451754e-05, "loss": 0.9919, "step": 74840 }, { "epoch": 12.723100458949515, "grad_norm": 11.285197257995605, "learning_rate": 1.2128165901750808e-05, "loss": 0.7044, "step": 74850 }, { "epoch": 12.724800271970084, "grad_norm": 12.50585651397705, "learning_rate": 1.2125332880049863e-05, "loss": 1.1071, "step": 74860 }, { "epoch": 12.72650008499065, "grad_norm": 15.135801315307617, "learning_rate": 1.2122499858348915e-05, "loss": 1.0129, "step": 74870 }, { "epoch": 12.72819989801122, "grad_norm": 14.832651138305664, "learning_rate": 1.211966683664797e-05, "loss": 1.0863, "step": 74880 }, { "epoch": 12.729899711031786, "grad_norm": 11.078461647033691, "learning_rate": 1.2116833814947023e-05, "loss": 1.0487, "step": 74890 }, { "epoch": 12.731599524052355, "grad_norm": 13.277034759521484, "learning_rate": 1.2114000793246077e-05, "loss": 1.1097, "step": 74900 }, { "epoch": 12.733299337072921, "grad_norm": 11.709981918334961, "learning_rate": 1.211116777154513e-05, "loss": 1.0948, "step": 74910 }, { "epoch": 12.73499915009349, "grad_norm": 11.76952838897705, "learning_rate": 1.2108334749844186e-05, "loss": 0.9763, "step": 74920 }, { "epoch": 12.736698963114058, "grad_norm": 69.80035400390625, "learning_rate": 1.2105501728143237e-05, "loss": 0.9772, "step": 74930 }, { "epoch": 12.738398776134625, "grad_norm": 15.528470039367676, "learning_rate": 1.2102668706442291e-05, "loss": 0.7528, "step": 74940 }, { "epoch": 12.740098589155194, "grad_norm": 12.397980690002441, "learning_rate": 1.2099835684741346e-05, "loss": 0.8351, "step": 74950 }, { "epoch": 12.74179840217576, "grad_norm": 9.66718864440918, "learning_rate": 1.2097002663040398e-05, "loss": 1.0949, "step": 74960 }, { "epoch": 12.743498215196329, "grad_norm": 12.77209186553955, "learning_rate": 1.2094169641339453e-05, "loss": 0.9888, "step": 74970 }, { "epoch": 12.745198028216896, "grad_norm": 16.40740966796875, "learning_rate": 1.2091336619638507e-05, "loss": 1.0708, "step": 74980 }, { "epoch": 12.746897841237464, "grad_norm": 16.121746063232422, "learning_rate": 1.208850359793756e-05, "loss": 0.9792, "step": 74990 }, { "epoch": 12.748597654258031, "grad_norm": 15.26567268371582, "learning_rate": 1.2085670576236614e-05, "loss": 1.099, "step": 75000 }, { "epoch": 12.7502974672786, "grad_norm": 18.75192642211914, "learning_rate": 1.2082837554535669e-05, "loss": 0.966, "step": 75010 }, { "epoch": 12.751997280299166, "grad_norm": 18.56500244140625, "learning_rate": 1.208000453283472e-05, "loss": 0.8257, "step": 75020 }, { "epoch": 12.753697093319735, "grad_norm": 16.485448837280273, "learning_rate": 1.2077171511133776e-05, "loss": 0.9841, "step": 75030 }, { "epoch": 12.755396906340302, "grad_norm": 14.415229797363281, "learning_rate": 1.207433848943283e-05, "loss": 1.0186, "step": 75040 }, { "epoch": 12.75709671936087, "grad_norm": 12.059663772583008, "learning_rate": 1.2071505467731883e-05, "loss": 0.9943, "step": 75050 }, { "epoch": 12.758796532381439, "grad_norm": 14.636137008666992, "learning_rate": 1.2068672446030937e-05, "loss": 1.0592, "step": 75060 }, { "epoch": 12.760496345402006, "grad_norm": 10.941023826599121, "learning_rate": 1.2065839424329992e-05, "loss": 0.7681, "step": 75070 }, { "epoch": 12.762196158422574, "grad_norm": 13.391070365905762, "learning_rate": 1.2063006402629044e-05, "loss": 1.0729, "step": 75080 }, { "epoch": 12.76389597144314, "grad_norm": 15.290701866149902, "learning_rate": 1.2060173380928099e-05, "loss": 1.1012, "step": 75090 }, { "epoch": 12.76559578446371, "grad_norm": 17.19480323791504, "learning_rate": 1.2057340359227152e-05, "loss": 1.1701, "step": 75100 }, { "epoch": 12.767295597484276, "grad_norm": 12.734838485717773, "learning_rate": 1.2054507337526204e-05, "loss": 0.8721, "step": 75110 }, { "epoch": 12.768995410504845, "grad_norm": 15.898110389709473, "learning_rate": 1.205167431582526e-05, "loss": 0.8283, "step": 75120 }, { "epoch": 12.770695223525411, "grad_norm": 12.346315383911133, "learning_rate": 1.2048841294124313e-05, "loss": 1.0215, "step": 75130 }, { "epoch": 12.77239503654598, "grad_norm": 13.835050582885742, "learning_rate": 1.2046008272423366e-05, "loss": 0.9884, "step": 75140 }, { "epoch": 12.774094849566549, "grad_norm": 14.417010307312012, "learning_rate": 1.204317525072242e-05, "loss": 0.8892, "step": 75150 }, { "epoch": 12.775794662587115, "grad_norm": 17.448970794677734, "learning_rate": 1.2040342229021475e-05, "loss": 1.1341, "step": 75160 }, { "epoch": 12.777494475607684, "grad_norm": 13.97681999206543, "learning_rate": 1.2037509207320527e-05, "loss": 0.9289, "step": 75170 }, { "epoch": 12.77919428862825, "grad_norm": 10.700515747070312, "learning_rate": 1.2034676185619582e-05, "loss": 1.0902, "step": 75180 }, { "epoch": 12.78089410164882, "grad_norm": 15.597600936889648, "learning_rate": 1.2031843163918636e-05, "loss": 1.3987, "step": 75190 }, { "epoch": 12.782593914669386, "grad_norm": 10.354004859924316, "learning_rate": 1.2029010142217691e-05, "loss": 1.0758, "step": 75200 }, { "epoch": 12.784293727689954, "grad_norm": 14.728808403015137, "learning_rate": 1.2026177120516743e-05, "loss": 0.9338, "step": 75210 }, { "epoch": 12.785993540710521, "grad_norm": 22.317283630371094, "learning_rate": 1.2023344098815798e-05, "loss": 1.2208, "step": 75220 }, { "epoch": 12.78769335373109, "grad_norm": 18.672313690185547, "learning_rate": 1.2020511077114852e-05, "loss": 0.9182, "step": 75230 }, { "epoch": 12.789393166751656, "grad_norm": 11.910073280334473, "learning_rate": 1.2017678055413905e-05, "loss": 0.9065, "step": 75240 }, { "epoch": 12.791092979772225, "grad_norm": 12.456409454345703, "learning_rate": 1.2014845033712959e-05, "loss": 1.0132, "step": 75250 }, { "epoch": 12.792792792792794, "grad_norm": 16.12849235534668, "learning_rate": 1.2012012012012014e-05, "loss": 1.1102, "step": 75260 }, { "epoch": 12.79449260581336, "grad_norm": 20.701417922973633, "learning_rate": 1.2009178990311066e-05, "loss": 0.9778, "step": 75270 }, { "epoch": 12.796192418833929, "grad_norm": 18.37472152709961, "learning_rate": 1.200634596861012e-05, "loss": 1.0624, "step": 75280 }, { "epoch": 12.797892231854496, "grad_norm": 14.304396629333496, "learning_rate": 1.2003512946909174e-05, "loss": 1.0884, "step": 75290 }, { "epoch": 12.799592044875064, "grad_norm": 13.603397369384766, "learning_rate": 1.2000679925208226e-05, "loss": 1.1214, "step": 75300 }, { "epoch": 12.801291857895631, "grad_norm": 13.116205215454102, "learning_rate": 1.1997846903507281e-05, "loss": 0.9716, "step": 75310 }, { "epoch": 12.8029916709162, "grad_norm": 15.917253494262695, "learning_rate": 1.1995013881806335e-05, "loss": 1.042, "step": 75320 }, { "epoch": 12.804691483936766, "grad_norm": 14.952603340148926, "learning_rate": 1.1992180860105388e-05, "loss": 1.0946, "step": 75330 }, { "epoch": 12.806391296957335, "grad_norm": 14.6908540725708, "learning_rate": 1.1989347838404442e-05, "loss": 1.1137, "step": 75340 }, { "epoch": 12.808091109977902, "grad_norm": 17.333616256713867, "learning_rate": 1.1986514816703497e-05, "loss": 0.9515, "step": 75350 }, { "epoch": 12.80979092299847, "grad_norm": 13.54381275177002, "learning_rate": 1.1983681795002549e-05, "loss": 0.8934, "step": 75360 }, { "epoch": 12.811490736019039, "grad_norm": 14.124107360839844, "learning_rate": 1.1980848773301604e-05, "loss": 1.164, "step": 75370 }, { "epoch": 12.813190549039605, "grad_norm": 23.400901794433594, "learning_rate": 1.1978015751600658e-05, "loss": 1.1176, "step": 75380 }, { "epoch": 12.814890362060174, "grad_norm": 17.420169830322266, "learning_rate": 1.1975182729899711e-05, "loss": 0.8555, "step": 75390 }, { "epoch": 12.81659017508074, "grad_norm": 15.135274887084961, "learning_rate": 1.1972349708198765e-05, "loss": 0.9188, "step": 75400 }, { "epoch": 12.81828998810131, "grad_norm": 12.477042198181152, "learning_rate": 1.196951668649782e-05, "loss": 1.0127, "step": 75410 }, { "epoch": 12.819989801121876, "grad_norm": 13.057149887084961, "learning_rate": 1.1966683664796872e-05, "loss": 0.8871, "step": 75420 }, { "epoch": 12.821689614142445, "grad_norm": 15.372275352478027, "learning_rate": 1.1963850643095927e-05, "loss": 0.9509, "step": 75430 }, { "epoch": 12.823389427163011, "grad_norm": 16.440706253051758, "learning_rate": 1.196101762139498e-05, "loss": 1.1446, "step": 75440 }, { "epoch": 12.82508924018358, "grad_norm": 16.79180908203125, "learning_rate": 1.1958184599694034e-05, "loss": 1.198, "step": 75450 }, { "epoch": 12.826789053204148, "grad_norm": 15.983476638793945, "learning_rate": 1.1955351577993088e-05, "loss": 1.0242, "step": 75460 }, { "epoch": 12.828488866224715, "grad_norm": 15.305601119995117, "learning_rate": 1.1952518556292141e-05, "loss": 0.8932, "step": 75470 }, { "epoch": 12.830188679245284, "grad_norm": 12.570691108703613, "learning_rate": 1.1949685534591195e-05, "loss": 1.1085, "step": 75480 }, { "epoch": 12.83188849226585, "grad_norm": 11.77219295501709, "learning_rate": 1.1946852512890248e-05, "loss": 0.9723, "step": 75490 }, { "epoch": 12.833588305286419, "grad_norm": 17.920612335205078, "learning_rate": 1.1944019491189303e-05, "loss": 0.8847, "step": 75500 }, { "epoch": 12.835288118306986, "grad_norm": 15.425994873046875, "learning_rate": 1.1941186469488355e-05, "loss": 0.9402, "step": 75510 }, { "epoch": 12.836987931327554, "grad_norm": 13.898781776428223, "learning_rate": 1.193835344778741e-05, "loss": 0.8922, "step": 75520 }, { "epoch": 12.838687744348121, "grad_norm": 17.88460350036621, "learning_rate": 1.1935520426086464e-05, "loss": 0.8374, "step": 75530 }, { "epoch": 12.84038755736869, "grad_norm": 14.419448852539062, "learning_rate": 1.1932687404385517e-05, "loss": 1.055, "step": 75540 }, { "epoch": 12.842087370389256, "grad_norm": 13.72642707824707, "learning_rate": 1.1929854382684571e-05, "loss": 0.8744, "step": 75550 }, { "epoch": 12.843787183409825, "grad_norm": 11.336100578308105, "learning_rate": 1.1927021360983626e-05, "loss": 1.094, "step": 75560 }, { "epoch": 12.845486996430393, "grad_norm": 15.463812828063965, "learning_rate": 1.192418833928268e-05, "loss": 0.9788, "step": 75570 }, { "epoch": 12.84718680945096, "grad_norm": 17.53856086730957, "learning_rate": 1.1921355317581733e-05, "loss": 1.0081, "step": 75580 }, { "epoch": 12.848886622471529, "grad_norm": 33.54016876220703, "learning_rate": 1.1918522295880787e-05, "loss": 0.975, "step": 75590 }, { "epoch": 12.850586435492096, "grad_norm": 14.076641082763672, "learning_rate": 1.1915689274179842e-05, "loss": 1.2099, "step": 75600 }, { "epoch": 12.852286248512664, "grad_norm": 12.514032363891602, "learning_rate": 1.1912856252478894e-05, "loss": 1.306, "step": 75610 }, { "epoch": 12.85398606153323, "grad_norm": 14.686786651611328, "learning_rate": 1.1910023230777949e-05, "loss": 1.0102, "step": 75620 }, { "epoch": 12.8556858745538, "grad_norm": 34.3964958190918, "learning_rate": 1.1907190209077003e-05, "loss": 0.9048, "step": 75630 }, { "epoch": 12.857385687574366, "grad_norm": 13.369572639465332, "learning_rate": 1.1904357187376054e-05, "loss": 1.1097, "step": 75640 }, { "epoch": 12.859085500594935, "grad_norm": 18.741836547851562, "learning_rate": 1.190152416567511e-05, "loss": 1.1285, "step": 75650 }, { "epoch": 12.860785313615501, "grad_norm": 15.991293907165527, "learning_rate": 1.1898691143974163e-05, "loss": 1.0957, "step": 75660 }, { "epoch": 12.86248512663607, "grad_norm": 16.232288360595703, "learning_rate": 1.1895858122273217e-05, "loss": 1.001, "step": 75670 }, { "epoch": 12.864184939656639, "grad_norm": 14.114986419677734, "learning_rate": 1.189302510057227e-05, "loss": 1.0594, "step": 75680 }, { "epoch": 12.865884752677205, "grad_norm": 16.0872745513916, "learning_rate": 1.1890192078871325e-05, "loss": 1.0775, "step": 75690 }, { "epoch": 12.867584565697774, "grad_norm": 16.587892532348633, "learning_rate": 1.1887359057170377e-05, "loss": 1.3141, "step": 75700 }, { "epoch": 12.86928437871834, "grad_norm": 24.502431869506836, "learning_rate": 1.1884526035469432e-05, "loss": 1.0684, "step": 75710 }, { "epoch": 12.87098419173891, "grad_norm": 12.192739486694336, "learning_rate": 1.1881693013768486e-05, "loss": 1.1867, "step": 75720 }, { "epoch": 12.872684004759476, "grad_norm": 30.229990005493164, "learning_rate": 1.187885999206754e-05, "loss": 0.8411, "step": 75730 }, { "epoch": 12.874383817780044, "grad_norm": 13.345376968383789, "learning_rate": 1.1876026970366593e-05, "loss": 1.1653, "step": 75740 }, { "epoch": 12.876083630800611, "grad_norm": 10.623579025268555, "learning_rate": 1.1873193948665648e-05, "loss": 1.153, "step": 75750 }, { "epoch": 12.87778344382118, "grad_norm": 16.42326545715332, "learning_rate": 1.18703609269647e-05, "loss": 1.1486, "step": 75760 }, { "epoch": 12.879483256841748, "grad_norm": 16.0644474029541, "learning_rate": 1.1867527905263755e-05, "loss": 1.0586, "step": 75770 }, { "epoch": 12.881183069862315, "grad_norm": 21.17926788330078, "learning_rate": 1.1864694883562809e-05, "loss": 0.9887, "step": 75780 }, { "epoch": 12.882882882882884, "grad_norm": 14.797754287719727, "learning_rate": 1.1861861861861862e-05, "loss": 0.9499, "step": 75790 }, { "epoch": 12.88458269590345, "grad_norm": 19.140478134155273, "learning_rate": 1.1859028840160916e-05, "loss": 0.774, "step": 75800 }, { "epoch": 12.886282508924019, "grad_norm": 12.54073429107666, "learning_rate": 1.185619581845997e-05, "loss": 1.0919, "step": 75810 }, { "epoch": 12.887982321944586, "grad_norm": 19.803987503051758, "learning_rate": 1.1853362796759023e-05, "loss": 0.8893, "step": 75820 }, { "epoch": 12.889682134965154, "grad_norm": 13.704693794250488, "learning_rate": 1.1850529775058076e-05, "loss": 1.0201, "step": 75830 }, { "epoch": 12.891381947985721, "grad_norm": 12.907613754272461, "learning_rate": 1.1847696753357132e-05, "loss": 0.9039, "step": 75840 }, { "epoch": 12.89308176100629, "grad_norm": 9.265589714050293, "learning_rate": 1.1844863731656183e-05, "loss": 1.0694, "step": 75850 }, { "epoch": 12.894781574026856, "grad_norm": 36.043941497802734, "learning_rate": 1.1842030709955239e-05, "loss": 1.2482, "step": 75860 }, { "epoch": 12.896481387047425, "grad_norm": 20.35649299621582, "learning_rate": 1.1839197688254292e-05, "loss": 0.9579, "step": 75870 }, { "epoch": 12.898181200067992, "grad_norm": 17.08846664428711, "learning_rate": 1.1836364666553346e-05, "loss": 1.0809, "step": 75880 }, { "epoch": 12.89988101308856, "grad_norm": 15.097613334655762, "learning_rate": 1.18335316448524e-05, "loss": 1.0501, "step": 75890 }, { "epoch": 12.901580826109129, "grad_norm": 12.469206809997559, "learning_rate": 1.1830698623151454e-05, "loss": 1.0115, "step": 75900 }, { "epoch": 12.903280639129695, "grad_norm": 12.034581184387207, "learning_rate": 1.1827865601450508e-05, "loss": 1.0208, "step": 75910 }, { "epoch": 12.904980452150264, "grad_norm": 11.108855247497559, "learning_rate": 1.1825032579749561e-05, "loss": 1.1356, "step": 75920 }, { "epoch": 12.90668026517083, "grad_norm": 11.718653678894043, "learning_rate": 1.1822199558048615e-05, "loss": 1.0814, "step": 75930 }, { "epoch": 12.9083800781914, "grad_norm": 15.091021537780762, "learning_rate": 1.181936653634767e-05, "loss": 0.879, "step": 75940 }, { "epoch": 12.910079891211966, "grad_norm": 15.03564453125, "learning_rate": 1.1816533514646722e-05, "loss": 1.0158, "step": 75950 }, { "epoch": 12.911779704232535, "grad_norm": 61.41984176635742, "learning_rate": 1.1813700492945777e-05, "loss": 1.0375, "step": 75960 }, { "epoch": 12.913479517253101, "grad_norm": 14.231405258178711, "learning_rate": 1.181086747124483e-05, "loss": 0.9507, "step": 75970 }, { "epoch": 12.91517933027367, "grad_norm": 15.163511276245117, "learning_rate": 1.1808034449543883e-05, "loss": 0.9961, "step": 75980 }, { "epoch": 12.916879143294238, "grad_norm": 22.20620346069336, "learning_rate": 1.1805201427842938e-05, "loss": 0.9693, "step": 75990 }, { "epoch": 12.918578956314805, "grad_norm": 20.145572662353516, "learning_rate": 1.1802368406141991e-05, "loss": 1.0317, "step": 76000 }, { "epoch": 12.920278769335374, "grad_norm": 11.163675308227539, "learning_rate": 1.1799535384441045e-05, "loss": 1.1298, "step": 76010 }, { "epoch": 12.92197858235594, "grad_norm": 13.32911205291748, "learning_rate": 1.1796702362740098e-05, "loss": 1.1454, "step": 76020 }, { "epoch": 12.923678395376509, "grad_norm": 11.315788269042969, "learning_rate": 1.1793869341039154e-05, "loss": 1.1692, "step": 76030 }, { "epoch": 12.925378208397076, "grad_norm": 15.3287992477417, "learning_rate": 1.1791036319338205e-05, "loss": 0.9305, "step": 76040 }, { "epoch": 12.927078021417644, "grad_norm": 18.06903076171875, "learning_rate": 1.178820329763726e-05, "loss": 0.9537, "step": 76050 }, { "epoch": 12.928777834438211, "grad_norm": 14.362330436706543, "learning_rate": 1.1785370275936314e-05, "loss": 1.0947, "step": 76060 }, { "epoch": 12.93047764745878, "grad_norm": 14.748292922973633, "learning_rate": 1.1782537254235368e-05, "loss": 0.9106, "step": 76070 }, { "epoch": 12.932177460479346, "grad_norm": 11.632745742797852, "learning_rate": 1.1779704232534421e-05, "loss": 1.1616, "step": 76080 }, { "epoch": 12.933877273499915, "grad_norm": 14.629947662353516, "learning_rate": 1.1776871210833476e-05, "loss": 1.0024, "step": 76090 }, { "epoch": 12.935577086520484, "grad_norm": 14.651885986328125, "learning_rate": 1.1774038189132528e-05, "loss": 0.8558, "step": 76100 }, { "epoch": 12.93727689954105, "grad_norm": 16.91404914855957, "learning_rate": 1.1771205167431583e-05, "loss": 1.0755, "step": 76110 }, { "epoch": 12.938976712561619, "grad_norm": 18.931684494018555, "learning_rate": 1.1768372145730637e-05, "loss": 0.9286, "step": 76120 }, { "epoch": 12.940676525582186, "grad_norm": 13.92760181427002, "learning_rate": 1.176553912402969e-05, "loss": 0.9875, "step": 76130 }, { "epoch": 12.942376338602754, "grad_norm": 12.123698234558105, "learning_rate": 1.1762706102328744e-05, "loss": 0.9079, "step": 76140 }, { "epoch": 12.944076151623321, "grad_norm": 9.562174797058105, "learning_rate": 1.1759873080627798e-05, "loss": 1.0223, "step": 76150 }, { "epoch": 12.94577596464389, "grad_norm": 38.03489303588867, "learning_rate": 1.1757040058926851e-05, "loss": 1.1097, "step": 76160 }, { "epoch": 12.947475777664456, "grad_norm": 14.950166702270508, "learning_rate": 1.1754207037225905e-05, "loss": 1.1083, "step": 76170 }, { "epoch": 12.949175590685025, "grad_norm": 14.210777282714844, "learning_rate": 1.175137401552496e-05, "loss": 1.1793, "step": 76180 }, { "epoch": 12.950875403705592, "grad_norm": 17.02183723449707, "learning_rate": 1.1748540993824012e-05, "loss": 0.9503, "step": 76190 }, { "epoch": 12.95257521672616, "grad_norm": 13.369267463684082, "learning_rate": 1.1745707972123067e-05, "loss": 1.1364, "step": 76200 }, { "epoch": 12.954275029746729, "grad_norm": 14.146634101867676, "learning_rate": 1.174287495042212e-05, "loss": 1.1133, "step": 76210 }, { "epoch": 12.955974842767295, "grad_norm": 12.656209945678711, "learning_rate": 1.1740041928721174e-05, "loss": 1.0499, "step": 76220 }, { "epoch": 12.957674655787864, "grad_norm": 14.26394271850586, "learning_rate": 1.1737208907020227e-05, "loss": 0.9055, "step": 76230 }, { "epoch": 12.95937446880843, "grad_norm": 16.92902374267578, "learning_rate": 1.1734375885319283e-05, "loss": 1.1245, "step": 76240 }, { "epoch": 12.961074281829, "grad_norm": 12.700515747070312, "learning_rate": 1.1731542863618334e-05, "loss": 1.36, "step": 76250 }, { "epoch": 12.962774094849566, "grad_norm": 27.19908905029297, "learning_rate": 1.172870984191739e-05, "loss": 0.9993, "step": 76260 }, { "epoch": 12.964473907870135, "grad_norm": 20.43283462524414, "learning_rate": 1.1725876820216443e-05, "loss": 0.9011, "step": 76270 }, { "epoch": 12.966173720890701, "grad_norm": 16.466426849365234, "learning_rate": 1.1723043798515498e-05, "loss": 0.9477, "step": 76280 }, { "epoch": 12.96787353391127, "grad_norm": 21.500049591064453, "learning_rate": 1.172021077681455e-05, "loss": 1.0422, "step": 76290 }, { "epoch": 12.969573346931838, "grad_norm": 15.398246765136719, "learning_rate": 1.1717377755113605e-05, "loss": 0.9734, "step": 76300 }, { "epoch": 12.971273159952405, "grad_norm": 17.643375396728516, "learning_rate": 1.1714544733412659e-05, "loss": 1.0295, "step": 76310 }, { "epoch": 12.972972972972974, "grad_norm": 14.912607192993164, "learning_rate": 1.171171171171171e-05, "loss": 0.9343, "step": 76320 }, { "epoch": 12.97467278599354, "grad_norm": 17.80035972595215, "learning_rate": 1.1708878690010766e-05, "loss": 1.1018, "step": 76330 }, { "epoch": 12.976372599014109, "grad_norm": 14.012857437133789, "learning_rate": 1.170604566830982e-05, "loss": 1.0157, "step": 76340 }, { "epoch": 12.978072412034676, "grad_norm": 12.34062671661377, "learning_rate": 1.1703212646608873e-05, "loss": 1.0034, "step": 76350 }, { "epoch": 12.979772225055244, "grad_norm": 14.487496376037598, "learning_rate": 1.1700379624907927e-05, "loss": 0.9609, "step": 76360 }, { "epoch": 12.981472038075811, "grad_norm": 20.761878967285156, "learning_rate": 1.1697546603206982e-05, "loss": 0.9839, "step": 76370 }, { "epoch": 12.98317185109638, "grad_norm": 16.75369644165039, "learning_rate": 1.1694713581506034e-05, "loss": 0.9415, "step": 76380 }, { "epoch": 12.984871664116946, "grad_norm": 13.763152122497559, "learning_rate": 1.1691880559805089e-05, "loss": 1.0435, "step": 76390 }, { "epoch": 12.986571477137515, "grad_norm": 13.98508071899414, "learning_rate": 1.1689047538104142e-05, "loss": 1.0234, "step": 76400 }, { "epoch": 12.988271290158082, "grad_norm": 14.01479721069336, "learning_rate": 1.1686214516403196e-05, "loss": 0.9004, "step": 76410 }, { "epoch": 12.98997110317865, "grad_norm": 15.525555610656738, "learning_rate": 1.168338149470225e-05, "loss": 1.0658, "step": 76420 }, { "epoch": 12.991670916199219, "grad_norm": 11.844548225402832, "learning_rate": 1.1680548473001305e-05, "loss": 1.019, "step": 76430 }, { "epoch": 12.993370729219786, "grad_norm": 13.755415916442871, "learning_rate": 1.1677715451300356e-05, "loss": 1.3127, "step": 76440 }, { "epoch": 12.995070542240354, "grad_norm": 13.505614280700684, "learning_rate": 1.1674882429599412e-05, "loss": 1.1025, "step": 76450 }, { "epoch": 12.99677035526092, "grad_norm": 12.11274528503418, "learning_rate": 1.1672049407898465e-05, "loss": 1.1119, "step": 76460 }, { "epoch": 12.99847016828149, "grad_norm": 13.385542869567871, "learning_rate": 1.1669216386197519e-05, "loss": 1.1475, "step": 76470 }, { "epoch": 13.0, "eval_cer": 1.0, "eval_loss": 2.56784987449646, "eval_runtime": 2029.6173, "eval_samples_per_second": 0.232, "eval_steps_per_second": 0.232, "step": 76479 }, { "epoch": 13.000169981302056, "grad_norm": 11.113241195678711, "learning_rate": 1.1666383364496572e-05, "loss": 1.0668, "step": 76480 }, { "epoch": 13.001869794322625, "grad_norm": 12.337892532348633, "learning_rate": 1.1663550342795627e-05, "loss": 0.9984, "step": 76490 }, { "epoch": 13.003569607343191, "grad_norm": 16.5306453704834, "learning_rate": 1.166071732109468e-05, "loss": 0.9124, "step": 76500 }, { "epoch": 13.00526942036376, "grad_norm": 16.398799896240234, "learning_rate": 1.1657884299393733e-05, "loss": 1.0942, "step": 76510 }, { "epoch": 13.006969233384329, "grad_norm": 15.537219047546387, "learning_rate": 1.1655051277692788e-05, "loss": 1.2986, "step": 76520 }, { "epoch": 13.008669046404895, "grad_norm": 13.299960136413574, "learning_rate": 1.165221825599184e-05, "loss": 0.8422, "step": 76530 }, { "epoch": 13.010368859425464, "grad_norm": 12.45922565460205, "learning_rate": 1.1649385234290895e-05, "loss": 0.854, "step": 76540 }, { "epoch": 13.01206867244603, "grad_norm": 10.799757957458496, "learning_rate": 1.1646552212589949e-05, "loss": 0.7688, "step": 76550 }, { "epoch": 13.0137684854666, "grad_norm": 12.44224739074707, "learning_rate": 1.1643719190889002e-05, "loss": 1.0456, "step": 76560 }, { "epoch": 13.015468298487166, "grad_norm": 16.66382598876953, "learning_rate": 1.1640886169188056e-05, "loss": 0.8155, "step": 76570 }, { "epoch": 13.017168111507734, "grad_norm": 21.02565574645996, "learning_rate": 1.163805314748711e-05, "loss": 0.7853, "step": 76580 }, { "epoch": 13.018867924528301, "grad_norm": 16.455900192260742, "learning_rate": 1.1635220125786163e-05, "loss": 0.8719, "step": 76590 }, { "epoch": 13.02056773754887, "grad_norm": 13.44214916229248, "learning_rate": 1.1632387104085218e-05, "loss": 0.7481, "step": 76600 }, { "epoch": 13.022267550569437, "grad_norm": 14.297119140625, "learning_rate": 1.1629554082384271e-05, "loss": 0.9131, "step": 76610 }, { "epoch": 13.023967363590005, "grad_norm": 12.719025611877441, "learning_rate": 1.1626721060683325e-05, "loss": 0.7458, "step": 76620 }, { "epoch": 13.025667176610574, "grad_norm": 16.668058395385742, "learning_rate": 1.1623888038982378e-05, "loss": 0.8193, "step": 76630 }, { "epoch": 13.02736698963114, "grad_norm": 13.205857276916504, "learning_rate": 1.1621055017281434e-05, "loss": 0.8268, "step": 76640 }, { "epoch": 13.029066802651709, "grad_norm": 15.549055099487305, "learning_rate": 1.1618221995580487e-05, "loss": 1.1015, "step": 76650 }, { "epoch": 13.030766615672276, "grad_norm": 15.758252143859863, "learning_rate": 1.161538897387954e-05, "loss": 0.7249, "step": 76660 }, { "epoch": 13.032466428692844, "grad_norm": 13.190099716186523, "learning_rate": 1.1612555952178594e-05, "loss": 0.8275, "step": 76670 }, { "epoch": 13.034166241713411, "grad_norm": 11.27623462677002, "learning_rate": 1.1609722930477648e-05, "loss": 1.0394, "step": 76680 }, { "epoch": 13.03586605473398, "grad_norm": 12.430938720703125, "learning_rate": 1.1606889908776701e-05, "loss": 0.8974, "step": 76690 }, { "epoch": 13.037565867754546, "grad_norm": 31.884475708007812, "learning_rate": 1.1604056887075755e-05, "loss": 0.7992, "step": 76700 }, { "epoch": 13.039265680775115, "grad_norm": 13.378474235534668, "learning_rate": 1.160122386537481e-05, "loss": 1.0581, "step": 76710 }, { "epoch": 13.040965493795682, "grad_norm": 11.454998970031738, "learning_rate": 1.1598390843673862e-05, "loss": 1.0204, "step": 76720 }, { "epoch": 13.04266530681625, "grad_norm": 11.900732040405273, "learning_rate": 1.1595557821972917e-05, "loss": 0.9036, "step": 76730 }, { "epoch": 13.044365119836819, "grad_norm": 24.72733497619629, "learning_rate": 1.159272480027197e-05, "loss": 0.8097, "step": 76740 }, { "epoch": 13.046064932857385, "grad_norm": 12.024064064025879, "learning_rate": 1.1589891778571024e-05, "loss": 0.9751, "step": 76750 }, { "epoch": 13.047764745877954, "grad_norm": 18.42201042175293, "learning_rate": 1.1587058756870078e-05, "loss": 1.0479, "step": 76760 }, { "epoch": 13.04946455889852, "grad_norm": 15.726460456848145, "learning_rate": 1.1584225735169133e-05, "loss": 1.0523, "step": 76770 }, { "epoch": 13.05116437191909, "grad_norm": 17.415264129638672, "learning_rate": 1.1581392713468185e-05, "loss": 0.8015, "step": 76780 }, { "epoch": 13.052864184939656, "grad_norm": 13.779866218566895, "learning_rate": 1.157855969176724e-05, "loss": 0.8778, "step": 76790 }, { "epoch": 13.054563997960225, "grad_norm": 17.100889205932617, "learning_rate": 1.1575726670066293e-05, "loss": 1.098, "step": 76800 }, { "epoch": 13.056263810980791, "grad_norm": 17.843631744384766, "learning_rate": 1.1572893648365347e-05, "loss": 0.8676, "step": 76810 }, { "epoch": 13.05796362400136, "grad_norm": 11.25027847290039, "learning_rate": 1.15700606266644e-05, "loss": 0.7403, "step": 76820 }, { "epoch": 13.059663437021928, "grad_norm": 13.70500373840332, "learning_rate": 1.1567227604963456e-05, "loss": 0.9544, "step": 76830 }, { "epoch": 13.061363250042495, "grad_norm": 14.314311027526855, "learning_rate": 1.1564394583262507e-05, "loss": 0.9073, "step": 76840 }, { "epoch": 13.063063063063064, "grad_norm": 17.072078704833984, "learning_rate": 1.1561561561561561e-05, "loss": 0.8282, "step": 76850 }, { "epoch": 13.06476287608363, "grad_norm": 15.840925216674805, "learning_rate": 1.1558728539860616e-05, "loss": 0.9618, "step": 76860 }, { "epoch": 13.066462689104199, "grad_norm": 15.497159004211426, "learning_rate": 1.1555895518159668e-05, "loss": 0.9205, "step": 76870 }, { "epoch": 13.068162502124766, "grad_norm": 14.146989822387695, "learning_rate": 1.1553062496458723e-05, "loss": 0.9772, "step": 76880 }, { "epoch": 13.069862315145334, "grad_norm": 19.277807235717773, "learning_rate": 1.1550229474757777e-05, "loss": 0.9548, "step": 76890 }, { "epoch": 13.071562128165901, "grad_norm": 11.393549919128418, "learning_rate": 1.154739645305683e-05, "loss": 1.0424, "step": 76900 }, { "epoch": 13.07326194118647, "grad_norm": 14.612698554992676, "learning_rate": 1.1544563431355884e-05, "loss": 1.0763, "step": 76910 }, { "epoch": 13.074961754207036, "grad_norm": 14.833239555358887, "learning_rate": 1.1541730409654939e-05, "loss": 0.9759, "step": 76920 }, { "epoch": 13.076661567227605, "grad_norm": 13.716155052185059, "learning_rate": 1.1538897387953991e-05, "loss": 0.8486, "step": 76930 }, { "epoch": 13.078361380248174, "grad_norm": 17.75796890258789, "learning_rate": 1.1536064366253046e-05, "loss": 0.789, "step": 76940 }, { "epoch": 13.08006119326874, "grad_norm": 14.57613754272461, "learning_rate": 1.15332313445521e-05, "loss": 0.7879, "step": 76950 }, { "epoch": 13.081761006289309, "grad_norm": 10.940472602844238, "learning_rate": 1.1530398322851153e-05, "loss": 0.8174, "step": 76960 }, { "epoch": 13.083460819309876, "grad_norm": 19.82240867614746, "learning_rate": 1.1527565301150207e-05, "loss": 0.8853, "step": 76970 }, { "epoch": 13.085160632330444, "grad_norm": 16.541250228881836, "learning_rate": 1.1524732279449262e-05, "loss": 0.936, "step": 76980 }, { "epoch": 13.086860445351011, "grad_norm": 25.298450469970703, "learning_rate": 1.1521899257748314e-05, "loss": 0.8885, "step": 76990 }, { "epoch": 13.08856025837158, "grad_norm": 16.249141693115234, "learning_rate": 1.1519066236047369e-05, "loss": 0.9191, "step": 77000 }, { "epoch": 13.090260071392146, "grad_norm": 14.399884223937988, "learning_rate": 1.1516233214346422e-05, "loss": 1.0308, "step": 77010 }, { "epoch": 13.091959884412715, "grad_norm": 13.65301513671875, "learning_rate": 1.1513400192645476e-05, "loss": 0.768, "step": 77020 }, { "epoch": 13.093659697433282, "grad_norm": 13.688096046447754, "learning_rate": 1.151056717094453e-05, "loss": 0.9957, "step": 77030 }, { "epoch": 13.09535951045385, "grad_norm": 13.099075317382812, "learning_rate": 1.1507734149243583e-05, "loss": 0.8542, "step": 77040 }, { "epoch": 13.097059323474419, "grad_norm": 13.356253623962402, "learning_rate": 1.1504901127542638e-05, "loss": 0.9598, "step": 77050 }, { "epoch": 13.098759136494985, "grad_norm": 11.596378326416016, "learning_rate": 1.150206810584169e-05, "loss": 0.8081, "step": 77060 }, { "epoch": 13.100458949515554, "grad_norm": 17.056547164916992, "learning_rate": 1.1499235084140745e-05, "loss": 0.9458, "step": 77070 }, { "epoch": 13.10215876253612, "grad_norm": 17.341062545776367, "learning_rate": 1.1496402062439799e-05, "loss": 0.9588, "step": 77080 }, { "epoch": 13.10385857555669, "grad_norm": 14.211402893066406, "learning_rate": 1.1493569040738852e-05, "loss": 0.8427, "step": 77090 }, { "epoch": 13.105558388577256, "grad_norm": 11.965507507324219, "learning_rate": 1.1490736019037906e-05, "loss": 0.8985, "step": 77100 }, { "epoch": 13.107258201597825, "grad_norm": 23.132856369018555, "learning_rate": 1.1487902997336961e-05, "loss": 1.1726, "step": 77110 }, { "epoch": 13.108958014618391, "grad_norm": 14.640426635742188, "learning_rate": 1.1485069975636013e-05, "loss": 0.9459, "step": 77120 }, { "epoch": 13.11065782763896, "grad_norm": 21.550586700439453, "learning_rate": 1.1482236953935068e-05, "loss": 1.0279, "step": 77130 }, { "epoch": 13.112357640659527, "grad_norm": 14.256326675415039, "learning_rate": 1.1479403932234122e-05, "loss": 0.8267, "step": 77140 }, { "epoch": 13.114057453680095, "grad_norm": 23.139755249023438, "learning_rate": 1.1476570910533175e-05, "loss": 0.716, "step": 77150 }, { "epoch": 13.115757266700664, "grad_norm": 9.036715507507324, "learning_rate": 1.1473737888832229e-05, "loss": 0.8964, "step": 77160 }, { "epoch": 13.11745707972123, "grad_norm": 11.763635635375977, "learning_rate": 1.1470904867131284e-05, "loss": 0.9114, "step": 77170 }, { "epoch": 13.119156892741799, "grad_norm": 14.474801063537598, "learning_rate": 1.1468071845430336e-05, "loss": 1.0202, "step": 77180 }, { "epoch": 13.120856705762366, "grad_norm": 18.728178024291992, "learning_rate": 1.146523882372939e-05, "loss": 1.0069, "step": 77190 }, { "epoch": 13.122556518782934, "grad_norm": 17.984718322753906, "learning_rate": 1.1462405802028444e-05, "loss": 0.9868, "step": 77200 }, { "epoch": 13.124256331803501, "grad_norm": 14.732629776000977, "learning_rate": 1.1459572780327496e-05, "loss": 0.9092, "step": 77210 }, { "epoch": 13.12595614482407, "grad_norm": 18.373342514038086, "learning_rate": 1.1456739758626551e-05, "loss": 0.9317, "step": 77220 }, { "epoch": 13.127655957844636, "grad_norm": 10.774474143981934, "learning_rate": 1.1453906736925605e-05, "loss": 1.0144, "step": 77230 }, { "epoch": 13.129355770865205, "grad_norm": 8.88404369354248, "learning_rate": 1.1451073715224659e-05, "loss": 0.9245, "step": 77240 }, { "epoch": 13.131055583885773, "grad_norm": 13.353208541870117, "learning_rate": 1.1448240693523712e-05, "loss": 0.984, "step": 77250 }, { "epoch": 13.13275539690634, "grad_norm": 13.529540061950684, "learning_rate": 1.1445407671822767e-05, "loss": 1.0586, "step": 77260 }, { "epoch": 13.134455209926909, "grad_norm": 9.541154861450195, "learning_rate": 1.1442574650121819e-05, "loss": 0.9693, "step": 77270 }, { "epoch": 13.136155022947476, "grad_norm": 15.077287673950195, "learning_rate": 1.1439741628420874e-05, "loss": 0.9145, "step": 77280 }, { "epoch": 13.137854835968044, "grad_norm": 13.376625061035156, "learning_rate": 1.1436908606719928e-05, "loss": 0.9343, "step": 77290 }, { "epoch": 13.13955464898861, "grad_norm": 13.152056694030762, "learning_rate": 1.1434075585018981e-05, "loss": 1.0098, "step": 77300 }, { "epoch": 13.14125446200918, "grad_norm": 13.500475883483887, "learning_rate": 1.1431242563318035e-05, "loss": 0.8867, "step": 77310 }, { "epoch": 13.142954275029746, "grad_norm": 14.083845138549805, "learning_rate": 1.142840954161709e-05, "loss": 0.8015, "step": 77320 }, { "epoch": 13.144654088050315, "grad_norm": 18.97845458984375, "learning_rate": 1.1425576519916142e-05, "loss": 0.9461, "step": 77330 }, { "epoch": 13.146353901070881, "grad_norm": 14.3585205078125, "learning_rate": 1.1422743498215197e-05, "loss": 0.8394, "step": 77340 }, { "epoch": 13.14805371409145, "grad_norm": 11.475493431091309, "learning_rate": 1.141991047651425e-05, "loss": 0.989, "step": 77350 }, { "epoch": 13.149753527112019, "grad_norm": 15.922459602355957, "learning_rate": 1.1417077454813304e-05, "loss": 0.9671, "step": 77360 }, { "epoch": 13.151453340132585, "grad_norm": 16.26936912536621, "learning_rate": 1.1414244433112358e-05, "loss": 0.9324, "step": 77370 }, { "epoch": 13.153153153153154, "grad_norm": 11.277046203613281, "learning_rate": 1.1411411411411411e-05, "loss": 0.9151, "step": 77380 }, { "epoch": 13.15485296617372, "grad_norm": 13.078980445861816, "learning_rate": 1.1408578389710466e-05, "loss": 0.9125, "step": 77390 }, { "epoch": 13.15655277919429, "grad_norm": 16.573970794677734, "learning_rate": 1.1405745368009518e-05, "loss": 0.9768, "step": 77400 }, { "epoch": 13.158252592214856, "grad_norm": 12.107610702514648, "learning_rate": 1.1402912346308573e-05, "loss": 0.9342, "step": 77410 }, { "epoch": 13.159952405235424, "grad_norm": 68.8357162475586, "learning_rate": 1.1400079324607627e-05, "loss": 0.6838, "step": 77420 }, { "epoch": 13.161652218255991, "grad_norm": 17.264375686645508, "learning_rate": 1.139724630290668e-05, "loss": 0.9676, "step": 77430 }, { "epoch": 13.16335203127656, "grad_norm": 20.598388671875, "learning_rate": 1.1394413281205734e-05, "loss": 0.8533, "step": 77440 }, { "epoch": 13.165051844297126, "grad_norm": 18.237140655517578, "learning_rate": 1.139158025950479e-05, "loss": 1.0761, "step": 77450 }, { "epoch": 13.166751657317695, "grad_norm": 8.156296730041504, "learning_rate": 1.1388747237803841e-05, "loss": 0.7969, "step": 77460 }, { "epoch": 13.168451470338264, "grad_norm": 16.457365036010742, "learning_rate": 1.1385914216102896e-05, "loss": 0.8663, "step": 77470 }, { "epoch": 13.17015128335883, "grad_norm": 18.868135452270508, "learning_rate": 1.138308119440195e-05, "loss": 0.9112, "step": 77480 }, { "epoch": 13.171851096379399, "grad_norm": 15.999444961547852, "learning_rate": 1.1380248172701003e-05, "loss": 0.9845, "step": 77490 }, { "epoch": 13.173550909399966, "grad_norm": 12.605375289916992, "learning_rate": 1.1377415151000057e-05, "loss": 0.8755, "step": 77500 }, { "epoch": 13.175250722420534, "grad_norm": 14.688749313354492, "learning_rate": 1.1374582129299112e-05, "loss": 0.891, "step": 77510 }, { "epoch": 13.176950535441101, "grad_norm": 12.210177421569824, "learning_rate": 1.1371749107598164e-05, "loss": 1.0849, "step": 77520 }, { "epoch": 13.17865034846167, "grad_norm": 13.10725212097168, "learning_rate": 1.1368916085897219e-05, "loss": 1.0146, "step": 77530 }, { "epoch": 13.180350161482236, "grad_norm": 13.450618743896484, "learning_rate": 1.1366083064196273e-05, "loss": 0.9487, "step": 77540 }, { "epoch": 13.182049974502805, "grad_norm": 13.99819564819336, "learning_rate": 1.1363250042495324e-05, "loss": 0.7171, "step": 77550 }, { "epoch": 13.183749787523372, "grad_norm": 12.082834243774414, "learning_rate": 1.136041702079438e-05, "loss": 0.7563, "step": 77560 }, { "epoch": 13.18544960054394, "grad_norm": 16.206069946289062, "learning_rate": 1.1357583999093433e-05, "loss": 0.787, "step": 77570 }, { "epoch": 13.187149413564509, "grad_norm": 12.901646614074707, "learning_rate": 1.1354750977392487e-05, "loss": 0.8601, "step": 77580 }, { "epoch": 13.188849226585075, "grad_norm": 20.766258239746094, "learning_rate": 1.135191795569154e-05, "loss": 0.9768, "step": 77590 }, { "epoch": 13.190549039605644, "grad_norm": 13.241246223449707, "learning_rate": 1.1349084933990595e-05, "loss": 0.9944, "step": 77600 }, { "epoch": 13.19224885262621, "grad_norm": 23.331920623779297, "learning_rate": 1.1346251912289647e-05, "loss": 0.9552, "step": 77610 }, { "epoch": 13.19394866564678, "grad_norm": 13.036016464233398, "learning_rate": 1.1343418890588703e-05, "loss": 1.0604, "step": 77620 }, { "epoch": 13.195648478667346, "grad_norm": 11.828996658325195, "learning_rate": 1.1340585868887756e-05, "loss": 1.0145, "step": 77630 }, { "epoch": 13.197348291687915, "grad_norm": 17.39860725402832, "learning_rate": 1.133775284718681e-05, "loss": 0.9165, "step": 77640 }, { "epoch": 13.199048104708481, "grad_norm": 13.79350757598877, "learning_rate": 1.1334919825485863e-05, "loss": 0.8941, "step": 77650 }, { "epoch": 13.20074791772905, "grad_norm": 16.67502784729004, "learning_rate": 1.1332086803784918e-05, "loss": 0.718, "step": 77660 }, { "epoch": 13.202447730749618, "grad_norm": 130.807861328125, "learning_rate": 1.132925378208397e-05, "loss": 0.9152, "step": 77670 }, { "epoch": 13.204147543770185, "grad_norm": 13.957781791687012, "learning_rate": 1.1326420760383025e-05, "loss": 0.8938, "step": 77680 }, { "epoch": 13.205847356790754, "grad_norm": 14.343793869018555, "learning_rate": 1.1323587738682079e-05, "loss": 0.8832, "step": 77690 }, { "epoch": 13.20754716981132, "grad_norm": 14.656609535217285, "learning_rate": 1.1320754716981132e-05, "loss": 0.8227, "step": 77700 }, { "epoch": 13.209246982831889, "grad_norm": 15.258963584899902, "learning_rate": 1.1317921695280186e-05, "loss": 0.593, "step": 77710 }, { "epoch": 13.210946795852456, "grad_norm": 13.086180686950684, "learning_rate": 1.131508867357924e-05, "loss": 0.817, "step": 77720 }, { "epoch": 13.212646608873024, "grad_norm": 31.025049209594727, "learning_rate": 1.1312255651878293e-05, "loss": 0.9085, "step": 77730 }, { "epoch": 13.214346421893591, "grad_norm": 13.460680961608887, "learning_rate": 1.1309422630177346e-05, "loss": 0.8697, "step": 77740 }, { "epoch": 13.21604623491416, "grad_norm": 17.58226203918457, "learning_rate": 1.1306589608476402e-05, "loss": 1.0044, "step": 77750 }, { "epoch": 13.217746047934726, "grad_norm": 18.57984733581543, "learning_rate": 1.1303756586775455e-05, "loss": 0.9001, "step": 77760 }, { "epoch": 13.219445860955295, "grad_norm": 16.212175369262695, "learning_rate": 1.1300923565074509e-05, "loss": 0.7474, "step": 77770 }, { "epoch": 13.221145673975863, "grad_norm": 17.03951072692871, "learning_rate": 1.1298090543373562e-05, "loss": 0.8538, "step": 77780 }, { "epoch": 13.22284548699643, "grad_norm": 15.871146202087402, "learning_rate": 1.1295257521672617e-05, "loss": 0.7908, "step": 77790 }, { "epoch": 13.224545300016999, "grad_norm": 14.528149604797363, "learning_rate": 1.129242449997167e-05, "loss": 1.0203, "step": 77800 }, { "epoch": 13.226245113037566, "grad_norm": 13.95683479309082, "learning_rate": 1.1289591478270725e-05, "loss": 0.9654, "step": 77810 }, { "epoch": 13.227944926058134, "grad_norm": 12.816866874694824, "learning_rate": 1.1286758456569778e-05, "loss": 0.9042, "step": 77820 }, { "epoch": 13.2296447390787, "grad_norm": 11.576176643371582, "learning_rate": 1.1283925434868832e-05, "loss": 0.7363, "step": 77830 }, { "epoch": 13.23134455209927, "grad_norm": 13.031106948852539, "learning_rate": 1.1281092413167885e-05, "loss": 0.9889, "step": 77840 }, { "epoch": 13.233044365119836, "grad_norm": 12.993022918701172, "learning_rate": 1.127825939146694e-05, "loss": 0.9349, "step": 77850 }, { "epoch": 13.234744178140405, "grad_norm": 13.041824340820312, "learning_rate": 1.1275426369765992e-05, "loss": 0.8632, "step": 77860 }, { "epoch": 13.236443991160971, "grad_norm": 14.286273002624512, "learning_rate": 1.1272593348065047e-05, "loss": 0.8633, "step": 77870 }, { "epoch": 13.23814380418154, "grad_norm": 16.070451736450195, "learning_rate": 1.1269760326364101e-05, "loss": 0.8862, "step": 77880 }, { "epoch": 13.239843617202109, "grad_norm": 14.31615924835205, "learning_rate": 1.1266927304663153e-05, "loss": 0.802, "step": 77890 }, { "epoch": 13.241543430222675, "grad_norm": 12.966490745544434, "learning_rate": 1.1264094282962208e-05, "loss": 0.6914, "step": 77900 }, { "epoch": 13.243243243243244, "grad_norm": 17.09201431274414, "learning_rate": 1.1261261261261261e-05, "loss": 1.0092, "step": 77910 }, { "epoch": 13.24494305626381, "grad_norm": 14.605857849121094, "learning_rate": 1.1258428239560315e-05, "loss": 1.013, "step": 77920 }, { "epoch": 13.24664286928438, "grad_norm": 18.278438568115234, "learning_rate": 1.1255595217859368e-05, "loss": 1.117, "step": 77930 }, { "epoch": 13.248342682304946, "grad_norm": 15.701028823852539, "learning_rate": 1.1252762196158424e-05, "loss": 0.8748, "step": 77940 }, { "epoch": 13.250042495325514, "grad_norm": 13.538622856140137, "learning_rate": 1.1249929174457476e-05, "loss": 0.9206, "step": 77950 }, { "epoch": 13.251742308346081, "grad_norm": 14.183862686157227, "learning_rate": 1.124709615275653e-05, "loss": 0.9524, "step": 77960 }, { "epoch": 13.25344212136665, "grad_norm": 72.41496276855469, "learning_rate": 1.1244263131055584e-05, "loss": 0.7763, "step": 77970 }, { "epoch": 13.255141934387217, "grad_norm": 13.101901054382324, "learning_rate": 1.1241430109354638e-05, "loss": 0.8277, "step": 77980 }, { "epoch": 13.256841747407785, "grad_norm": 15.159908294677734, "learning_rate": 1.1238597087653691e-05, "loss": 0.9563, "step": 77990 }, { "epoch": 13.258541560428354, "grad_norm": 17.282302856445312, "learning_rate": 1.1235764065952747e-05, "loss": 1.0538, "step": 78000 }, { "epoch": 13.26024137344892, "grad_norm": 14.467733383178711, "learning_rate": 1.1232931044251798e-05, "loss": 0.8309, "step": 78010 }, { "epoch": 13.261941186469489, "grad_norm": 13.557371139526367, "learning_rate": 1.1230098022550854e-05, "loss": 0.8816, "step": 78020 }, { "epoch": 13.263640999490056, "grad_norm": 15.075485229492188, "learning_rate": 1.1227265000849907e-05, "loss": 0.995, "step": 78030 }, { "epoch": 13.265340812510624, "grad_norm": 13.49841594696045, "learning_rate": 1.122443197914896e-05, "loss": 0.9914, "step": 78040 }, { "epoch": 13.267040625531191, "grad_norm": 15.462276458740234, "learning_rate": 1.1221598957448014e-05, "loss": 1.0125, "step": 78050 }, { "epoch": 13.26874043855176, "grad_norm": 16.423372268676758, "learning_rate": 1.1218765935747068e-05, "loss": 0.9962, "step": 78060 }, { "epoch": 13.270440251572326, "grad_norm": 24.819061279296875, "learning_rate": 1.1215932914046121e-05, "loss": 0.9252, "step": 78070 }, { "epoch": 13.272140064592895, "grad_norm": 14.353891372680664, "learning_rate": 1.1213099892345175e-05, "loss": 1.0217, "step": 78080 }, { "epoch": 13.273839877613462, "grad_norm": 15.675206184387207, "learning_rate": 1.121026687064423e-05, "loss": 1.0018, "step": 78090 }, { "epoch": 13.27553969063403, "grad_norm": 12.437932968139648, "learning_rate": 1.1207433848943283e-05, "loss": 0.8913, "step": 78100 }, { "epoch": 13.277239503654599, "grad_norm": 17.312644958496094, "learning_rate": 1.1204600827242337e-05, "loss": 0.9097, "step": 78110 }, { "epoch": 13.278939316675165, "grad_norm": 10.335307121276855, "learning_rate": 1.120176780554139e-05, "loss": 0.999, "step": 78120 }, { "epoch": 13.280639129695734, "grad_norm": 15.154790878295898, "learning_rate": 1.1198934783840446e-05, "loss": 0.858, "step": 78130 }, { "epoch": 13.2823389427163, "grad_norm": 13.978702545166016, "learning_rate": 1.1196101762139498e-05, "loss": 1.0418, "step": 78140 }, { "epoch": 13.28403875573687, "grad_norm": 14.155001640319824, "learning_rate": 1.1193268740438553e-05, "loss": 0.9739, "step": 78150 }, { "epoch": 13.285738568757436, "grad_norm": 17.80815887451172, "learning_rate": 1.1190435718737606e-05, "loss": 0.9585, "step": 78160 }, { "epoch": 13.287438381778005, "grad_norm": 13.593948364257812, "learning_rate": 1.118760269703666e-05, "loss": 0.9833, "step": 78170 }, { "epoch": 13.289138194798571, "grad_norm": 20.40248680114746, "learning_rate": 1.1184769675335713e-05, "loss": 0.9185, "step": 78180 }, { "epoch": 13.29083800781914, "grad_norm": 12.592994689941406, "learning_rate": 1.1181936653634769e-05, "loss": 0.8153, "step": 78190 }, { "epoch": 13.292537820839708, "grad_norm": 14.923413276672363, "learning_rate": 1.117910363193382e-05, "loss": 1.167, "step": 78200 }, { "epoch": 13.294237633860275, "grad_norm": 10.277132987976074, "learning_rate": 1.1176270610232876e-05, "loss": 0.8448, "step": 78210 }, { "epoch": 13.295937446880844, "grad_norm": 12.314251899719238, "learning_rate": 1.1173437588531929e-05, "loss": 0.9222, "step": 78220 }, { "epoch": 13.29763725990141, "grad_norm": 15.025476455688477, "learning_rate": 1.1170604566830981e-05, "loss": 1.1004, "step": 78230 }, { "epoch": 13.299337072921979, "grad_norm": 12.396261215209961, "learning_rate": 1.1167771545130036e-05, "loss": 0.9387, "step": 78240 }, { "epoch": 13.301036885942546, "grad_norm": 14.077107429504395, "learning_rate": 1.116493852342909e-05, "loss": 1.0121, "step": 78250 }, { "epoch": 13.302736698963114, "grad_norm": 16.003875732421875, "learning_rate": 1.1162105501728143e-05, "loss": 1.0966, "step": 78260 }, { "epoch": 13.304436511983681, "grad_norm": 15.66146469116211, "learning_rate": 1.1159272480027197e-05, "loss": 1.1197, "step": 78270 }, { "epoch": 13.30613632500425, "grad_norm": 19.856468200683594, "learning_rate": 1.1156439458326252e-05, "loss": 0.8005, "step": 78280 }, { "epoch": 13.307836138024816, "grad_norm": 16.23740005493164, "learning_rate": 1.1153606436625304e-05, "loss": 1.0655, "step": 78290 }, { "epoch": 13.309535951045385, "grad_norm": 15.966751098632812, "learning_rate": 1.1150773414924359e-05, "loss": 0.943, "step": 78300 }, { "epoch": 13.311235764065954, "grad_norm": 14.080547332763672, "learning_rate": 1.1147940393223412e-05, "loss": 1.034, "step": 78310 }, { "epoch": 13.31293557708652, "grad_norm": 14.2871732711792, "learning_rate": 1.1145107371522466e-05, "loss": 0.7381, "step": 78320 }, { "epoch": 13.314635390107089, "grad_norm": 17.33774757385254, "learning_rate": 1.114227434982152e-05, "loss": 1.0122, "step": 78330 }, { "epoch": 13.316335203127656, "grad_norm": 14.254962921142578, "learning_rate": 1.1139441328120575e-05, "loss": 0.9887, "step": 78340 }, { "epoch": 13.318035016148224, "grad_norm": 16.246376037597656, "learning_rate": 1.1136608306419627e-05, "loss": 0.965, "step": 78350 }, { "epoch": 13.319734829168791, "grad_norm": 19.487470626831055, "learning_rate": 1.1133775284718682e-05, "loss": 0.886, "step": 78360 }, { "epoch": 13.32143464218936, "grad_norm": 12.582721710205078, "learning_rate": 1.1130942263017735e-05, "loss": 1.0458, "step": 78370 }, { "epoch": 13.323134455209926, "grad_norm": 12.799649238586426, "learning_rate": 1.1128109241316789e-05, "loss": 0.7937, "step": 78380 }, { "epoch": 13.324834268230495, "grad_norm": 12.547506332397461, "learning_rate": 1.1125276219615842e-05, "loss": 0.8946, "step": 78390 }, { "epoch": 13.326534081251062, "grad_norm": 23.921857833862305, "learning_rate": 1.1122443197914896e-05, "loss": 0.8468, "step": 78400 }, { "epoch": 13.32823389427163, "grad_norm": 16.23688507080078, "learning_rate": 1.111961017621395e-05, "loss": 1.1201, "step": 78410 }, { "epoch": 13.329933707292199, "grad_norm": 16.371009826660156, "learning_rate": 1.1116777154513003e-05, "loss": 0.8983, "step": 78420 }, { "epoch": 13.331633520312765, "grad_norm": 10.744795799255371, "learning_rate": 1.1113944132812058e-05, "loss": 0.7817, "step": 78430 }, { "epoch": 13.333333333333334, "grad_norm": 13.490425109863281, "learning_rate": 1.111111111111111e-05, "loss": 0.8827, "step": 78440 }, { "epoch": 13.3350331463539, "grad_norm": 15.155412673950195, "learning_rate": 1.1108278089410165e-05, "loss": 0.887, "step": 78450 }, { "epoch": 13.33673295937447, "grad_norm": 16.690101623535156, "learning_rate": 1.1105445067709219e-05, "loss": 0.8241, "step": 78460 }, { "epoch": 13.338432772395036, "grad_norm": 13.117632865905762, "learning_rate": 1.1102612046008274e-05, "loss": 0.756, "step": 78470 }, { "epoch": 13.340132585415605, "grad_norm": 22.331623077392578, "learning_rate": 1.1099779024307326e-05, "loss": 1.1189, "step": 78480 }, { "epoch": 13.341832398436171, "grad_norm": 16.728534698486328, "learning_rate": 1.1096946002606381e-05, "loss": 0.9552, "step": 78490 }, { "epoch": 13.34353221145674, "grad_norm": 11.533145904541016, "learning_rate": 1.1094112980905434e-05, "loss": 0.9396, "step": 78500 }, { "epoch": 13.345232024477308, "grad_norm": 14.998151779174805, "learning_rate": 1.1091279959204488e-05, "loss": 0.9564, "step": 78510 }, { "epoch": 13.346931837497875, "grad_norm": 18.446279525756836, "learning_rate": 1.1088446937503542e-05, "loss": 1.0204, "step": 78520 }, { "epoch": 13.348631650518444, "grad_norm": 13.835379600524902, "learning_rate": 1.1085613915802597e-05, "loss": 0.9312, "step": 78530 }, { "epoch": 13.35033146353901, "grad_norm": 20.609378814697266, "learning_rate": 1.1082780894101649e-05, "loss": 1.0266, "step": 78540 }, { "epoch": 13.352031276559579, "grad_norm": 9.08471393585205, "learning_rate": 1.1079947872400704e-05, "loss": 1.257, "step": 78550 }, { "epoch": 13.353731089580146, "grad_norm": 65.32064056396484, "learning_rate": 1.1077114850699757e-05, "loss": 1.2545, "step": 78560 }, { "epoch": 13.355430902600714, "grad_norm": 19.005264282226562, "learning_rate": 1.1074281828998809e-05, "loss": 0.9582, "step": 78570 }, { "epoch": 13.357130715621281, "grad_norm": 18.403188705444336, "learning_rate": 1.1071448807297864e-05, "loss": 1.047, "step": 78580 }, { "epoch": 13.35883052864185, "grad_norm": 14.522565841674805, "learning_rate": 1.1068615785596918e-05, "loss": 0.8719, "step": 78590 }, { "epoch": 13.360530341662416, "grad_norm": 14.247611045837402, "learning_rate": 1.1065782763895971e-05, "loss": 0.8846, "step": 78600 }, { "epoch": 13.362230154682985, "grad_norm": 14.306015014648438, "learning_rate": 1.1062949742195025e-05, "loss": 0.7211, "step": 78610 }, { "epoch": 13.363929967703553, "grad_norm": 18.776817321777344, "learning_rate": 1.106011672049408e-05, "loss": 0.9809, "step": 78620 }, { "epoch": 13.36562978072412, "grad_norm": 15.484517097473145, "learning_rate": 1.1057283698793132e-05, "loss": 0.7816, "step": 78630 }, { "epoch": 13.367329593744689, "grad_norm": 18.46607780456543, "learning_rate": 1.1054450677092187e-05, "loss": 0.9862, "step": 78640 }, { "epoch": 13.369029406765256, "grad_norm": 16.73624038696289, "learning_rate": 1.105161765539124e-05, "loss": 0.8993, "step": 78650 }, { "epoch": 13.370729219785824, "grad_norm": 12.358243942260742, "learning_rate": 1.1048784633690294e-05, "loss": 0.9358, "step": 78660 }, { "epoch": 13.37242903280639, "grad_norm": 12.732717514038086, "learning_rate": 1.1045951611989348e-05, "loss": 0.8396, "step": 78670 }, { "epoch": 13.37412884582696, "grad_norm": 12.356334686279297, "learning_rate": 1.1043118590288403e-05, "loss": 0.9381, "step": 78680 }, { "epoch": 13.375828658847526, "grad_norm": 16.734783172607422, "learning_rate": 1.1040285568587455e-05, "loss": 0.9111, "step": 78690 }, { "epoch": 13.377528471868095, "grad_norm": 13.208216667175293, "learning_rate": 1.103745254688651e-05, "loss": 0.9904, "step": 78700 }, { "epoch": 13.379228284888661, "grad_norm": 15.333375930786133, "learning_rate": 1.1034619525185564e-05, "loss": 0.8831, "step": 78710 }, { "epoch": 13.38092809790923, "grad_norm": 13.06353759765625, "learning_rate": 1.1031786503484617e-05, "loss": 0.8957, "step": 78720 }, { "epoch": 13.382627910929799, "grad_norm": 19.79833984375, "learning_rate": 1.102895348178367e-05, "loss": 0.9291, "step": 78730 }, { "epoch": 13.384327723950365, "grad_norm": 12.450462341308594, "learning_rate": 1.1026120460082726e-05, "loss": 0.8983, "step": 78740 }, { "epoch": 13.386027536970934, "grad_norm": 27.522924423217773, "learning_rate": 1.1023287438381778e-05, "loss": 1.0711, "step": 78750 }, { "epoch": 13.3877273499915, "grad_norm": 13.53245735168457, "learning_rate": 1.1020454416680831e-05, "loss": 1.0243, "step": 78760 }, { "epoch": 13.38942716301207, "grad_norm": 26.29836082458496, "learning_rate": 1.1017621394979886e-05, "loss": 0.9818, "step": 78770 }, { "epoch": 13.391126976032636, "grad_norm": 12.818188667297363, "learning_rate": 1.1014788373278938e-05, "loss": 0.937, "step": 78780 }, { "epoch": 13.392826789053204, "grad_norm": 14.836273193359375, "learning_rate": 1.1011955351577993e-05, "loss": 0.9352, "step": 78790 }, { "epoch": 13.394526602073771, "grad_norm": 18.67325210571289, "learning_rate": 1.1009122329877047e-05, "loss": 0.9413, "step": 78800 }, { "epoch": 13.39622641509434, "grad_norm": 11.347458839416504, "learning_rate": 1.10062893081761e-05, "loss": 1.0413, "step": 78810 }, { "epoch": 13.397926228114907, "grad_norm": 13.097906112670898, "learning_rate": 1.1003456286475154e-05, "loss": 0.9073, "step": 78820 }, { "epoch": 13.399626041135475, "grad_norm": 17.34092903137207, "learning_rate": 1.100062326477421e-05, "loss": 0.8743, "step": 78830 }, { "epoch": 13.401325854156044, "grad_norm": 40.98939514160156, "learning_rate": 1.0997790243073263e-05, "loss": 0.9757, "step": 78840 }, { "epoch": 13.40302566717661, "grad_norm": 11.601373672485352, "learning_rate": 1.0994957221372316e-05, "loss": 0.75, "step": 78850 }, { "epoch": 13.404725480197179, "grad_norm": 11.824304580688477, "learning_rate": 1.099212419967137e-05, "loss": 0.9254, "step": 78860 }, { "epoch": 13.406425293217746, "grad_norm": 17.559823989868164, "learning_rate": 1.0989291177970425e-05, "loss": 0.8087, "step": 78870 }, { "epoch": 13.408125106238314, "grad_norm": 11.129918098449707, "learning_rate": 1.0986458156269477e-05, "loss": 0.9391, "step": 78880 }, { "epoch": 13.409824919258881, "grad_norm": 11.173718452453613, "learning_rate": 1.0983625134568532e-05, "loss": 1.0822, "step": 78890 }, { "epoch": 13.41152473227945, "grad_norm": 12.669007301330566, "learning_rate": 1.0980792112867586e-05, "loss": 1.0722, "step": 78900 }, { "epoch": 13.413224545300016, "grad_norm": 16.115863800048828, "learning_rate": 1.0977959091166639e-05, "loss": 1.0262, "step": 78910 }, { "epoch": 13.414924358320585, "grad_norm": 19.486875534057617, "learning_rate": 1.0975126069465693e-05, "loss": 0.9097, "step": 78920 }, { "epoch": 13.416624171341152, "grad_norm": 18.84023094177246, "learning_rate": 1.0972293047764746e-05, "loss": 1.1108, "step": 78930 }, { "epoch": 13.41832398436172, "grad_norm": 15.548344612121582, "learning_rate": 1.09694600260638e-05, "loss": 0.8991, "step": 78940 }, { "epoch": 13.420023797382289, "grad_norm": 13.278189659118652, "learning_rate": 1.0966627004362853e-05, "loss": 1.0438, "step": 78950 }, { "epoch": 13.421723610402855, "grad_norm": 15.740047454833984, "learning_rate": 1.0963793982661908e-05, "loss": 0.7998, "step": 78960 }, { "epoch": 13.423423423423424, "grad_norm": 18.926374435424805, "learning_rate": 1.096096096096096e-05, "loss": 0.89, "step": 78970 }, { "epoch": 13.42512323644399, "grad_norm": 21.64743423461914, "learning_rate": 1.0958127939260015e-05, "loss": 0.9151, "step": 78980 }, { "epoch": 13.42682304946456, "grad_norm": 12.642642974853516, "learning_rate": 1.0955294917559069e-05, "loss": 0.939, "step": 78990 }, { "epoch": 13.428522862485126, "grad_norm": 15.080862998962402, "learning_rate": 1.0952461895858122e-05, "loss": 0.948, "step": 79000 }, { "epoch": 13.430222675505695, "grad_norm": 11.801422119140625, "learning_rate": 1.0949628874157176e-05, "loss": 0.9628, "step": 79010 }, { "epoch": 13.431922488526261, "grad_norm": 295.65277099609375, "learning_rate": 1.0946795852456231e-05, "loss": 1.0909, "step": 79020 }, { "epoch": 13.43362230154683, "grad_norm": 26.62310028076172, "learning_rate": 1.0943962830755283e-05, "loss": 0.8708, "step": 79030 }, { "epoch": 13.435322114567398, "grad_norm": 17.406274795532227, "learning_rate": 1.0941129809054338e-05, "loss": 0.8515, "step": 79040 }, { "epoch": 13.437021927587965, "grad_norm": 13.203999519348145, "learning_rate": 1.0938296787353392e-05, "loss": 0.972, "step": 79050 }, { "epoch": 13.438721740608534, "grad_norm": 13.1840181350708, "learning_rate": 1.0935463765652445e-05, "loss": 0.9003, "step": 79060 }, { "epoch": 13.4404215536291, "grad_norm": 16.35263442993164, "learning_rate": 1.0932630743951499e-05, "loss": 0.9999, "step": 79070 }, { "epoch": 13.442121366649669, "grad_norm": 13.941588401794434, "learning_rate": 1.0929797722250554e-05, "loss": 0.9802, "step": 79080 }, { "epoch": 13.443821179670236, "grad_norm": 11.385626792907715, "learning_rate": 1.0926964700549606e-05, "loss": 0.9101, "step": 79090 }, { "epoch": 13.445520992690804, "grad_norm": 16.32146453857422, "learning_rate": 1.092413167884866e-05, "loss": 0.8567, "step": 79100 }, { "epoch": 13.447220805711371, "grad_norm": 15.456405639648438, "learning_rate": 1.0921298657147715e-05, "loss": 0.9694, "step": 79110 }, { "epoch": 13.44892061873194, "grad_norm": 16.65252685546875, "learning_rate": 1.0918465635446766e-05, "loss": 0.9567, "step": 79120 }, { "epoch": 13.450620431752506, "grad_norm": 14.926312446594238, "learning_rate": 1.0915632613745822e-05, "loss": 0.9166, "step": 79130 }, { "epoch": 13.452320244773075, "grad_norm": 15.613080024719238, "learning_rate": 1.0912799592044875e-05, "loss": 1.0898, "step": 79140 }, { "epoch": 13.454020057793644, "grad_norm": 12.607086181640625, "learning_rate": 1.0909966570343929e-05, "loss": 0.9919, "step": 79150 }, { "epoch": 13.45571987081421, "grad_norm": 13.79865550994873, "learning_rate": 1.0907133548642982e-05, "loss": 1.1176, "step": 79160 }, { "epoch": 13.457419683834779, "grad_norm": 13.692291259765625, "learning_rate": 1.0904300526942037e-05, "loss": 0.8921, "step": 79170 }, { "epoch": 13.459119496855346, "grad_norm": 13.212871551513672, "learning_rate": 1.090146750524109e-05, "loss": 0.9046, "step": 79180 }, { "epoch": 13.460819309875914, "grad_norm": 13.547127723693848, "learning_rate": 1.0898634483540144e-05, "loss": 0.9627, "step": 79190 }, { "epoch": 13.462519122896481, "grad_norm": 14.345803260803223, "learning_rate": 1.0895801461839198e-05, "loss": 0.7526, "step": 79200 }, { "epoch": 13.46421893591705, "grad_norm": 13.337382316589355, "learning_rate": 1.0892968440138253e-05, "loss": 0.9884, "step": 79210 }, { "epoch": 13.465918748937616, "grad_norm": 22.537425994873047, "learning_rate": 1.0890135418437305e-05, "loss": 0.9016, "step": 79220 }, { "epoch": 13.467618561958185, "grad_norm": 15.211652755737305, "learning_rate": 1.088730239673636e-05, "loss": 1.0133, "step": 79230 }, { "epoch": 13.469318374978752, "grad_norm": 30.524824142456055, "learning_rate": 1.0884469375035414e-05, "loss": 0.8952, "step": 79240 }, { "epoch": 13.47101818799932, "grad_norm": 12.379261016845703, "learning_rate": 1.0881636353334467e-05, "loss": 0.8583, "step": 79250 }, { "epoch": 13.472718001019889, "grad_norm": 11.95047664642334, "learning_rate": 1.087880333163352e-05, "loss": 0.7752, "step": 79260 }, { "epoch": 13.474417814040455, "grad_norm": 16.895206451416016, "learning_rate": 1.0875970309932574e-05, "loss": 1.0514, "step": 79270 }, { "epoch": 13.476117627061024, "grad_norm": 15.349939346313477, "learning_rate": 1.0873137288231628e-05, "loss": 0.8983, "step": 79280 }, { "epoch": 13.47781744008159, "grad_norm": 13.1223783493042, "learning_rate": 1.0870304266530681e-05, "loss": 0.8455, "step": 79290 }, { "epoch": 13.47951725310216, "grad_norm": 12.116067886352539, "learning_rate": 1.0867471244829737e-05, "loss": 0.8389, "step": 79300 }, { "epoch": 13.481217066122726, "grad_norm": 12.108636856079102, "learning_rate": 1.0864638223128788e-05, "loss": 0.9012, "step": 79310 }, { "epoch": 13.482916879143295, "grad_norm": 10.64647102355957, "learning_rate": 1.0861805201427844e-05, "loss": 0.8747, "step": 79320 }, { "epoch": 13.484616692163861, "grad_norm": 17.564212799072266, "learning_rate": 1.0858972179726897e-05, "loss": 0.7992, "step": 79330 }, { "epoch": 13.48631650518443, "grad_norm": 13.911291122436523, "learning_rate": 1.085613915802595e-05, "loss": 1.1306, "step": 79340 }, { "epoch": 13.488016318204998, "grad_norm": 14.5238618850708, "learning_rate": 1.0853306136325004e-05, "loss": 1.0974, "step": 79350 }, { "epoch": 13.489716131225565, "grad_norm": 11.923513412475586, "learning_rate": 1.085047311462406e-05, "loss": 0.9344, "step": 79360 }, { "epoch": 13.491415944246134, "grad_norm": 14.80221176147461, "learning_rate": 1.0847640092923111e-05, "loss": 0.9645, "step": 79370 }, { "epoch": 13.4931157572667, "grad_norm": 12.408084869384766, "learning_rate": 1.0844807071222166e-05, "loss": 0.9899, "step": 79380 }, { "epoch": 13.494815570287269, "grad_norm": 14.862682342529297, "learning_rate": 1.084197404952122e-05, "loss": 0.9327, "step": 79390 }, { "epoch": 13.496515383307836, "grad_norm": 20.100452423095703, "learning_rate": 1.0839141027820273e-05, "loss": 0.882, "step": 79400 }, { "epoch": 13.498215196328404, "grad_norm": 17.345842361450195, "learning_rate": 1.0836308006119327e-05, "loss": 0.725, "step": 79410 }, { "epoch": 13.499915009348971, "grad_norm": 13.069602966308594, "learning_rate": 1.0833474984418382e-05, "loss": 0.9867, "step": 79420 }, { "epoch": 13.50161482236954, "grad_norm": 14.624733924865723, "learning_rate": 1.0830641962717434e-05, "loss": 0.9995, "step": 79430 }, { "epoch": 13.503314635390106, "grad_norm": 13.463871002197266, "learning_rate": 1.0827808941016488e-05, "loss": 0.8449, "step": 79440 }, { "epoch": 13.505014448410675, "grad_norm": 10.923924446105957, "learning_rate": 1.0824975919315543e-05, "loss": 0.9088, "step": 79450 }, { "epoch": 13.506714261431242, "grad_norm": 28.570697784423828, "learning_rate": 1.0822142897614595e-05, "loss": 0.9571, "step": 79460 }, { "epoch": 13.50841407445181, "grad_norm": 18.598581314086914, "learning_rate": 1.081930987591365e-05, "loss": 1.0263, "step": 79470 }, { "epoch": 13.510113887472379, "grad_norm": 14.451478004455566, "learning_rate": 1.0816476854212703e-05, "loss": 0.9782, "step": 79480 }, { "epoch": 13.511813700492946, "grad_norm": 14.569621086120605, "learning_rate": 1.0813643832511757e-05, "loss": 0.9383, "step": 79490 }, { "epoch": 13.513513513513514, "grad_norm": 14.83509635925293, "learning_rate": 1.081081081081081e-05, "loss": 0.8202, "step": 79500 }, { "epoch": 13.51521332653408, "grad_norm": 13.571779251098633, "learning_rate": 1.0807977789109866e-05, "loss": 0.9422, "step": 79510 }, { "epoch": 13.51691313955465, "grad_norm": 21.014402389526367, "learning_rate": 1.0805144767408917e-05, "loss": 0.9898, "step": 79520 }, { "epoch": 13.518612952575216, "grad_norm": 11.46825885772705, "learning_rate": 1.0802311745707973e-05, "loss": 0.9206, "step": 79530 }, { "epoch": 13.520312765595785, "grad_norm": 17.232830047607422, "learning_rate": 1.0799478724007026e-05, "loss": 0.9107, "step": 79540 }, { "epoch": 13.522012578616351, "grad_norm": 175.86105346679688, "learning_rate": 1.079664570230608e-05, "loss": 0.8506, "step": 79550 }, { "epoch": 13.52371239163692, "grad_norm": 25.40204429626465, "learning_rate": 1.0793812680605133e-05, "loss": 1.0133, "step": 79560 }, { "epoch": 13.525412204657489, "grad_norm": 14.393264770507812, "learning_rate": 1.0790979658904188e-05, "loss": 1.0226, "step": 79570 }, { "epoch": 13.527112017678055, "grad_norm": 13.969400405883789, "learning_rate": 1.0788146637203242e-05, "loss": 0.765, "step": 79580 }, { "epoch": 13.528811830698624, "grad_norm": 15.929036140441895, "learning_rate": 1.0785313615502295e-05, "loss": 0.804, "step": 79590 }, { "epoch": 13.53051164371919, "grad_norm": 20.80450439453125, "learning_rate": 1.0782480593801349e-05, "loss": 0.9268, "step": 79600 }, { "epoch": 13.53221145673976, "grad_norm": 12.692366600036621, "learning_rate": 1.0779647572100402e-05, "loss": 0.8903, "step": 79610 }, { "epoch": 13.533911269760326, "grad_norm": 13.280750274658203, "learning_rate": 1.0776814550399456e-05, "loss": 0.9572, "step": 79620 }, { "epoch": 13.535611082780894, "grad_norm": 14.93150806427002, "learning_rate": 1.077398152869851e-05, "loss": 0.7485, "step": 79630 }, { "epoch": 13.537310895801461, "grad_norm": 14.02898120880127, "learning_rate": 1.0771148506997565e-05, "loss": 1.1077, "step": 79640 }, { "epoch": 13.53901070882203, "grad_norm": 14.103907585144043, "learning_rate": 1.0768315485296617e-05, "loss": 0.9777, "step": 79650 }, { "epoch": 13.540710521842596, "grad_norm": 11.905268669128418, "learning_rate": 1.0765482463595672e-05, "loss": 1.011, "step": 79660 }, { "epoch": 13.542410334863165, "grad_norm": 19.548992156982422, "learning_rate": 1.0762649441894725e-05, "loss": 0.8376, "step": 79670 }, { "epoch": 13.544110147883734, "grad_norm": 12.930939674377441, "learning_rate": 1.0759816420193779e-05, "loss": 1.0148, "step": 79680 }, { "epoch": 13.5458099609043, "grad_norm": 11.852219581604004, "learning_rate": 1.0756983398492832e-05, "loss": 0.9109, "step": 79690 }, { "epoch": 13.547509773924869, "grad_norm": 16.114492416381836, "learning_rate": 1.0754150376791888e-05, "loss": 0.7876, "step": 79700 }, { "epoch": 13.549209586945436, "grad_norm": 16.554655075073242, "learning_rate": 1.075131735509094e-05, "loss": 0.9403, "step": 79710 }, { "epoch": 13.550909399966004, "grad_norm": 10.465088844299316, "learning_rate": 1.0748484333389995e-05, "loss": 0.9773, "step": 79720 }, { "epoch": 13.552609212986571, "grad_norm": 14.013542175292969, "learning_rate": 1.0745651311689048e-05, "loss": 0.8787, "step": 79730 }, { "epoch": 13.55430902600714, "grad_norm": 20.03026008605957, "learning_rate": 1.0742818289988102e-05, "loss": 0.81, "step": 79740 }, { "epoch": 13.556008839027706, "grad_norm": 21.250978469848633, "learning_rate": 1.0739985268287155e-05, "loss": 0.9167, "step": 79750 }, { "epoch": 13.557708652048275, "grad_norm": 11.950023651123047, "learning_rate": 1.073715224658621e-05, "loss": 0.8046, "step": 79760 }, { "epoch": 13.559408465068842, "grad_norm": 12.388569831848145, "learning_rate": 1.0734319224885262e-05, "loss": 1.0274, "step": 79770 }, { "epoch": 13.56110827808941, "grad_norm": 16.066396713256836, "learning_rate": 1.0731486203184316e-05, "loss": 0.8016, "step": 79780 }, { "epoch": 13.562808091109979, "grad_norm": 9.924904823303223, "learning_rate": 1.0728653181483371e-05, "loss": 1.0819, "step": 79790 }, { "epoch": 13.564507904130545, "grad_norm": 10.965779304504395, "learning_rate": 1.0725820159782423e-05, "loss": 1.0389, "step": 79800 }, { "epoch": 13.566207717151114, "grad_norm": 15.59056568145752, "learning_rate": 1.0722987138081478e-05, "loss": 0.7473, "step": 79810 }, { "epoch": 13.56790753017168, "grad_norm": 13.10685920715332, "learning_rate": 1.0720154116380532e-05, "loss": 0.869, "step": 79820 }, { "epoch": 13.56960734319225, "grad_norm": 12.217353820800781, "learning_rate": 1.0717321094679585e-05, "loss": 0.9988, "step": 79830 }, { "epoch": 13.571307156212816, "grad_norm": 15.33526611328125, "learning_rate": 1.0714488072978639e-05, "loss": 0.8421, "step": 79840 }, { "epoch": 13.573006969233385, "grad_norm": 15.699559211730957, "learning_rate": 1.0711655051277694e-05, "loss": 0.8835, "step": 79850 }, { "epoch": 13.574706782253951, "grad_norm": 13.244500160217285, "learning_rate": 1.0708822029576746e-05, "loss": 0.8309, "step": 79860 }, { "epoch": 13.57640659527452, "grad_norm": 16.65557098388672, "learning_rate": 1.07059890078758e-05, "loss": 0.9261, "step": 79870 }, { "epoch": 13.578106408295088, "grad_norm": 14.428457260131836, "learning_rate": 1.0703155986174854e-05, "loss": 0.9553, "step": 79880 }, { "epoch": 13.579806221315655, "grad_norm": 14.05221176147461, "learning_rate": 1.0700322964473908e-05, "loss": 0.8841, "step": 79890 }, { "epoch": 13.581506034336224, "grad_norm": 12.21493911743164, "learning_rate": 1.0697489942772961e-05, "loss": 1.0393, "step": 79900 }, { "epoch": 13.58320584735679, "grad_norm": 13.489249229431152, "learning_rate": 1.0694656921072017e-05, "loss": 0.9227, "step": 79910 }, { "epoch": 13.584905660377359, "grad_norm": 9.937222480773926, "learning_rate": 1.069182389937107e-05, "loss": 0.7038, "step": 79920 }, { "epoch": 13.586605473397926, "grad_norm": 13.177984237670898, "learning_rate": 1.0688990877670124e-05, "loss": 1.1119, "step": 79930 }, { "epoch": 13.588305286418494, "grad_norm": 13.794984817504883, "learning_rate": 1.0686157855969177e-05, "loss": 0.9576, "step": 79940 }, { "epoch": 13.590005099439061, "grad_norm": 13.519796371459961, "learning_rate": 1.0683324834268232e-05, "loss": 0.9637, "step": 79950 }, { "epoch": 13.59170491245963, "grad_norm": 19.251100540161133, "learning_rate": 1.0680491812567284e-05, "loss": 0.9842, "step": 79960 }, { "epoch": 13.593404725480196, "grad_norm": 11.598055839538574, "learning_rate": 1.0677658790866338e-05, "loss": 0.8958, "step": 79970 }, { "epoch": 13.595104538500765, "grad_norm": 16.3624324798584, "learning_rate": 1.0674825769165393e-05, "loss": 1.0489, "step": 79980 }, { "epoch": 13.596804351521332, "grad_norm": 17.915695190429688, "learning_rate": 1.0671992747464445e-05, "loss": 1.2965, "step": 79990 }, { "epoch": 13.5985041645419, "grad_norm": 14.287474632263184, "learning_rate": 1.06691597257635e-05, "loss": 0.9278, "step": 80000 }, { "epoch": 13.600203977562469, "grad_norm": 11.76931381225586, "learning_rate": 1.0666326704062554e-05, "loss": 0.9725, "step": 80010 }, { "epoch": 13.601903790583036, "grad_norm": 13.003822326660156, "learning_rate": 1.0663493682361607e-05, "loss": 0.9309, "step": 80020 }, { "epoch": 13.603603603603604, "grad_norm": 11.909933090209961, "learning_rate": 1.066066066066066e-05, "loss": 0.9249, "step": 80030 }, { "epoch": 13.60530341662417, "grad_norm": 21.485301971435547, "learning_rate": 1.0657827638959716e-05, "loss": 0.9099, "step": 80040 }, { "epoch": 13.60700322964474, "grad_norm": 12.766404151916504, "learning_rate": 1.0654994617258768e-05, "loss": 1.0633, "step": 80050 }, { "epoch": 13.608703042665306, "grad_norm": 14.600138664245605, "learning_rate": 1.0652161595557823e-05, "loss": 0.9646, "step": 80060 }, { "epoch": 13.610402855685875, "grad_norm": 12.941595077514648, "learning_rate": 1.0649328573856876e-05, "loss": 1.0228, "step": 80070 }, { "epoch": 13.612102668706441, "grad_norm": 14.939441680908203, "learning_rate": 1.064649555215593e-05, "loss": 1.1091, "step": 80080 }, { "epoch": 13.61380248172701, "grad_norm": 15.10749626159668, "learning_rate": 1.0643662530454983e-05, "loss": 1.1501, "step": 80090 }, { "epoch": 13.615502294747579, "grad_norm": 15.38902759552002, "learning_rate": 1.0640829508754039e-05, "loss": 0.946, "step": 80100 }, { "epoch": 13.617202107768145, "grad_norm": 29.616188049316406, "learning_rate": 1.063799648705309e-05, "loss": 0.8793, "step": 80110 }, { "epoch": 13.618901920788714, "grad_norm": 14.317166328430176, "learning_rate": 1.0635163465352146e-05, "loss": 0.9296, "step": 80120 }, { "epoch": 13.62060173380928, "grad_norm": 16.452892303466797, "learning_rate": 1.06323304436512e-05, "loss": 0.9788, "step": 80130 }, { "epoch": 13.62230154682985, "grad_norm": 14.00869369506836, "learning_rate": 1.0629497421950251e-05, "loss": 0.9172, "step": 80140 }, { "epoch": 13.624001359850416, "grad_norm": 17.71626091003418, "learning_rate": 1.0626664400249306e-05, "loss": 0.8907, "step": 80150 }, { "epoch": 13.625701172870984, "grad_norm": 14.387921333312988, "learning_rate": 1.062383137854836e-05, "loss": 0.9759, "step": 80160 }, { "epoch": 13.627400985891551, "grad_norm": 13.603713035583496, "learning_rate": 1.0620998356847413e-05, "loss": 0.9514, "step": 80170 }, { "epoch": 13.62910079891212, "grad_norm": 14.405658721923828, "learning_rate": 1.0618165335146467e-05, "loss": 0.8292, "step": 80180 }, { "epoch": 13.630800611932688, "grad_norm": 13.155734062194824, "learning_rate": 1.0615332313445522e-05, "loss": 0.8408, "step": 80190 }, { "epoch": 13.632500424953255, "grad_norm": 14.26158618927002, "learning_rate": 1.0612499291744574e-05, "loss": 1.0373, "step": 80200 }, { "epoch": 13.634200237973824, "grad_norm": 16.663257598876953, "learning_rate": 1.0609666270043629e-05, "loss": 0.9824, "step": 80210 }, { "epoch": 13.63590005099439, "grad_norm": 16.149715423583984, "learning_rate": 1.0606833248342683e-05, "loss": 1.0052, "step": 80220 }, { "epoch": 13.637599864014959, "grad_norm": 11.640251159667969, "learning_rate": 1.0604000226641736e-05, "loss": 1.1209, "step": 80230 }, { "epoch": 13.639299677035526, "grad_norm": 16.741239547729492, "learning_rate": 1.060116720494079e-05, "loss": 0.9568, "step": 80240 }, { "epoch": 13.640999490056094, "grad_norm": 13.697951316833496, "learning_rate": 1.0598334183239845e-05, "loss": 1.04, "step": 80250 }, { "epoch": 13.642699303076661, "grad_norm": 16.24150276184082, "learning_rate": 1.0595501161538897e-05, "loss": 0.8538, "step": 80260 }, { "epoch": 13.64439911609723, "grad_norm": 15.107775688171387, "learning_rate": 1.0592668139837952e-05, "loss": 1.0405, "step": 80270 }, { "epoch": 13.646098929117796, "grad_norm": 9.391400337219238, "learning_rate": 1.0589835118137005e-05, "loss": 0.9807, "step": 80280 }, { "epoch": 13.647798742138365, "grad_norm": 16.972993850708008, "learning_rate": 1.0587002096436059e-05, "loss": 0.8818, "step": 80290 }, { "epoch": 13.649498555158932, "grad_norm": 16.368614196777344, "learning_rate": 1.0584169074735112e-05, "loss": 0.8248, "step": 80300 }, { "epoch": 13.6511983681795, "grad_norm": 16.45641326904297, "learning_rate": 1.0581336053034166e-05, "loss": 0.831, "step": 80310 }, { "epoch": 13.652898181200069, "grad_norm": 11.90392017364502, "learning_rate": 1.0578503031333221e-05, "loss": 0.7062, "step": 80320 }, { "epoch": 13.654597994220635, "grad_norm": 18.697160720825195, "learning_rate": 1.0575670009632273e-05, "loss": 0.924, "step": 80330 }, { "epoch": 13.656297807241204, "grad_norm": 14.49940299987793, "learning_rate": 1.0572836987931328e-05, "loss": 0.8143, "step": 80340 }, { "epoch": 13.65799762026177, "grad_norm": 13.024507522583008, "learning_rate": 1.0570003966230382e-05, "loss": 0.8224, "step": 80350 }, { "epoch": 13.65969743328234, "grad_norm": 17.41438865661621, "learning_rate": 1.0567170944529435e-05, "loss": 1.1474, "step": 80360 }, { "epoch": 13.661397246302906, "grad_norm": 16.5162410736084, "learning_rate": 1.0564337922828489e-05, "loss": 0.9328, "step": 80370 }, { "epoch": 13.663097059323475, "grad_norm": 24.85085678100586, "learning_rate": 1.0561504901127544e-05, "loss": 0.8945, "step": 80380 }, { "epoch": 13.664796872344041, "grad_norm": 12.561933517456055, "learning_rate": 1.0558671879426596e-05, "loss": 0.9964, "step": 80390 }, { "epoch": 13.66649668536461, "grad_norm": 14.291749954223633, "learning_rate": 1.0555838857725651e-05, "loss": 1.0191, "step": 80400 }, { "epoch": 13.668196498385178, "grad_norm": 15.37915325164795, "learning_rate": 1.0553005836024705e-05, "loss": 0.9113, "step": 80410 }, { "epoch": 13.669896311405745, "grad_norm": 17.1910343170166, "learning_rate": 1.0550172814323758e-05, "loss": 0.929, "step": 80420 }, { "epoch": 13.671596124426314, "grad_norm": 10.604166984558105, "learning_rate": 1.0547339792622812e-05, "loss": 1.1944, "step": 80430 }, { "epoch": 13.67329593744688, "grad_norm": 14.353262901306152, "learning_rate": 1.0544506770921867e-05, "loss": 1.0549, "step": 80440 }, { "epoch": 13.674995750467449, "grad_norm": 18.51409149169922, "learning_rate": 1.0541673749220919e-05, "loss": 0.8863, "step": 80450 }, { "epoch": 13.676695563488016, "grad_norm": 13.7403564453125, "learning_rate": 1.0538840727519974e-05, "loss": 0.7921, "step": 80460 }, { "epoch": 13.678395376508584, "grad_norm": 22.94866180419922, "learning_rate": 1.0536007705819027e-05, "loss": 0.8918, "step": 80470 }, { "epoch": 13.680095189529151, "grad_norm": 15.096543312072754, "learning_rate": 1.053317468411808e-05, "loss": 0.8943, "step": 80480 }, { "epoch": 13.68179500254972, "grad_norm": 13.289900779724121, "learning_rate": 1.0530341662417134e-05, "loss": 0.9409, "step": 80490 }, { "epoch": 13.683494815570286, "grad_norm": 14.803690910339355, "learning_rate": 1.0527508640716188e-05, "loss": 0.8415, "step": 80500 }, { "epoch": 13.685194628590855, "grad_norm": 12.761484146118164, "learning_rate": 1.0524675619015241e-05, "loss": 0.9433, "step": 80510 }, { "epoch": 13.686894441611424, "grad_norm": 11.8666410446167, "learning_rate": 1.0521842597314295e-05, "loss": 1.0442, "step": 80520 }, { "epoch": 13.68859425463199, "grad_norm": 17.503305435180664, "learning_rate": 1.051900957561335e-05, "loss": 1.03, "step": 80530 }, { "epoch": 13.690294067652559, "grad_norm": 14.577932357788086, "learning_rate": 1.0516176553912402e-05, "loss": 0.9693, "step": 80540 }, { "epoch": 13.691993880673126, "grad_norm": 16.527801513671875, "learning_rate": 1.0513343532211457e-05, "loss": 0.8321, "step": 80550 }, { "epoch": 13.693693693693694, "grad_norm": 16.584257125854492, "learning_rate": 1.051051051051051e-05, "loss": 1.038, "step": 80560 }, { "epoch": 13.695393506714261, "grad_norm": 20.212034225463867, "learning_rate": 1.0507677488809564e-05, "loss": 1.0128, "step": 80570 }, { "epoch": 13.69709331973483, "grad_norm": 10.397830963134766, "learning_rate": 1.0504844467108618e-05, "loss": 1.0634, "step": 80580 }, { "epoch": 13.698793132755396, "grad_norm": 14.298460960388184, "learning_rate": 1.0502011445407673e-05, "loss": 0.9191, "step": 80590 }, { "epoch": 13.700492945775965, "grad_norm": 21.631515502929688, "learning_rate": 1.0499178423706725e-05, "loss": 0.9214, "step": 80600 }, { "epoch": 13.702192758796532, "grad_norm": 10.592997550964355, "learning_rate": 1.049634540200578e-05, "loss": 0.8565, "step": 80610 }, { "epoch": 13.7038925718171, "grad_norm": 16.26556396484375, "learning_rate": 1.0493512380304834e-05, "loss": 0.847, "step": 80620 }, { "epoch": 13.705592384837669, "grad_norm": 15.925495147705078, "learning_rate": 1.0490679358603887e-05, "loss": 0.9177, "step": 80630 }, { "epoch": 13.707292197858235, "grad_norm": 14.065702438354492, "learning_rate": 1.048784633690294e-05, "loss": 0.9339, "step": 80640 }, { "epoch": 13.708992010878804, "grad_norm": 12.592752456665039, "learning_rate": 1.0485013315201994e-05, "loss": 1.1444, "step": 80650 }, { "epoch": 13.71069182389937, "grad_norm": 16.21600341796875, "learning_rate": 1.048218029350105e-05, "loss": 0.957, "step": 80660 }, { "epoch": 13.71239163691994, "grad_norm": 12.602349281311035, "learning_rate": 1.0479347271800101e-05, "loss": 0.8557, "step": 80670 }, { "epoch": 13.714091449940506, "grad_norm": 13.019012451171875, "learning_rate": 1.0476514250099156e-05, "loss": 0.9456, "step": 80680 }, { "epoch": 13.715791262961075, "grad_norm": 17.698936462402344, "learning_rate": 1.047368122839821e-05, "loss": 1.043, "step": 80690 }, { "epoch": 13.717491075981641, "grad_norm": 15.087394714355469, "learning_rate": 1.0470848206697263e-05, "loss": 0.7978, "step": 80700 }, { "epoch": 13.71919088900221, "grad_norm": 13.499581336975098, "learning_rate": 1.0468015184996317e-05, "loss": 0.9846, "step": 80710 }, { "epoch": 13.720890702022778, "grad_norm": 14.496245384216309, "learning_rate": 1.0465182163295372e-05, "loss": 1.0059, "step": 80720 }, { "epoch": 13.722590515043345, "grad_norm": 17.11132049560547, "learning_rate": 1.0462349141594424e-05, "loss": 0.7933, "step": 80730 }, { "epoch": 13.724290328063914, "grad_norm": 22.434335708618164, "learning_rate": 1.045951611989348e-05, "loss": 0.9294, "step": 80740 }, { "epoch": 13.72599014108448, "grad_norm": 17.226856231689453, "learning_rate": 1.0456683098192533e-05, "loss": 0.974, "step": 80750 }, { "epoch": 13.727689954105049, "grad_norm": 16.889888763427734, "learning_rate": 1.0453850076491586e-05, "loss": 0.9186, "step": 80760 }, { "epoch": 13.729389767125616, "grad_norm": 17.932619094848633, "learning_rate": 1.045101705479064e-05, "loss": 1.0284, "step": 80770 }, { "epoch": 13.731089580146184, "grad_norm": 10.332504272460938, "learning_rate": 1.0448184033089695e-05, "loss": 0.8829, "step": 80780 }, { "epoch": 13.732789393166751, "grad_norm": 14.25515365600586, "learning_rate": 1.0445351011388747e-05, "loss": 0.9446, "step": 80790 }, { "epoch": 13.73448920618732, "grad_norm": 12.648563385009766, "learning_rate": 1.0442517989687802e-05, "loss": 1.0716, "step": 80800 }, { "epoch": 13.736189019207886, "grad_norm": 15.738703727722168, "learning_rate": 1.0439684967986856e-05, "loss": 0.8283, "step": 80810 }, { "epoch": 13.737888832228455, "grad_norm": 12.914507865905762, "learning_rate": 1.0436851946285907e-05, "loss": 1.1226, "step": 80820 }, { "epoch": 13.739588645249022, "grad_norm": 18.215566635131836, "learning_rate": 1.0434018924584963e-05, "loss": 1.0627, "step": 80830 }, { "epoch": 13.74128845826959, "grad_norm": 13.990236282348633, "learning_rate": 1.0431185902884016e-05, "loss": 0.9208, "step": 80840 }, { "epoch": 13.742988271290159, "grad_norm": 12.958571434020996, "learning_rate": 1.042835288118307e-05, "loss": 0.881, "step": 80850 }, { "epoch": 13.744688084310726, "grad_norm": 12.125309944152832, "learning_rate": 1.0425519859482123e-05, "loss": 0.9925, "step": 80860 }, { "epoch": 13.746387897331294, "grad_norm": 9.347014427185059, "learning_rate": 1.0422686837781178e-05, "loss": 1.0802, "step": 80870 }, { "epoch": 13.74808771035186, "grad_norm": 20.38816261291504, "learning_rate": 1.041985381608023e-05, "loss": 0.9283, "step": 80880 }, { "epoch": 13.74978752337243, "grad_norm": 13.816452026367188, "learning_rate": 1.0417020794379285e-05, "loss": 1.0961, "step": 80890 }, { "epoch": 13.751487336392996, "grad_norm": 17.001007080078125, "learning_rate": 1.0414187772678339e-05, "loss": 0.786, "step": 80900 }, { "epoch": 13.753187149413565, "grad_norm": 11.531583786010742, "learning_rate": 1.0411354750977393e-05, "loss": 0.8756, "step": 80910 }, { "epoch": 13.754886962434131, "grad_norm": 16.35150146484375, "learning_rate": 1.0408521729276446e-05, "loss": 1.1547, "step": 80920 }, { "epoch": 13.7565867754547, "grad_norm": 15.220455169677734, "learning_rate": 1.0405688707575501e-05, "loss": 0.9726, "step": 80930 }, { "epoch": 13.758286588475269, "grad_norm": 14.944157600402832, "learning_rate": 1.0402855685874553e-05, "loss": 0.8424, "step": 80940 }, { "epoch": 13.759986401495835, "grad_norm": 13.422187805175781, "learning_rate": 1.0400022664173608e-05, "loss": 1.1665, "step": 80950 }, { "epoch": 13.761686214516404, "grad_norm": 19.623767852783203, "learning_rate": 1.0397189642472662e-05, "loss": 0.9793, "step": 80960 }, { "epoch": 13.76338602753697, "grad_norm": 17.331157684326172, "learning_rate": 1.0394356620771715e-05, "loss": 1.0382, "step": 80970 }, { "epoch": 13.76508584055754, "grad_norm": 15.390863418579102, "learning_rate": 1.0391523599070769e-05, "loss": 0.9287, "step": 80980 }, { "epoch": 13.766785653578106, "grad_norm": 11.93735408782959, "learning_rate": 1.0388690577369822e-05, "loss": 0.914, "step": 80990 }, { "epoch": 13.768485466598674, "grad_norm": 12.496661186218262, "learning_rate": 1.0385857555668876e-05, "loss": 1.0484, "step": 81000 }, { "epoch": 13.770185279619241, "grad_norm": 13.069256782531738, "learning_rate": 1.038302453396793e-05, "loss": 0.9439, "step": 81010 }, { "epoch": 13.77188509263981, "grad_norm": 14.692161560058594, "learning_rate": 1.0380191512266985e-05, "loss": 0.9539, "step": 81020 }, { "epoch": 13.773584905660378, "grad_norm": 11.164834976196289, "learning_rate": 1.0377358490566038e-05, "loss": 0.7618, "step": 81030 }, { "epoch": 13.775284718680945, "grad_norm": 11.309273719787598, "learning_rate": 1.0374525468865092e-05, "loss": 0.86, "step": 81040 }, { "epoch": 13.776984531701514, "grad_norm": 17.179786682128906, "learning_rate": 1.0371692447164145e-05, "loss": 0.8559, "step": 81050 }, { "epoch": 13.77868434472208, "grad_norm": 25.423067092895508, "learning_rate": 1.03688594254632e-05, "loss": 0.9749, "step": 81060 }, { "epoch": 13.780384157742649, "grad_norm": 15.249831199645996, "learning_rate": 1.0366026403762252e-05, "loss": 0.9251, "step": 81070 }, { "epoch": 13.782083970763216, "grad_norm": 18.53769874572754, "learning_rate": 1.0363193382061307e-05, "loss": 0.8932, "step": 81080 }, { "epoch": 13.783783783783784, "grad_norm": 16.85137176513672, "learning_rate": 1.0360360360360361e-05, "loss": 0.9123, "step": 81090 }, { "epoch": 13.785483596804351, "grad_norm": 12.848970413208008, "learning_rate": 1.0357527338659415e-05, "loss": 0.8468, "step": 81100 }, { "epoch": 13.78718340982492, "grad_norm": 14.25011920928955, "learning_rate": 1.0354694316958468e-05, "loss": 0.9602, "step": 81110 }, { "epoch": 13.788883222845486, "grad_norm": 16.119792938232422, "learning_rate": 1.0351861295257523e-05, "loss": 1.0201, "step": 81120 }, { "epoch": 13.790583035866055, "grad_norm": 12.873780250549316, "learning_rate": 1.0349028273556575e-05, "loss": 0.7562, "step": 81130 }, { "epoch": 13.792282848886622, "grad_norm": 14.022262573242188, "learning_rate": 1.034619525185563e-05, "loss": 1.0847, "step": 81140 }, { "epoch": 13.79398266190719, "grad_norm": 20.714670181274414, "learning_rate": 1.0343362230154684e-05, "loss": 1.0184, "step": 81150 }, { "epoch": 13.795682474927759, "grad_norm": 13.570906639099121, "learning_rate": 1.0340529208453737e-05, "loss": 0.9752, "step": 81160 }, { "epoch": 13.797382287948325, "grad_norm": 12.261548042297363, "learning_rate": 1.0337696186752791e-05, "loss": 0.8603, "step": 81170 }, { "epoch": 13.799082100968894, "grad_norm": 28.520578384399414, "learning_rate": 1.0334863165051844e-05, "loss": 1.0115, "step": 81180 }, { "epoch": 13.80078191398946, "grad_norm": 13.85439682006836, "learning_rate": 1.0332030143350898e-05, "loss": 0.9486, "step": 81190 }, { "epoch": 13.80248172701003, "grad_norm": 22.100454330444336, "learning_rate": 1.0329197121649951e-05, "loss": 0.9535, "step": 81200 }, { "epoch": 13.804181540030596, "grad_norm": 21.29884147644043, "learning_rate": 1.0326364099949007e-05, "loss": 0.9716, "step": 81210 }, { "epoch": 13.805881353051165, "grad_norm": 10.157475471496582, "learning_rate": 1.0323531078248058e-05, "loss": 0.9562, "step": 81220 }, { "epoch": 13.807581166071731, "grad_norm": 15.585463523864746, "learning_rate": 1.0320698056547114e-05, "loss": 0.9117, "step": 81230 }, { "epoch": 13.8092809790923, "grad_norm": 10.677257537841797, "learning_rate": 1.0317865034846167e-05, "loss": 0.9582, "step": 81240 }, { "epoch": 13.810980792112868, "grad_norm": 11.730369567871094, "learning_rate": 1.031503201314522e-05, "loss": 1.0599, "step": 81250 }, { "epoch": 13.812680605133435, "grad_norm": 13.043102264404297, "learning_rate": 1.0312198991444274e-05, "loss": 0.8969, "step": 81260 }, { "epoch": 13.814380418154004, "grad_norm": 12.086065292358398, "learning_rate": 1.030936596974333e-05, "loss": 0.6687, "step": 81270 }, { "epoch": 13.81608023117457, "grad_norm": 15.470234870910645, "learning_rate": 1.0306532948042381e-05, "loss": 1.018, "step": 81280 }, { "epoch": 13.817780044195139, "grad_norm": 15.449088096618652, "learning_rate": 1.0303699926341437e-05, "loss": 1.0478, "step": 81290 }, { "epoch": 13.819479857215706, "grad_norm": 18.011524200439453, "learning_rate": 1.030086690464049e-05, "loss": 0.9583, "step": 81300 }, { "epoch": 13.821179670236274, "grad_norm": 14.795978546142578, "learning_rate": 1.0298033882939544e-05, "loss": 0.7523, "step": 81310 }, { "epoch": 13.822879483256841, "grad_norm": 16.626394271850586, "learning_rate": 1.0295200861238597e-05, "loss": 0.7965, "step": 81320 }, { "epoch": 13.82457929627741, "grad_norm": 17.453571319580078, "learning_rate": 1.0292367839537652e-05, "loss": 0.8035, "step": 81330 }, { "epoch": 13.826279109297976, "grad_norm": 13.206852912902832, "learning_rate": 1.0289534817836704e-05, "loss": 0.8934, "step": 81340 }, { "epoch": 13.827978922318545, "grad_norm": 16.731021881103516, "learning_rate": 1.0286701796135758e-05, "loss": 0.9289, "step": 81350 }, { "epoch": 13.829678735339114, "grad_norm": 32.861473083496094, "learning_rate": 1.0283868774434813e-05, "loss": 0.8133, "step": 81360 }, { "epoch": 13.83137854835968, "grad_norm": 10.939358711242676, "learning_rate": 1.0281035752733865e-05, "loss": 0.9815, "step": 81370 }, { "epoch": 13.833078361380249, "grad_norm": 11.009929656982422, "learning_rate": 1.027820273103292e-05, "loss": 1.0052, "step": 81380 }, { "epoch": 13.834778174400816, "grad_norm": 13.41700267791748, "learning_rate": 1.0275369709331973e-05, "loss": 0.8248, "step": 81390 }, { "epoch": 13.836477987421384, "grad_norm": 14.173480033874512, "learning_rate": 1.0272536687631029e-05, "loss": 0.866, "step": 81400 }, { "epoch": 13.838177800441951, "grad_norm": 14.745050430297852, "learning_rate": 1.026970366593008e-05, "loss": 0.9881, "step": 81410 }, { "epoch": 13.83987761346252, "grad_norm": 17.60209083557129, "learning_rate": 1.0266870644229136e-05, "loss": 1.0019, "step": 81420 }, { "epoch": 13.841577426483086, "grad_norm": 13.543964385986328, "learning_rate": 1.026403762252819e-05, "loss": 0.9871, "step": 81430 }, { "epoch": 13.843277239503655, "grad_norm": 14.358375549316406, "learning_rate": 1.0261204600827243e-05, "loss": 1.2258, "step": 81440 }, { "epoch": 13.844977052524222, "grad_norm": 12.203604698181152, "learning_rate": 1.0258371579126296e-05, "loss": 0.8762, "step": 81450 }, { "epoch": 13.84667686554479, "grad_norm": 14.674123764038086, "learning_rate": 1.0255538557425351e-05, "loss": 0.9366, "step": 81460 }, { "epoch": 13.848376678565359, "grad_norm": 13.713099479675293, "learning_rate": 1.0252705535724403e-05, "loss": 0.8038, "step": 81470 }, { "epoch": 13.850076491585925, "grad_norm": 11.930119514465332, "learning_rate": 1.0249872514023459e-05, "loss": 1.0302, "step": 81480 }, { "epoch": 13.851776304606494, "grad_norm": 13.642541885375977, "learning_rate": 1.0247039492322512e-05, "loss": 0.8633, "step": 81490 }, { "epoch": 13.85347611762706, "grad_norm": 28.200462341308594, "learning_rate": 1.0244206470621566e-05, "loss": 0.9078, "step": 81500 }, { "epoch": 13.85517593064763, "grad_norm": 13.293328285217285, "learning_rate": 1.0241373448920619e-05, "loss": 0.874, "step": 81510 }, { "epoch": 13.856875743668196, "grad_norm": 16.731103897094727, "learning_rate": 1.0238540427219673e-05, "loss": 1.1693, "step": 81520 }, { "epoch": 13.858575556688765, "grad_norm": 15.814573287963867, "learning_rate": 1.0235707405518726e-05, "loss": 0.9144, "step": 81530 }, { "epoch": 13.860275369709331, "grad_norm": 13.691399574279785, "learning_rate": 1.023287438381778e-05, "loss": 1.136, "step": 81540 }, { "epoch": 13.8619751827299, "grad_norm": 12.809301376342773, "learning_rate": 1.0230041362116835e-05, "loss": 0.9356, "step": 81550 }, { "epoch": 13.863674995750468, "grad_norm": 20.548887252807617, "learning_rate": 1.0227208340415887e-05, "loss": 0.9816, "step": 81560 }, { "epoch": 13.865374808771035, "grad_norm": 14.512207984924316, "learning_rate": 1.0224375318714942e-05, "loss": 0.9509, "step": 81570 }, { "epoch": 13.867074621791604, "grad_norm": 17.19561004638672, "learning_rate": 1.0221542297013995e-05, "loss": 0.9365, "step": 81580 }, { "epoch": 13.86877443481217, "grad_norm": 10.818811416625977, "learning_rate": 1.0218709275313049e-05, "loss": 0.771, "step": 81590 }, { "epoch": 13.870474247832739, "grad_norm": 15.49968433380127, "learning_rate": 1.0215876253612102e-05, "loss": 0.9432, "step": 81600 }, { "epoch": 13.872174060853306, "grad_norm": 25.524133682250977, "learning_rate": 1.0213043231911158e-05, "loss": 0.8795, "step": 81610 }, { "epoch": 13.873873873873874, "grad_norm": 12.360931396484375, "learning_rate": 1.021021021021021e-05, "loss": 0.8528, "step": 81620 }, { "epoch": 13.875573686894441, "grad_norm": 14.303130149841309, "learning_rate": 1.0207377188509265e-05, "loss": 1.0827, "step": 81630 }, { "epoch": 13.87727349991501, "grad_norm": 12.148262023925781, "learning_rate": 1.0204544166808318e-05, "loss": 1.0401, "step": 81640 }, { "epoch": 13.878973312935576, "grad_norm": 12.609228134155273, "learning_rate": 1.0201711145107372e-05, "loss": 0.7376, "step": 81650 }, { "epoch": 13.880673125956145, "grad_norm": 21.24842071533203, "learning_rate": 1.0198878123406425e-05, "loss": 1.0061, "step": 81660 }, { "epoch": 13.882372938976712, "grad_norm": 15.656471252441406, "learning_rate": 1.019604510170548e-05, "loss": 0.8224, "step": 81670 }, { "epoch": 13.88407275199728, "grad_norm": 14.345212936401367, "learning_rate": 1.0193212080004532e-05, "loss": 0.8678, "step": 81680 }, { "epoch": 13.885772565017849, "grad_norm": 13.264488220214844, "learning_rate": 1.0190379058303586e-05, "loss": 0.6881, "step": 81690 }, { "epoch": 13.887472378038415, "grad_norm": 21.094911575317383, "learning_rate": 1.0187546036602641e-05, "loss": 1.051, "step": 81700 }, { "epoch": 13.889172191058984, "grad_norm": 21.603567123413086, "learning_rate": 1.0184713014901693e-05, "loss": 1.2249, "step": 81710 }, { "epoch": 13.89087200407955, "grad_norm": 18.28354835510254, "learning_rate": 1.0181879993200748e-05, "loss": 0.9381, "step": 81720 }, { "epoch": 13.89257181710012, "grad_norm": 16.9354190826416, "learning_rate": 1.0179046971499802e-05, "loss": 0.9617, "step": 81730 }, { "epoch": 13.894271630120686, "grad_norm": 9.147464752197266, "learning_rate": 1.0176213949798855e-05, "loss": 0.9186, "step": 81740 }, { "epoch": 13.895971443141255, "grad_norm": 13.154172897338867, "learning_rate": 1.0173380928097909e-05, "loss": 0.857, "step": 81750 }, { "epoch": 13.897671256161821, "grad_norm": 9.89717960357666, "learning_rate": 1.0170547906396964e-05, "loss": 0.8422, "step": 81760 }, { "epoch": 13.89937106918239, "grad_norm": 17.283248901367188, "learning_rate": 1.0167714884696017e-05, "loss": 1.0583, "step": 81770 }, { "epoch": 13.901070882202959, "grad_norm": 16.63370704650879, "learning_rate": 1.0164881862995071e-05, "loss": 0.8776, "step": 81780 }, { "epoch": 13.902770695223525, "grad_norm": 16.205228805541992, "learning_rate": 1.0162048841294124e-05, "loss": 0.8574, "step": 81790 }, { "epoch": 13.904470508244094, "grad_norm": 18.423175811767578, "learning_rate": 1.015921581959318e-05, "loss": 0.9622, "step": 81800 }, { "epoch": 13.90617032126466, "grad_norm": 15.703866958618164, "learning_rate": 1.0156382797892232e-05, "loss": 0.7991, "step": 81810 }, { "epoch": 13.90787013428523, "grad_norm": 15.812721252441406, "learning_rate": 1.0153549776191287e-05, "loss": 0.8804, "step": 81820 }, { "epoch": 13.909569947305796, "grad_norm": 20.70928382873535, "learning_rate": 1.015071675449034e-05, "loss": 0.9605, "step": 81830 }, { "epoch": 13.911269760326364, "grad_norm": 17.21004295349121, "learning_rate": 1.0147883732789394e-05, "loss": 0.8732, "step": 81840 }, { "epoch": 13.912969573346931, "grad_norm": 12.721318244934082, "learning_rate": 1.0145050711088447e-05, "loss": 0.9288, "step": 81850 }, { "epoch": 13.9146693863675, "grad_norm": 11.625651359558105, "learning_rate": 1.01422176893875e-05, "loss": 0.7385, "step": 81860 }, { "epoch": 13.916369199388066, "grad_norm": 50.00120162963867, "learning_rate": 1.0139384667686554e-05, "loss": 0.9456, "step": 81870 }, { "epoch": 13.918069012408635, "grad_norm": 10.376282691955566, "learning_rate": 1.0136551645985608e-05, "loss": 1.0149, "step": 81880 }, { "epoch": 13.919768825429204, "grad_norm": 15.476861000061035, "learning_rate": 1.0133718624284663e-05, "loss": 1.0608, "step": 81890 }, { "epoch": 13.92146863844977, "grad_norm": 13.449640274047852, "learning_rate": 1.0130885602583715e-05, "loss": 1.063, "step": 81900 }, { "epoch": 13.923168451470339, "grad_norm": 12.19969367980957, "learning_rate": 1.012805258088277e-05, "loss": 1.1489, "step": 81910 }, { "epoch": 13.924868264490906, "grad_norm": 12.605411529541016, "learning_rate": 1.0125219559181824e-05, "loss": 0.9215, "step": 81920 }, { "epoch": 13.926568077511474, "grad_norm": 17.34209632873535, "learning_rate": 1.0122386537480877e-05, "loss": 0.9142, "step": 81930 }, { "epoch": 13.928267890532041, "grad_norm": 10.530673027038574, "learning_rate": 1.011955351577993e-05, "loss": 1.15, "step": 81940 }, { "epoch": 13.92996770355261, "grad_norm": 10.52264404296875, "learning_rate": 1.0116720494078986e-05, "loss": 0.8848, "step": 81950 }, { "epoch": 13.931667516573176, "grad_norm": 11.409134864807129, "learning_rate": 1.0113887472378038e-05, "loss": 0.8793, "step": 81960 }, { "epoch": 13.933367329593745, "grad_norm": 16.383464813232422, "learning_rate": 1.0111054450677093e-05, "loss": 1.0585, "step": 81970 }, { "epoch": 13.935067142614312, "grad_norm": 14.7285737991333, "learning_rate": 1.0108221428976146e-05, "loss": 0.7686, "step": 81980 }, { "epoch": 13.93676695563488, "grad_norm": 16.912248611450195, "learning_rate": 1.01053884072752e-05, "loss": 0.9753, "step": 81990 }, { "epoch": 13.938466768655449, "grad_norm": 22.676660537719727, "learning_rate": 1.0102555385574254e-05, "loss": 0.7072, "step": 82000 }, { "epoch": 13.940166581676015, "grad_norm": 14.495658874511719, "learning_rate": 1.0099722363873309e-05, "loss": 0.9629, "step": 82010 }, { "epoch": 13.941866394696584, "grad_norm": 9.820878982543945, "learning_rate": 1.009688934217236e-05, "loss": 1.0573, "step": 82020 }, { "epoch": 13.94356620771715, "grad_norm": 12.516462326049805, "learning_rate": 1.0094056320471414e-05, "loss": 0.9359, "step": 82030 }, { "epoch": 13.94526602073772, "grad_norm": 13.90323543548584, "learning_rate": 1.009122329877047e-05, "loss": 0.9249, "step": 82040 }, { "epoch": 13.946965833758286, "grad_norm": 16.334125518798828, "learning_rate": 1.0088390277069521e-05, "loss": 1.1151, "step": 82050 }, { "epoch": 13.948665646778855, "grad_norm": 22.13856315612793, "learning_rate": 1.0085557255368576e-05, "loss": 0.9218, "step": 82060 }, { "epoch": 13.950365459799421, "grad_norm": 13.78686809539795, "learning_rate": 1.008272423366763e-05, "loss": 1.051, "step": 82070 }, { "epoch": 13.95206527281999, "grad_norm": 21.90167236328125, "learning_rate": 1.0079891211966683e-05, "loss": 0.865, "step": 82080 }, { "epoch": 13.953765085840558, "grad_norm": 12.568172454833984, "learning_rate": 1.0077058190265737e-05, "loss": 0.9148, "step": 82090 }, { "epoch": 13.955464898861125, "grad_norm": 13.619126319885254, "learning_rate": 1.0074225168564792e-05, "loss": 0.8148, "step": 82100 }, { "epoch": 13.957164711881694, "grad_norm": 15.933497428894043, "learning_rate": 1.0071392146863846e-05, "loss": 0.9441, "step": 82110 }, { "epoch": 13.95886452490226, "grad_norm": 15.807735443115234, "learning_rate": 1.0068559125162899e-05, "loss": 0.9881, "step": 82120 }, { "epoch": 13.960564337922829, "grad_norm": 9.530674934387207, "learning_rate": 1.0065726103461953e-05, "loss": 0.739, "step": 82130 }, { "epoch": 13.962264150943396, "grad_norm": 13.998543739318848, "learning_rate": 1.0062893081761008e-05, "loss": 0.8525, "step": 82140 }, { "epoch": 13.963963963963964, "grad_norm": 17.528404235839844, "learning_rate": 1.006006006006006e-05, "loss": 0.9228, "step": 82150 }, { "epoch": 13.965663776984531, "grad_norm": 12.720892906188965, "learning_rate": 1.0057227038359115e-05, "loss": 0.8965, "step": 82160 }, { "epoch": 13.9673635900051, "grad_norm": 14.25486946105957, "learning_rate": 1.0054394016658168e-05, "loss": 1.0399, "step": 82170 }, { "epoch": 13.969063403025666, "grad_norm": 12.660863876342773, "learning_rate": 1.0051560994957222e-05, "loss": 0.9515, "step": 82180 }, { "epoch": 13.970763216046235, "grad_norm": 14.104326248168945, "learning_rate": 1.0048727973256275e-05, "loss": 0.938, "step": 82190 }, { "epoch": 13.972463029066802, "grad_norm": 16.160486221313477, "learning_rate": 1.004589495155533e-05, "loss": 1.0313, "step": 82200 }, { "epoch": 13.97416284208737, "grad_norm": 10.01595687866211, "learning_rate": 1.0043061929854383e-05, "loss": 0.9937, "step": 82210 }, { "epoch": 13.975862655107939, "grad_norm": 13.343984603881836, "learning_rate": 1.0040228908153436e-05, "loss": 0.9915, "step": 82220 }, { "epoch": 13.977562468128506, "grad_norm": 13.853736877441406, "learning_rate": 1.0037395886452491e-05, "loss": 0.9532, "step": 82230 }, { "epoch": 13.979262281149074, "grad_norm": 11.842617988586426, "learning_rate": 1.0034562864751543e-05, "loss": 1.0376, "step": 82240 }, { "epoch": 13.98096209416964, "grad_norm": 11.91401481628418, "learning_rate": 1.0031729843050598e-05, "loss": 0.9776, "step": 82250 }, { "epoch": 13.98266190719021, "grad_norm": 18.611860275268555, "learning_rate": 1.0028896821349652e-05, "loss": 1.0284, "step": 82260 }, { "epoch": 13.984361720210776, "grad_norm": 31.193283081054688, "learning_rate": 1.0026063799648705e-05, "loss": 0.822, "step": 82270 }, { "epoch": 13.986061533231345, "grad_norm": 13.412171363830566, "learning_rate": 1.0023230777947759e-05, "loss": 0.968, "step": 82280 }, { "epoch": 13.987761346251911, "grad_norm": 19.313772201538086, "learning_rate": 1.0020397756246814e-05, "loss": 0.9318, "step": 82290 }, { "epoch": 13.98946115927248, "grad_norm": 14.656928062438965, "learning_rate": 1.0017564734545866e-05, "loss": 0.9216, "step": 82300 }, { "epoch": 13.991160972293049, "grad_norm": 17.05354118347168, "learning_rate": 1.0014731712844921e-05, "loss": 0.9533, "step": 82310 }, { "epoch": 13.992860785313615, "grad_norm": 12.993318557739258, "learning_rate": 1.0011898691143975e-05, "loss": 0.6684, "step": 82320 }, { "epoch": 13.994560598334184, "grad_norm": 17.04764747619629, "learning_rate": 1.0009065669443028e-05, "loss": 0.7624, "step": 82330 }, { "epoch": 13.99626041135475, "grad_norm": 14.758978843688965, "learning_rate": 1.0006232647742082e-05, "loss": 0.7537, "step": 82340 }, { "epoch": 13.99796022437532, "grad_norm": 15.866199493408203, "learning_rate": 1.0003399626041137e-05, "loss": 0.9614, "step": 82350 }, { "epoch": 13.999660037395886, "grad_norm": 16.617204666137695, "learning_rate": 1.0000566604340189e-05, "loss": 1.0415, "step": 82360 }, { "epoch": 14.0, "eval_cer": 1.0, "eval_loss": 2.6222567558288574, "eval_runtime": 1967.3557, "eval_samples_per_second": 0.239, "eval_steps_per_second": 0.239, "step": 82362 }, { "epoch": 14.001359850416454, "grad_norm": 14.042247772216797, "learning_rate": 9.997733582639244e-06, "loss": 1.0664, "step": 82370 }, { "epoch": 14.003059663437021, "grad_norm": 13.113764762878418, "learning_rate": 9.994900560938297e-06, "loss": 0.9334, "step": 82380 }, { "epoch": 14.00475947645759, "grad_norm": 15.746429443359375, "learning_rate": 9.99206753923735e-06, "loss": 1.0151, "step": 82390 }, { "epoch": 14.006459289478157, "grad_norm": 15.518426895141602, "learning_rate": 9.989234517536405e-06, "loss": 0.7678, "step": 82400 }, { "epoch": 14.008159102498725, "grad_norm": 20.995113372802734, "learning_rate": 9.986401495835458e-06, "loss": 0.9125, "step": 82410 }, { "epoch": 14.009858915519294, "grad_norm": 12.96017837524414, "learning_rate": 9.983568474134512e-06, "loss": 0.8073, "step": 82420 }, { "epoch": 14.01155872853986, "grad_norm": 18.111841201782227, "learning_rate": 9.980735452433565e-06, "loss": 0.8987, "step": 82430 }, { "epoch": 14.013258541560429, "grad_norm": 14.080025672912598, "learning_rate": 9.97790243073262e-06, "loss": 0.901, "step": 82440 }, { "epoch": 14.014958354580996, "grad_norm": 17.717571258544922, "learning_rate": 9.975069409031672e-06, "loss": 0.8973, "step": 82450 }, { "epoch": 14.016658167601564, "grad_norm": 18.331180572509766, "learning_rate": 9.972236387330727e-06, "loss": 0.9635, "step": 82460 }, { "epoch": 14.018357980622131, "grad_norm": 17.688133239746094, "learning_rate": 9.969403365629781e-06, "loss": 0.8774, "step": 82470 }, { "epoch": 14.0200577936427, "grad_norm": 23.707468032836914, "learning_rate": 9.966570343928836e-06, "loss": 0.9586, "step": 82480 }, { "epoch": 14.021757606663266, "grad_norm": 16.502986907958984, "learning_rate": 9.963737322227888e-06, "loss": 0.6977, "step": 82490 }, { "epoch": 14.023457419683835, "grad_norm": 23.949228286743164, "learning_rate": 9.960904300526943e-06, "loss": 0.691, "step": 82500 }, { "epoch": 14.025157232704403, "grad_norm": 17.236011505126953, "learning_rate": 9.958071278825997e-06, "loss": 1.0079, "step": 82510 }, { "epoch": 14.02685704572497, "grad_norm": 17.777055740356445, "learning_rate": 9.95523825712505e-06, "loss": 0.7878, "step": 82520 }, { "epoch": 14.028556858745539, "grad_norm": 18.358678817749023, "learning_rate": 9.952405235424104e-06, "loss": 0.9853, "step": 82530 }, { "epoch": 14.030256671766105, "grad_norm": 12.197195053100586, "learning_rate": 9.949572213723159e-06, "loss": 0.8411, "step": 82540 }, { "epoch": 14.031956484786674, "grad_norm": 17.542264938354492, "learning_rate": 9.94673919202221e-06, "loss": 1.0081, "step": 82550 }, { "epoch": 14.03365629780724, "grad_norm": 15.028456687927246, "learning_rate": 9.943906170321264e-06, "loss": 0.9078, "step": 82560 }, { "epoch": 14.03535611082781, "grad_norm": 13.123356819152832, "learning_rate": 9.94107314862032e-06, "loss": 0.7544, "step": 82570 }, { "epoch": 14.037055923848376, "grad_norm": 19.953927993774414, "learning_rate": 9.938240126919371e-06, "loss": 0.9924, "step": 82580 }, { "epoch": 14.038755736868945, "grad_norm": 15.734583854675293, "learning_rate": 9.935407105218427e-06, "loss": 0.7819, "step": 82590 }, { "epoch": 14.040455549889511, "grad_norm": 12.492475509643555, "learning_rate": 9.93257408351748e-06, "loss": 0.7611, "step": 82600 }, { "epoch": 14.04215536291008, "grad_norm": 12.828605651855469, "learning_rate": 9.929741061816534e-06, "loss": 1.0504, "step": 82610 }, { "epoch": 14.043855175930648, "grad_norm": 12.20595645904541, "learning_rate": 9.926908040115587e-06, "loss": 0.9316, "step": 82620 }, { "epoch": 14.045554988951215, "grad_norm": 12.693113327026367, "learning_rate": 9.924075018414642e-06, "loss": 0.8104, "step": 82630 }, { "epoch": 14.047254801971784, "grad_norm": 14.787217140197754, "learning_rate": 9.921241996713694e-06, "loss": 0.9784, "step": 82640 }, { "epoch": 14.04895461499235, "grad_norm": 14.803339958190918, "learning_rate": 9.91840897501275e-06, "loss": 0.9225, "step": 82650 }, { "epoch": 14.050654428012919, "grad_norm": 12.344411849975586, "learning_rate": 9.915575953311803e-06, "loss": 0.8541, "step": 82660 }, { "epoch": 14.052354241033486, "grad_norm": 13.763448715209961, "learning_rate": 9.912742931610856e-06, "loss": 0.8838, "step": 82670 }, { "epoch": 14.054054054054054, "grad_norm": 34.41059112548828, "learning_rate": 9.90990990990991e-06, "loss": 0.7397, "step": 82680 }, { "epoch": 14.055753867074621, "grad_norm": 18.484424591064453, "learning_rate": 9.907076888208965e-06, "loss": 0.7601, "step": 82690 }, { "epoch": 14.05745368009519, "grad_norm": 14.205199241638184, "learning_rate": 9.904243866508017e-06, "loss": 0.9118, "step": 82700 }, { "epoch": 14.059153493115756, "grad_norm": 12.552241325378418, "learning_rate": 9.901410844807072e-06, "loss": 0.8063, "step": 82710 }, { "epoch": 14.060853306136325, "grad_norm": 17.032255172729492, "learning_rate": 9.898577823106126e-06, "loss": 0.8469, "step": 82720 }, { "epoch": 14.062553119156894, "grad_norm": 16.717496871948242, "learning_rate": 9.895744801405178e-06, "loss": 0.8266, "step": 82730 }, { "epoch": 14.06425293217746, "grad_norm": 19.612730026245117, "learning_rate": 9.892911779704233e-06, "loss": 0.9191, "step": 82740 }, { "epoch": 14.065952745198029, "grad_norm": 27.506919860839844, "learning_rate": 9.890078758003286e-06, "loss": 0.9571, "step": 82750 }, { "epoch": 14.067652558218596, "grad_norm": 13.535090446472168, "learning_rate": 9.88724573630234e-06, "loss": 0.9184, "step": 82760 }, { "epoch": 14.069352371239164, "grad_norm": 11.49742603302002, "learning_rate": 9.884412714601393e-06, "loss": 0.9511, "step": 82770 }, { "epoch": 14.071052184259731, "grad_norm": 13.213311195373535, "learning_rate": 9.881579692900449e-06, "loss": 0.864, "step": 82780 }, { "epoch": 14.0727519972803, "grad_norm": 14.684614181518555, "learning_rate": 9.8787466711995e-06, "loss": 0.7484, "step": 82790 }, { "epoch": 14.074451810300866, "grad_norm": 14.040779113769531, "learning_rate": 9.875913649498556e-06, "loss": 0.7699, "step": 82800 }, { "epoch": 14.076151623321435, "grad_norm": 9.440950393676758, "learning_rate": 9.873080627797609e-06, "loss": 0.7858, "step": 82810 }, { "epoch": 14.077851436342002, "grad_norm": 14.33569622039795, "learning_rate": 9.870247606096663e-06, "loss": 0.6132, "step": 82820 }, { "epoch": 14.07955124936257, "grad_norm": 12.559207916259766, "learning_rate": 9.867414584395716e-06, "loss": 0.9956, "step": 82830 }, { "epoch": 14.081251062383139, "grad_norm": 14.991832733154297, "learning_rate": 9.864581562694771e-06, "loss": 0.9069, "step": 82840 }, { "epoch": 14.082950875403705, "grad_norm": 10.923843383789062, "learning_rate": 9.861748540993825e-06, "loss": 0.8059, "step": 82850 }, { "epoch": 14.084650688424274, "grad_norm": 12.267611503601074, "learning_rate": 9.858915519292878e-06, "loss": 1.0044, "step": 82860 }, { "epoch": 14.08635050144484, "grad_norm": 14.396658897399902, "learning_rate": 9.856082497591932e-06, "loss": 0.7747, "step": 82870 }, { "epoch": 14.08805031446541, "grad_norm": 14.136241912841797, "learning_rate": 9.853249475890987e-06, "loss": 0.9053, "step": 82880 }, { "epoch": 14.089750127485976, "grad_norm": 16.41604995727539, "learning_rate": 9.850416454190039e-06, "loss": 0.7486, "step": 82890 }, { "epoch": 14.091449940506545, "grad_norm": 50.268821716308594, "learning_rate": 9.847583432489092e-06, "loss": 0.8495, "step": 82900 }, { "epoch": 14.093149753527111, "grad_norm": 19.272197723388672, "learning_rate": 9.844750410788148e-06, "loss": 0.8926, "step": 82910 }, { "epoch": 14.09484956654768, "grad_norm": 14.248559951782227, "learning_rate": 9.8419173890872e-06, "loss": 0.6938, "step": 82920 }, { "epoch": 14.096549379568248, "grad_norm": 13.989852905273438, "learning_rate": 9.839084367386255e-06, "loss": 1.0237, "step": 82930 }, { "epoch": 14.098249192588815, "grad_norm": 14.608888626098633, "learning_rate": 9.836251345685308e-06, "loss": 0.8662, "step": 82940 }, { "epoch": 14.099949005609384, "grad_norm": 12.322266578674316, "learning_rate": 9.833418323984362e-06, "loss": 1.0612, "step": 82950 }, { "epoch": 14.10164881862995, "grad_norm": 17.993953704833984, "learning_rate": 9.830585302283415e-06, "loss": 0.8159, "step": 82960 }, { "epoch": 14.103348631650519, "grad_norm": 13.344793319702148, "learning_rate": 9.82775228058247e-06, "loss": 0.9145, "step": 82970 }, { "epoch": 14.105048444671086, "grad_norm": 17.130077362060547, "learning_rate": 9.824919258881522e-06, "loss": 0.9695, "step": 82980 }, { "epoch": 14.106748257691654, "grad_norm": 10.821344375610352, "learning_rate": 9.822086237180578e-06, "loss": 0.9959, "step": 82990 }, { "epoch": 14.108448070712221, "grad_norm": 11.5763521194458, "learning_rate": 9.819253215479631e-06, "loss": 0.8207, "step": 83000 }, { "epoch": 14.11014788373279, "grad_norm": 13.343158721923828, "learning_rate": 9.816420193778685e-06, "loss": 0.8718, "step": 83010 }, { "epoch": 14.111847696753356, "grad_norm": 18.035106658935547, "learning_rate": 9.813587172077738e-06, "loss": 0.7709, "step": 83020 }, { "epoch": 14.113547509773925, "grad_norm": 12.963569641113281, "learning_rate": 9.810754150376793e-06, "loss": 0.9039, "step": 83030 }, { "epoch": 14.115247322794493, "grad_norm": 15.735084533691406, "learning_rate": 9.807921128675845e-06, "loss": 0.7576, "step": 83040 }, { "epoch": 14.11694713581506, "grad_norm": 12.488394737243652, "learning_rate": 9.8050881069749e-06, "loss": 0.8177, "step": 83050 }, { "epoch": 14.118646948835629, "grad_norm": 11.223280906677246, "learning_rate": 9.802255085273954e-06, "loss": 0.8528, "step": 83060 }, { "epoch": 14.120346761856196, "grad_norm": 10.657822608947754, "learning_rate": 9.799422063573006e-06, "loss": 0.8419, "step": 83070 }, { "epoch": 14.122046574876764, "grad_norm": 10.75789737701416, "learning_rate": 9.796589041872061e-06, "loss": 0.9111, "step": 83080 }, { "epoch": 14.12374638789733, "grad_norm": 15.374222755432129, "learning_rate": 9.793756020171114e-06, "loss": 0.7311, "step": 83090 }, { "epoch": 14.1254462009179, "grad_norm": 14.173067092895508, "learning_rate": 9.790922998470168e-06, "loss": 0.826, "step": 83100 }, { "epoch": 14.127146013938466, "grad_norm": 15.00294017791748, "learning_rate": 9.788089976769222e-06, "loss": 0.911, "step": 83110 }, { "epoch": 14.128845826959035, "grad_norm": 12.821290969848633, "learning_rate": 9.785256955068277e-06, "loss": 0.8605, "step": 83120 }, { "epoch": 14.130545639979601, "grad_norm": 15.59339714050293, "learning_rate": 9.782423933367329e-06, "loss": 0.8568, "step": 83130 }, { "epoch": 14.13224545300017, "grad_norm": 14.096455574035645, "learning_rate": 9.779590911666384e-06, "loss": 0.9871, "step": 83140 }, { "epoch": 14.133945266020739, "grad_norm": 21.01111602783203, "learning_rate": 9.776757889965437e-06, "loss": 0.9513, "step": 83150 }, { "epoch": 14.135645079041305, "grad_norm": 11.055816650390625, "learning_rate": 9.77392486826449e-06, "loss": 0.8501, "step": 83160 }, { "epoch": 14.137344892061874, "grad_norm": 10.732741355895996, "learning_rate": 9.771091846563544e-06, "loss": 0.78, "step": 83170 }, { "epoch": 14.13904470508244, "grad_norm": 15.951929092407227, "learning_rate": 9.7682588248626e-06, "loss": 0.8164, "step": 83180 }, { "epoch": 14.14074451810301, "grad_norm": 15.23215103149414, "learning_rate": 9.765425803161651e-06, "loss": 0.9, "step": 83190 }, { "epoch": 14.142444331123576, "grad_norm": 12.619797706604004, "learning_rate": 9.762592781460707e-06, "loss": 0.8855, "step": 83200 }, { "epoch": 14.144144144144144, "grad_norm": 15.084896087646484, "learning_rate": 9.75975975975976e-06, "loss": 0.9448, "step": 83210 }, { "epoch": 14.145843957164711, "grad_norm": 12.18239974975586, "learning_rate": 9.756926738058815e-06, "loss": 0.835, "step": 83220 }, { "epoch": 14.14754377018528, "grad_norm": 18.2509708404541, "learning_rate": 9.754093716357867e-06, "loss": 0.7271, "step": 83230 }, { "epoch": 14.149243583205847, "grad_norm": 11.590612411499023, "learning_rate": 9.75126069465692e-06, "loss": 1.0226, "step": 83240 }, { "epoch": 14.150943396226415, "grad_norm": 19.96112632751465, "learning_rate": 9.748427672955976e-06, "loss": 0.9376, "step": 83250 }, { "epoch": 14.152643209246984, "grad_norm": 14.632649421691895, "learning_rate": 9.745594651255028e-06, "loss": 0.8267, "step": 83260 }, { "epoch": 14.15434302226755, "grad_norm": 13.163661003112793, "learning_rate": 9.742761629554083e-06, "loss": 0.6874, "step": 83270 }, { "epoch": 14.156042835288119, "grad_norm": 18.50756072998047, "learning_rate": 9.739928607853136e-06, "loss": 0.9413, "step": 83280 }, { "epoch": 14.157742648308686, "grad_norm": 14.484116554260254, "learning_rate": 9.73709558615219e-06, "loss": 0.8604, "step": 83290 }, { "epoch": 14.159442461329254, "grad_norm": 45.584537506103516, "learning_rate": 9.734262564451244e-06, "loss": 0.8153, "step": 83300 }, { "epoch": 14.161142274349821, "grad_norm": 13.536919593811035, "learning_rate": 9.731429542750299e-06, "loss": 0.6145, "step": 83310 }, { "epoch": 14.16284208737039, "grad_norm": 13.961897850036621, "learning_rate": 9.72859652104935e-06, "loss": 0.7915, "step": 83320 }, { "epoch": 14.164541900390956, "grad_norm": 9.542667388916016, "learning_rate": 9.725763499348406e-06, "loss": 0.9537, "step": 83330 }, { "epoch": 14.166241713411525, "grad_norm": 16.184024810791016, "learning_rate": 9.72293047764746e-06, "loss": 0.9164, "step": 83340 }, { "epoch": 14.167941526432092, "grad_norm": 13.692367553710938, "learning_rate": 9.720097455946513e-06, "loss": 0.9638, "step": 83350 }, { "epoch": 14.16964133945266, "grad_norm": 12.701964378356934, "learning_rate": 9.717264434245566e-06, "loss": 0.8329, "step": 83360 }, { "epoch": 14.171341152473229, "grad_norm": 18.73041534423828, "learning_rate": 9.714431412544622e-06, "loss": 0.9435, "step": 83370 }, { "epoch": 14.173040965493795, "grad_norm": 12.297065734863281, "learning_rate": 9.711598390843673e-06, "loss": 0.8254, "step": 83380 }, { "epoch": 14.174740778514364, "grad_norm": 13.631264686584473, "learning_rate": 9.708765369142729e-06, "loss": 0.7958, "step": 83390 }, { "epoch": 14.17644059153493, "grad_norm": 12.853922843933105, "learning_rate": 9.705932347441782e-06, "loss": 0.8621, "step": 83400 }, { "epoch": 14.1781404045555, "grad_norm": 12.867536544799805, "learning_rate": 9.703099325740836e-06, "loss": 0.9365, "step": 83410 }, { "epoch": 14.179840217576066, "grad_norm": 11.133005142211914, "learning_rate": 9.70026630403989e-06, "loss": 1.001, "step": 83420 }, { "epoch": 14.181540030596635, "grad_norm": 14.457795143127441, "learning_rate": 9.697433282338943e-06, "loss": 0.9107, "step": 83430 }, { "epoch": 14.183239843617201, "grad_norm": 13.622757911682129, "learning_rate": 9.694600260637996e-06, "loss": 0.7035, "step": 83440 }, { "epoch": 14.18493965663777, "grad_norm": 14.826756477355957, "learning_rate": 9.69176723893705e-06, "loss": 0.9663, "step": 83450 }, { "epoch": 14.186639469658338, "grad_norm": 12.745162963867188, "learning_rate": 9.688934217236105e-06, "loss": 0.9165, "step": 83460 }, { "epoch": 14.188339282678905, "grad_norm": 15.313756942749023, "learning_rate": 9.686101195535157e-06, "loss": 0.9912, "step": 83470 }, { "epoch": 14.190039095699474, "grad_norm": 13.5181884765625, "learning_rate": 9.683268173834212e-06, "loss": 0.8095, "step": 83480 }, { "epoch": 14.19173890872004, "grad_norm": 17.0294132232666, "learning_rate": 9.680435152133266e-06, "loss": 0.7945, "step": 83490 }, { "epoch": 14.193438721740609, "grad_norm": 15.218610763549805, "learning_rate": 9.677602130432319e-06, "loss": 0.965, "step": 83500 }, { "epoch": 14.195138534761176, "grad_norm": 16.101573944091797, "learning_rate": 9.674769108731373e-06, "loss": 0.9932, "step": 83510 }, { "epoch": 14.196838347781744, "grad_norm": 15.529366493225098, "learning_rate": 9.671936087030428e-06, "loss": 0.9732, "step": 83520 }, { "epoch": 14.198538160802311, "grad_norm": 21.506683349609375, "learning_rate": 9.66910306532948e-06, "loss": 0.884, "step": 83530 }, { "epoch": 14.20023797382288, "grad_norm": 20.880937576293945, "learning_rate": 9.666270043628535e-06, "loss": 0.9359, "step": 83540 }, { "epoch": 14.201937786843446, "grad_norm": 23.399486541748047, "learning_rate": 9.663437021927588e-06, "loss": 0.7005, "step": 83550 }, { "epoch": 14.203637599864015, "grad_norm": 17.624162673950195, "learning_rate": 9.660604000226642e-06, "loss": 0.8269, "step": 83560 }, { "epoch": 14.205337412884584, "grad_norm": 12.684412956237793, "learning_rate": 9.657770978525695e-06, "loss": 0.7654, "step": 83570 }, { "epoch": 14.20703722590515, "grad_norm": 13.362719535827637, "learning_rate": 9.65493795682475e-06, "loss": 0.7284, "step": 83580 }, { "epoch": 14.208737038925719, "grad_norm": 15.732542037963867, "learning_rate": 9.652104935123804e-06, "loss": 0.9234, "step": 83590 }, { "epoch": 14.210436851946286, "grad_norm": 14.775520324707031, "learning_rate": 9.649271913422856e-06, "loss": 0.8391, "step": 83600 }, { "epoch": 14.212136664966854, "grad_norm": 15.997801780700684, "learning_rate": 9.646438891721911e-06, "loss": 0.8641, "step": 83610 }, { "epoch": 14.213836477987421, "grad_norm": 15.685267448425293, "learning_rate": 9.643605870020965e-06, "loss": 0.7589, "step": 83620 }, { "epoch": 14.21553629100799, "grad_norm": 14.858023643493652, "learning_rate": 9.640772848320018e-06, "loss": 0.9007, "step": 83630 }, { "epoch": 14.217236104028556, "grad_norm": 11.16385269165039, "learning_rate": 9.637939826619072e-06, "loss": 1.0092, "step": 83640 }, { "epoch": 14.218935917049125, "grad_norm": 35.031402587890625, "learning_rate": 9.635106804918127e-06, "loss": 0.8079, "step": 83650 }, { "epoch": 14.220635730069692, "grad_norm": 22.133197784423828, "learning_rate": 9.632273783217179e-06, "loss": 0.7528, "step": 83660 }, { "epoch": 14.22233554309026, "grad_norm": 18.212491989135742, "learning_rate": 9.629440761516234e-06, "loss": 0.964, "step": 83670 }, { "epoch": 14.224035356110829, "grad_norm": 15.230124473571777, "learning_rate": 9.626607739815288e-06, "loss": 0.9029, "step": 83680 }, { "epoch": 14.225735169131395, "grad_norm": 14.868284225463867, "learning_rate": 9.623774718114341e-06, "loss": 0.664, "step": 83690 }, { "epoch": 14.227434982151964, "grad_norm": 12.396477699279785, "learning_rate": 9.620941696413395e-06, "loss": 0.7811, "step": 83700 }, { "epoch": 14.22913479517253, "grad_norm": 14.944470405578613, "learning_rate": 9.61810867471245e-06, "loss": 0.7881, "step": 83710 }, { "epoch": 14.2308346081931, "grad_norm": 11.473039627075195, "learning_rate": 9.615275653011502e-06, "loss": 0.9211, "step": 83720 }, { "epoch": 14.232534421213666, "grad_norm": 22.933347702026367, "learning_rate": 9.612442631310557e-06, "loss": 0.7713, "step": 83730 }, { "epoch": 14.234234234234235, "grad_norm": 14.364496231079102, "learning_rate": 9.60960960960961e-06, "loss": 0.829, "step": 83740 }, { "epoch": 14.235934047254801, "grad_norm": 17.278575897216797, "learning_rate": 9.606776587908664e-06, "loss": 1.0107, "step": 83750 }, { "epoch": 14.23763386027537, "grad_norm": 10.550210952758789, "learning_rate": 9.603943566207717e-06, "loss": 0.8711, "step": 83760 }, { "epoch": 14.239333673295937, "grad_norm": 14.648630142211914, "learning_rate": 9.601110544506771e-06, "loss": 1.032, "step": 83770 }, { "epoch": 14.241033486316505, "grad_norm": 11.357612609863281, "learning_rate": 9.598277522805824e-06, "loss": 0.8178, "step": 83780 }, { "epoch": 14.242733299337074, "grad_norm": 13.024635314941406, "learning_rate": 9.595444501104878e-06, "loss": 0.9183, "step": 83790 }, { "epoch": 14.24443311235764, "grad_norm": 17.77234649658203, "learning_rate": 9.592611479403933e-06, "loss": 1.0426, "step": 83800 }, { "epoch": 14.246132925378209, "grad_norm": 13.079293251037598, "learning_rate": 9.589778457702985e-06, "loss": 0.9217, "step": 83810 }, { "epoch": 14.247832738398776, "grad_norm": 14.706375122070312, "learning_rate": 9.58694543600204e-06, "loss": 0.9216, "step": 83820 }, { "epoch": 14.249532551419344, "grad_norm": 18.35411834716797, "learning_rate": 9.584112414301094e-06, "loss": 0.777, "step": 83830 }, { "epoch": 14.251232364439911, "grad_norm": 11.415233612060547, "learning_rate": 9.581279392600147e-06, "loss": 0.8708, "step": 83840 }, { "epoch": 14.25293217746048, "grad_norm": 12.121389389038086, "learning_rate": 9.5784463708992e-06, "loss": 0.697, "step": 83850 }, { "epoch": 14.254631990481046, "grad_norm": 12.734920501708984, "learning_rate": 9.575613349198256e-06, "loss": 0.7326, "step": 83860 }, { "epoch": 14.256331803501615, "grad_norm": 29.05297088623047, "learning_rate": 9.572780327497308e-06, "loss": 0.8447, "step": 83870 }, { "epoch": 14.258031616522182, "grad_norm": 13.640849113464355, "learning_rate": 9.569947305796363e-06, "loss": 0.8269, "step": 83880 }, { "epoch": 14.25973142954275, "grad_norm": 10.649073600769043, "learning_rate": 9.567114284095417e-06, "loss": 0.796, "step": 83890 }, { "epoch": 14.261431242563319, "grad_norm": 14.67101764678955, "learning_rate": 9.56428126239447e-06, "loss": 0.9511, "step": 83900 }, { "epoch": 14.263131055583885, "grad_norm": 19.418745040893555, "learning_rate": 9.561448240693524e-06, "loss": 0.8, "step": 83910 }, { "epoch": 14.264830868604454, "grad_norm": 15.52606201171875, "learning_rate": 9.558615218992579e-06, "loss": 0.8035, "step": 83920 }, { "epoch": 14.26653068162502, "grad_norm": 19.44961929321289, "learning_rate": 9.555782197291632e-06, "loss": 0.828, "step": 83930 }, { "epoch": 14.26823049464559, "grad_norm": 13.167495727539062, "learning_rate": 9.552949175590684e-06, "loss": 0.7806, "step": 83940 }, { "epoch": 14.269930307666156, "grad_norm": 13.300612449645996, "learning_rate": 9.55011615388974e-06, "loss": 0.9178, "step": 83950 }, { "epoch": 14.271630120686725, "grad_norm": 14.699173927307129, "learning_rate": 9.547283132188793e-06, "loss": 0.9376, "step": 83960 }, { "epoch": 14.273329933707291, "grad_norm": 11.949858665466309, "learning_rate": 9.544450110487846e-06, "loss": 0.7663, "step": 83970 }, { "epoch": 14.27502974672786, "grad_norm": 13.225117683410645, "learning_rate": 9.5416170887869e-06, "loss": 0.9157, "step": 83980 }, { "epoch": 14.276729559748428, "grad_norm": 19.75986671447754, "learning_rate": 9.538784067085955e-06, "loss": 0.8888, "step": 83990 }, { "epoch": 14.278429372768995, "grad_norm": 15.518467903137207, "learning_rate": 9.535951045385007e-06, "loss": 0.8977, "step": 84000 }, { "epoch": 14.280129185789564, "grad_norm": 11.588016510009766, "learning_rate": 9.533118023684062e-06, "loss": 0.7731, "step": 84010 }, { "epoch": 14.28182899881013, "grad_norm": 11.726266860961914, "learning_rate": 9.530285001983116e-06, "loss": 0.9065, "step": 84020 }, { "epoch": 14.2835288118307, "grad_norm": 13.767180442810059, "learning_rate": 9.52745198028217e-06, "loss": 0.8144, "step": 84030 }, { "epoch": 14.285228624851266, "grad_norm": 15.410052299499512, "learning_rate": 9.524618958581223e-06, "loss": 0.6656, "step": 84040 }, { "epoch": 14.286928437871834, "grad_norm": 13.882790565490723, "learning_rate": 9.521785936880278e-06, "loss": 0.8581, "step": 84050 }, { "epoch": 14.288628250892401, "grad_norm": 15.232016563415527, "learning_rate": 9.51895291517933e-06, "loss": 0.8216, "step": 84060 }, { "epoch": 14.29032806391297, "grad_norm": 11.394916534423828, "learning_rate": 9.516119893478385e-06, "loss": 0.9312, "step": 84070 }, { "epoch": 14.292027876933536, "grad_norm": 14.419036865234375, "learning_rate": 9.513286871777439e-06, "loss": 0.8379, "step": 84080 }, { "epoch": 14.293727689954105, "grad_norm": 21.607280731201172, "learning_rate": 9.510453850076492e-06, "loss": 0.9558, "step": 84090 }, { "epoch": 14.295427502974674, "grad_norm": 10.858170509338379, "learning_rate": 9.507620828375546e-06, "loss": 0.8854, "step": 84100 }, { "epoch": 14.29712731599524, "grad_norm": 16.400217056274414, "learning_rate": 9.504787806674599e-06, "loss": 0.7281, "step": 84110 }, { "epoch": 14.298827129015809, "grad_norm": 14.881032943725586, "learning_rate": 9.501954784973653e-06, "loss": 0.818, "step": 84120 }, { "epoch": 14.300526942036376, "grad_norm": 14.017580032348633, "learning_rate": 9.499121763272706e-06, "loss": 0.6838, "step": 84130 }, { "epoch": 14.302226755056944, "grad_norm": 14.65023136138916, "learning_rate": 9.496288741571761e-06, "loss": 0.7899, "step": 84140 }, { "epoch": 14.303926568077511, "grad_norm": 14.981184005737305, "learning_rate": 9.493455719870813e-06, "loss": 1.1386, "step": 84150 }, { "epoch": 14.30562638109808, "grad_norm": 13.193531036376953, "learning_rate": 9.490622698169868e-06, "loss": 0.7985, "step": 84160 }, { "epoch": 14.307326194118646, "grad_norm": 15.281545639038086, "learning_rate": 9.487789676468922e-06, "loss": 0.8497, "step": 84170 }, { "epoch": 14.309026007139215, "grad_norm": 14.713544845581055, "learning_rate": 9.484956654767975e-06, "loss": 0.8182, "step": 84180 }, { "epoch": 14.310725820159782, "grad_norm": 16.206912994384766, "learning_rate": 9.482123633067029e-06, "loss": 0.8631, "step": 84190 }, { "epoch": 14.31242563318035, "grad_norm": 12.832676887512207, "learning_rate": 9.479290611366084e-06, "loss": 0.7805, "step": 84200 }, { "epoch": 14.314125446200919, "grad_norm": 11.30794906616211, "learning_rate": 9.476457589665136e-06, "loss": 0.8072, "step": 84210 }, { "epoch": 14.315825259221485, "grad_norm": 14.356650352478027, "learning_rate": 9.473624567964191e-06, "loss": 0.9312, "step": 84220 }, { "epoch": 14.317525072242054, "grad_norm": 12.6611967086792, "learning_rate": 9.470791546263245e-06, "loss": 0.9178, "step": 84230 }, { "epoch": 14.31922488526262, "grad_norm": 17.246416091918945, "learning_rate": 9.467958524562298e-06, "loss": 0.9256, "step": 84240 }, { "epoch": 14.32092469828319, "grad_norm": 14.123771667480469, "learning_rate": 9.465125502861352e-06, "loss": 0.9448, "step": 84250 }, { "epoch": 14.322624511303756, "grad_norm": 11.846830368041992, "learning_rate": 9.462292481160407e-06, "loss": 0.8528, "step": 84260 }, { "epoch": 14.324324324324325, "grad_norm": 16.029817581176758, "learning_rate": 9.459459459459459e-06, "loss": 0.8741, "step": 84270 }, { "epoch": 14.326024137344891, "grad_norm": 23.55037498474121, "learning_rate": 9.456626437758512e-06, "loss": 0.9127, "step": 84280 }, { "epoch": 14.32772395036546, "grad_norm": 11.978034973144531, "learning_rate": 9.453793416057568e-06, "loss": 0.9439, "step": 84290 }, { "epoch": 14.329423763386028, "grad_norm": 14.993334770202637, "learning_rate": 9.450960394356621e-06, "loss": 0.8845, "step": 84300 }, { "epoch": 14.331123576406595, "grad_norm": 17.930850982666016, "learning_rate": 9.448127372655675e-06, "loss": 0.8472, "step": 84310 }, { "epoch": 14.332823389427164, "grad_norm": 12.049192428588867, "learning_rate": 9.445294350954728e-06, "loss": 0.8642, "step": 84320 }, { "epoch": 14.33452320244773, "grad_norm": 14.612112045288086, "learning_rate": 9.442461329253783e-06, "loss": 0.9183, "step": 84330 }, { "epoch": 14.336223015468299, "grad_norm": 13.332186698913574, "learning_rate": 9.439628307552835e-06, "loss": 0.8668, "step": 84340 }, { "epoch": 14.337922828488866, "grad_norm": 11.597899436950684, "learning_rate": 9.43679528585189e-06, "loss": 0.7985, "step": 84350 }, { "epoch": 14.339622641509434, "grad_norm": 14.855598449707031, "learning_rate": 9.433962264150944e-06, "loss": 0.8302, "step": 84360 }, { "epoch": 14.341322454530001, "grad_norm": 14.567793846130371, "learning_rate": 9.431129242449997e-06, "loss": 0.9652, "step": 84370 }, { "epoch": 14.34302226755057, "grad_norm": 12.484006881713867, "learning_rate": 9.428296220749051e-06, "loss": 0.9603, "step": 84380 }, { "epoch": 14.344722080571136, "grad_norm": 14.863333702087402, "learning_rate": 9.425463199048106e-06, "loss": 0.8246, "step": 84390 }, { "epoch": 14.346421893591705, "grad_norm": 18.70992088317871, "learning_rate": 9.422630177347158e-06, "loss": 0.8794, "step": 84400 }, { "epoch": 14.348121706612273, "grad_norm": 10.96373462677002, "learning_rate": 9.419797155646213e-06, "loss": 0.8632, "step": 84410 }, { "epoch": 14.34982151963284, "grad_norm": 13.916786193847656, "learning_rate": 9.416964133945267e-06, "loss": 0.9995, "step": 84420 }, { "epoch": 14.351521332653409, "grad_norm": 12.56200122833252, "learning_rate": 9.41413111224432e-06, "loss": 1.0384, "step": 84430 }, { "epoch": 14.353221145673976, "grad_norm": 13.185583114624023, "learning_rate": 9.411298090543374e-06, "loss": 0.8842, "step": 84440 }, { "epoch": 14.354920958694544, "grad_norm": 13.230549812316895, "learning_rate": 9.408465068842427e-06, "loss": 0.9307, "step": 84450 }, { "epoch": 14.35662077171511, "grad_norm": 14.557941436767578, "learning_rate": 9.405632047141481e-06, "loss": 0.6897, "step": 84460 }, { "epoch": 14.35832058473568, "grad_norm": 17.428203582763672, "learning_rate": 9.402799025440534e-06, "loss": 0.9235, "step": 84470 }, { "epoch": 14.360020397756246, "grad_norm": 14.809011459350586, "learning_rate": 9.39996600373959e-06, "loss": 0.8729, "step": 84480 }, { "epoch": 14.361720210776815, "grad_norm": 14.838473320007324, "learning_rate": 9.397132982038641e-06, "loss": 1.0043, "step": 84490 }, { "epoch": 14.363420023797381, "grad_norm": 12.440117835998535, "learning_rate": 9.394299960337697e-06, "loss": 0.8758, "step": 84500 }, { "epoch": 14.36511983681795, "grad_norm": 14.961898803710938, "learning_rate": 9.39146693863675e-06, "loss": 0.9265, "step": 84510 }, { "epoch": 14.366819649838519, "grad_norm": 14.448760032653809, "learning_rate": 9.388633916935804e-06, "loss": 0.6924, "step": 84520 }, { "epoch": 14.368519462859085, "grad_norm": 14.886610984802246, "learning_rate": 9.385800895234857e-06, "loss": 0.7878, "step": 84530 }, { "epoch": 14.370219275879654, "grad_norm": 12.945816040039062, "learning_rate": 9.382967873533912e-06, "loss": 0.7542, "step": 84540 }, { "epoch": 14.37191908890022, "grad_norm": 10.31100082397461, "learning_rate": 9.380134851832964e-06, "loss": 0.8754, "step": 84550 }, { "epoch": 14.37361890192079, "grad_norm": 11.904182434082031, "learning_rate": 9.37730183013202e-06, "loss": 0.8325, "step": 84560 }, { "epoch": 14.375318714941356, "grad_norm": 11.595678329467773, "learning_rate": 9.374468808431073e-06, "loss": 0.7664, "step": 84570 }, { "epoch": 14.377018527961924, "grad_norm": 12.650553703308105, "learning_rate": 9.371635786730127e-06, "loss": 0.7087, "step": 84580 }, { "epoch": 14.378718340982491, "grad_norm": 13.672147750854492, "learning_rate": 9.36880276502918e-06, "loss": 0.8438, "step": 84590 }, { "epoch": 14.38041815400306, "grad_norm": 12.105737686157227, "learning_rate": 9.365969743328235e-06, "loss": 0.8072, "step": 84600 }, { "epoch": 14.382117967023627, "grad_norm": 14.169055938720703, "learning_rate": 9.363136721627287e-06, "loss": 0.8169, "step": 84610 }, { "epoch": 14.383817780044195, "grad_norm": 11.801485061645508, "learning_rate": 9.360303699926342e-06, "loss": 0.8779, "step": 84620 }, { "epoch": 14.385517593064764, "grad_norm": 10.448821067810059, "learning_rate": 9.357470678225396e-06, "loss": 0.65, "step": 84630 }, { "epoch": 14.38721740608533, "grad_norm": 9.204331398010254, "learning_rate": 9.354637656524448e-06, "loss": 0.8261, "step": 84640 }, { "epoch": 14.388917219105899, "grad_norm": 9.733353614807129, "learning_rate": 9.351804634823503e-06, "loss": 0.9077, "step": 84650 }, { "epoch": 14.390617032126466, "grad_norm": 14.468692779541016, "learning_rate": 9.348971613122556e-06, "loss": 0.9454, "step": 84660 }, { "epoch": 14.392316845147034, "grad_norm": 15.92210578918457, "learning_rate": 9.346138591421612e-06, "loss": 0.9824, "step": 84670 }, { "epoch": 14.394016658167601, "grad_norm": 14.238725662231445, "learning_rate": 9.343305569720663e-06, "loss": 0.8743, "step": 84680 }, { "epoch": 14.39571647118817, "grad_norm": 16.651811599731445, "learning_rate": 9.340472548019719e-06, "loss": 0.7856, "step": 84690 }, { "epoch": 14.397416284208736, "grad_norm": 11.863250732421875, "learning_rate": 9.337639526318772e-06, "loss": 0.7831, "step": 84700 }, { "epoch": 14.399116097229305, "grad_norm": 12.761455535888672, "learning_rate": 9.334806504617826e-06, "loss": 0.9311, "step": 84710 }, { "epoch": 14.400815910249872, "grad_norm": 33.29697799682617, "learning_rate": 9.33197348291688e-06, "loss": 0.8872, "step": 84720 }, { "epoch": 14.40251572327044, "grad_norm": 13.788006782531738, "learning_rate": 9.329140461215934e-06, "loss": 0.6809, "step": 84730 }, { "epoch": 14.404215536291009, "grad_norm": 13.831216812133789, "learning_rate": 9.326307439514986e-06, "loss": 0.7213, "step": 84740 }, { "epoch": 14.405915349311575, "grad_norm": 8.530976295471191, "learning_rate": 9.323474417814041e-06, "loss": 0.7777, "step": 84750 }, { "epoch": 14.407615162332144, "grad_norm": 15.650282859802246, "learning_rate": 9.320641396113095e-06, "loss": 0.9581, "step": 84760 }, { "epoch": 14.40931497535271, "grad_norm": 12.339841842651367, "learning_rate": 9.317808374412149e-06, "loss": 0.8173, "step": 84770 }, { "epoch": 14.41101478837328, "grad_norm": 12.926785469055176, "learning_rate": 9.314975352711202e-06, "loss": 0.8962, "step": 84780 }, { "epoch": 14.412714601393846, "grad_norm": 12.19104290008545, "learning_rate": 9.312142331010257e-06, "loss": 0.8746, "step": 84790 }, { "epoch": 14.414414414414415, "grad_norm": 17.761289596557617, "learning_rate": 9.309309309309309e-06, "loss": 0.7373, "step": 84800 }, { "epoch": 14.416114227434981, "grad_norm": 17.316423416137695, "learning_rate": 9.306476287608363e-06, "loss": 0.7943, "step": 84810 }, { "epoch": 14.41781404045555, "grad_norm": 14.135146141052246, "learning_rate": 9.303643265907418e-06, "loss": 0.9199, "step": 84820 }, { "epoch": 14.419513853476118, "grad_norm": 21.26985740661621, "learning_rate": 9.30081024420647e-06, "loss": 0.8298, "step": 84830 }, { "epoch": 14.421213666496685, "grad_norm": 23.036108016967773, "learning_rate": 9.297977222505525e-06, "loss": 0.8171, "step": 84840 }, { "epoch": 14.422913479517254, "grad_norm": 13.882655143737793, "learning_rate": 9.295144200804578e-06, "loss": 0.7058, "step": 84850 }, { "epoch": 14.42461329253782, "grad_norm": 16.632427215576172, "learning_rate": 9.292311179103632e-06, "loss": 0.8386, "step": 84860 }, { "epoch": 14.426313105558389, "grad_norm": 19.079971313476562, "learning_rate": 9.289478157402685e-06, "loss": 0.8528, "step": 84870 }, { "epoch": 14.428012918578956, "grad_norm": 13.68086051940918, "learning_rate": 9.28664513570174e-06, "loss": 0.7618, "step": 84880 }, { "epoch": 14.429712731599524, "grad_norm": 13.763449668884277, "learning_rate": 9.283812114000792e-06, "loss": 0.9302, "step": 84890 }, { "epoch": 14.431412544620091, "grad_norm": 17.485118865966797, "learning_rate": 9.280979092299848e-06, "loss": 0.8312, "step": 84900 }, { "epoch": 14.43311235764066, "grad_norm": 15.163558006286621, "learning_rate": 9.278146070598901e-06, "loss": 0.7315, "step": 84910 }, { "epoch": 14.434812170661226, "grad_norm": 14.939255714416504, "learning_rate": 9.275313048897955e-06, "loss": 0.7502, "step": 84920 }, { "epoch": 14.436511983681795, "grad_norm": 14.87673568725586, "learning_rate": 9.272480027197008e-06, "loss": 0.8275, "step": 84930 }, { "epoch": 14.438211796702364, "grad_norm": 10.55141830444336, "learning_rate": 9.269647005496063e-06, "loss": 0.7098, "step": 84940 }, { "epoch": 14.43991160972293, "grad_norm": 11.720373153686523, "learning_rate": 9.266813983795115e-06, "loss": 0.9095, "step": 84950 }, { "epoch": 14.441611422743499, "grad_norm": 15.414345741271973, "learning_rate": 9.26398096209417e-06, "loss": 0.931, "step": 84960 }, { "epoch": 14.443311235764066, "grad_norm": 15.214606285095215, "learning_rate": 9.261147940393224e-06, "loss": 0.7623, "step": 84970 }, { "epoch": 14.445011048784634, "grad_norm": 15.93912410736084, "learning_rate": 9.258314918692276e-06, "loss": 0.832, "step": 84980 }, { "epoch": 14.446710861805201, "grad_norm": 18.04694175720215, "learning_rate": 9.255481896991331e-06, "loss": 1.0295, "step": 84990 }, { "epoch": 14.44841067482577, "grad_norm": 21.31618309020996, "learning_rate": 9.252648875290385e-06, "loss": 0.8806, "step": 85000 }, { "epoch": 14.450110487846336, "grad_norm": 10.850683212280273, "learning_rate": 9.249815853589438e-06, "loss": 0.7426, "step": 85010 }, { "epoch": 14.451810300866905, "grad_norm": 12.883768081665039, "learning_rate": 9.246982831888492e-06, "loss": 0.8491, "step": 85020 }, { "epoch": 14.453510113887472, "grad_norm": 15.080554008483887, "learning_rate": 9.244149810187547e-06, "loss": 0.8709, "step": 85030 }, { "epoch": 14.45520992690804, "grad_norm": 12.867774963378906, "learning_rate": 9.2413167884866e-06, "loss": 0.8703, "step": 85040 }, { "epoch": 14.456909739928609, "grad_norm": 24.072586059570312, "learning_rate": 9.238483766785654e-06, "loss": 0.9084, "step": 85050 }, { "epoch": 14.458609552949175, "grad_norm": 9.265376091003418, "learning_rate": 9.235650745084707e-06, "loss": 0.8056, "step": 85060 }, { "epoch": 14.460309365969744, "grad_norm": 13.26463794708252, "learning_rate": 9.232817723383763e-06, "loss": 0.74, "step": 85070 }, { "epoch": 14.46200917899031, "grad_norm": 11.532594680786133, "learning_rate": 9.229984701682814e-06, "loss": 0.7703, "step": 85080 }, { "epoch": 14.46370899201088, "grad_norm": 20.275711059570312, "learning_rate": 9.22715167998187e-06, "loss": 0.8911, "step": 85090 }, { "epoch": 14.465408805031446, "grad_norm": 13.714929580688477, "learning_rate": 9.224318658280923e-06, "loss": 0.9688, "step": 85100 }, { "epoch": 14.467108618052015, "grad_norm": 12.907546043395996, "learning_rate": 9.221485636579977e-06, "loss": 0.8978, "step": 85110 }, { "epoch": 14.468808431072581, "grad_norm": 16.669702529907227, "learning_rate": 9.21865261487903e-06, "loss": 0.7428, "step": 85120 }, { "epoch": 14.47050824409315, "grad_norm": 18.150468826293945, "learning_rate": 9.215819593178085e-06, "loss": 0.9601, "step": 85130 }, { "epoch": 14.472208057113718, "grad_norm": 12.200358390808105, "learning_rate": 9.212986571477137e-06, "loss": 0.8312, "step": 85140 }, { "epoch": 14.473907870134285, "grad_norm": 14.8934326171875, "learning_rate": 9.21015354977619e-06, "loss": 0.8423, "step": 85150 }, { "epoch": 14.475607683154854, "grad_norm": 10.867552757263184, "learning_rate": 9.207320528075246e-06, "loss": 0.6967, "step": 85160 }, { "epoch": 14.47730749617542, "grad_norm": 16.380889892578125, "learning_rate": 9.204487506374298e-06, "loss": 0.7865, "step": 85170 }, { "epoch": 14.479007309195989, "grad_norm": 16.503568649291992, "learning_rate": 9.201654484673353e-06, "loss": 0.9978, "step": 85180 }, { "epoch": 14.480707122216556, "grad_norm": 15.941771507263184, "learning_rate": 9.198821462972407e-06, "loss": 1.0369, "step": 85190 }, { "epoch": 14.482406935237124, "grad_norm": 15.281553268432617, "learning_rate": 9.19598844127146e-06, "loss": 0.8227, "step": 85200 }, { "epoch": 14.484106748257691, "grad_norm": 14.32272720336914, "learning_rate": 9.193155419570514e-06, "loss": 0.864, "step": 85210 }, { "epoch": 14.48580656127826, "grad_norm": 11.576541900634766, "learning_rate": 9.190322397869569e-06, "loss": 0.8908, "step": 85220 }, { "epoch": 14.487506374298826, "grad_norm": 8.796948432922363, "learning_rate": 9.18748937616862e-06, "loss": 0.8571, "step": 85230 }, { "epoch": 14.489206187319395, "grad_norm": 12.963590621948242, "learning_rate": 9.184656354467676e-06, "loss": 0.7579, "step": 85240 }, { "epoch": 14.490906000339963, "grad_norm": 12.915694236755371, "learning_rate": 9.18182333276673e-06, "loss": 0.762, "step": 85250 }, { "epoch": 14.49260581336053, "grad_norm": 14.83443832397461, "learning_rate": 9.178990311065783e-06, "loss": 0.8502, "step": 85260 }, { "epoch": 14.494305626381099, "grad_norm": 15.390841484069824, "learning_rate": 9.176157289364836e-06, "loss": 0.7924, "step": 85270 }, { "epoch": 14.496005439401666, "grad_norm": 13.627537727355957, "learning_rate": 9.173324267663892e-06, "loss": 0.8242, "step": 85280 }, { "epoch": 14.497705252422234, "grad_norm": 14.124402046203613, "learning_rate": 9.170491245962943e-06, "loss": 0.8111, "step": 85290 }, { "epoch": 14.4994050654428, "grad_norm": 11.817280769348145, "learning_rate": 9.167658224261999e-06, "loss": 0.8113, "step": 85300 }, { "epoch": 14.50110487846337, "grad_norm": 12.125370025634766, "learning_rate": 9.164825202561052e-06, "loss": 0.7805, "step": 85310 }, { "epoch": 14.502804691483936, "grad_norm": 13.762063026428223, "learning_rate": 9.161992180860104e-06, "loss": 0.9605, "step": 85320 }, { "epoch": 14.504504504504505, "grad_norm": 14.939828872680664, "learning_rate": 9.15915915915916e-06, "loss": 0.7865, "step": 85330 }, { "epoch": 14.506204317525071, "grad_norm": 14.657877922058105, "learning_rate": 9.156326137458213e-06, "loss": 0.7115, "step": 85340 }, { "epoch": 14.50790413054564, "grad_norm": 35.202205657958984, "learning_rate": 9.153493115757266e-06, "loss": 0.8242, "step": 85350 }, { "epoch": 14.509603943566209, "grad_norm": 11.194051742553711, "learning_rate": 9.15066009405632e-06, "loss": 0.9396, "step": 85360 }, { "epoch": 14.511303756586775, "grad_norm": 21.726150512695312, "learning_rate": 9.147827072355375e-06, "loss": 1.0666, "step": 85370 }, { "epoch": 14.513003569607344, "grad_norm": 15.074999809265137, "learning_rate": 9.144994050654427e-06, "loss": 1.1677, "step": 85380 }, { "epoch": 14.51470338262791, "grad_norm": 9.580687522888184, "learning_rate": 9.142161028953482e-06, "loss": 0.7983, "step": 85390 }, { "epoch": 14.51640319564848, "grad_norm": 11.480216026306152, "learning_rate": 9.139328007252536e-06, "loss": 0.9609, "step": 85400 }, { "epoch": 14.518103008669046, "grad_norm": 16.065467834472656, "learning_rate": 9.13649498555159e-06, "loss": 0.7854, "step": 85410 }, { "epoch": 14.519802821689614, "grad_norm": 14.395238876342773, "learning_rate": 9.133661963850643e-06, "loss": 0.8104, "step": 85420 }, { "epoch": 14.521502634710181, "grad_norm": 16.201162338256836, "learning_rate": 9.130828942149698e-06, "loss": 0.9102, "step": 85430 }, { "epoch": 14.52320244773075, "grad_norm": 18.729211807250977, "learning_rate": 9.127995920448751e-06, "loss": 0.8717, "step": 85440 }, { "epoch": 14.524902260751318, "grad_norm": 12.786539077758789, "learning_rate": 9.125162898747805e-06, "loss": 0.82, "step": 85450 }, { "epoch": 14.526602073771885, "grad_norm": 13.409319877624512, "learning_rate": 9.122329877046858e-06, "loss": 0.7572, "step": 85460 }, { "epoch": 14.528301886792454, "grad_norm": 15.619871139526367, "learning_rate": 9.119496855345914e-06, "loss": 0.9001, "step": 85470 }, { "epoch": 14.53000169981302, "grad_norm": 16.31678009033203, "learning_rate": 9.116663833644965e-06, "loss": 0.9038, "step": 85480 }, { "epoch": 14.531701512833589, "grad_norm": 12.938547134399414, "learning_rate": 9.113830811944019e-06, "loss": 0.837, "step": 85490 }, { "epoch": 14.533401325854156, "grad_norm": 24.375530242919922, "learning_rate": 9.110997790243074e-06, "loss": 0.9177, "step": 85500 }, { "epoch": 14.535101138874724, "grad_norm": 14.70201587677002, "learning_rate": 9.108164768542126e-06, "loss": 0.9282, "step": 85510 }, { "epoch": 14.536800951895291, "grad_norm": 21.750165939331055, "learning_rate": 9.105331746841181e-06, "loss": 0.951, "step": 85520 }, { "epoch": 14.53850076491586, "grad_norm": 14.859914779663086, "learning_rate": 9.102498725140235e-06, "loss": 0.7729, "step": 85530 }, { "epoch": 14.540200577936426, "grad_norm": 10.312244415283203, "learning_rate": 9.099665703439288e-06, "loss": 0.933, "step": 85540 }, { "epoch": 14.541900390956995, "grad_norm": 11.58414363861084, "learning_rate": 9.096832681738342e-06, "loss": 0.9213, "step": 85550 }, { "epoch": 14.543600203977562, "grad_norm": 12.688789367675781, "learning_rate": 9.093999660037397e-06, "loss": 0.7252, "step": 85560 }, { "epoch": 14.54530001699813, "grad_norm": 88.76199340820312, "learning_rate": 9.091166638336449e-06, "loss": 0.9859, "step": 85570 }, { "epoch": 14.546999830018699, "grad_norm": 16.924785614013672, "learning_rate": 9.088333616635504e-06, "loss": 0.8572, "step": 85580 }, { "epoch": 14.548699643039265, "grad_norm": 22.803903579711914, "learning_rate": 9.085500594934558e-06, "loss": 0.8839, "step": 85590 }, { "epoch": 14.550399456059834, "grad_norm": 13.053563117980957, "learning_rate": 9.082667573233611e-06, "loss": 0.7962, "step": 85600 }, { "epoch": 14.5520992690804, "grad_norm": 15.73507022857666, "learning_rate": 9.079834551532665e-06, "loss": 0.9453, "step": 85610 }, { "epoch": 14.55379908210097, "grad_norm": 16.686729431152344, "learning_rate": 9.07700152983172e-06, "loss": 0.7707, "step": 85620 }, { "epoch": 14.555498895121536, "grad_norm": 16.435823440551758, "learning_rate": 9.074168508130772e-06, "loss": 0.9536, "step": 85630 }, { "epoch": 14.557198708142105, "grad_norm": 13.800699234008789, "learning_rate": 9.071335486429827e-06, "loss": 0.8565, "step": 85640 }, { "epoch": 14.558898521162671, "grad_norm": 13.686726570129395, "learning_rate": 9.06850246472888e-06, "loss": 0.9253, "step": 85650 }, { "epoch": 14.56059833418324, "grad_norm": 14.24581527709961, "learning_rate": 9.065669443027934e-06, "loss": 0.8963, "step": 85660 }, { "epoch": 14.562298147203808, "grad_norm": 10.603869438171387, "learning_rate": 9.062836421326987e-06, "loss": 0.8058, "step": 85670 }, { "epoch": 14.563997960224375, "grad_norm": 13.308588027954102, "learning_rate": 9.060003399626041e-06, "loss": 1.181, "step": 85680 }, { "epoch": 14.565697773244944, "grad_norm": 13.644857406616211, "learning_rate": 9.057170377925095e-06, "loss": 0.7182, "step": 85690 }, { "epoch": 14.56739758626551, "grad_norm": 11.2286376953125, "learning_rate": 9.054337356224148e-06, "loss": 0.8145, "step": 85700 }, { "epoch": 14.569097399286079, "grad_norm": 44.10304641723633, "learning_rate": 9.051504334523203e-06, "loss": 0.9881, "step": 85710 }, { "epoch": 14.570797212306646, "grad_norm": 14.938362121582031, "learning_rate": 9.048671312822255e-06, "loss": 0.8087, "step": 85720 }, { "epoch": 14.572497025327214, "grad_norm": 13.848048210144043, "learning_rate": 9.04583829112131e-06, "loss": 0.9547, "step": 85730 }, { "epoch": 14.574196838347781, "grad_norm": 14.727082252502441, "learning_rate": 9.043005269420364e-06, "loss": 0.9047, "step": 85740 }, { "epoch": 14.57589665136835, "grad_norm": 19.755191802978516, "learning_rate": 9.040172247719417e-06, "loss": 0.8298, "step": 85750 }, { "epoch": 14.577596464388916, "grad_norm": 13.794106483459473, "learning_rate": 9.037339226018471e-06, "loss": 0.8977, "step": 85760 }, { "epoch": 14.579296277409485, "grad_norm": 13.300180435180664, "learning_rate": 9.034506204317526e-06, "loss": 0.7687, "step": 85770 }, { "epoch": 14.580996090430054, "grad_norm": 13.133766174316406, "learning_rate": 9.03167318261658e-06, "loss": 1.0101, "step": 85780 }, { "epoch": 14.58269590345062, "grad_norm": 15.194700241088867, "learning_rate": 9.028840160915633e-06, "loss": 0.8794, "step": 85790 }, { "epoch": 14.584395716471189, "grad_norm": 21.537975311279297, "learning_rate": 9.026007139214687e-06, "loss": 0.8442, "step": 85800 }, { "epoch": 14.586095529491756, "grad_norm": 18.347213745117188, "learning_rate": 9.023174117513742e-06, "loss": 0.9211, "step": 85810 }, { "epoch": 14.587795342512324, "grad_norm": 17.084749221801758, "learning_rate": 9.020341095812794e-06, "loss": 0.819, "step": 85820 }, { "epoch": 14.589495155532891, "grad_norm": 15.718057632446289, "learning_rate": 9.017508074111849e-06, "loss": 0.877, "step": 85830 }, { "epoch": 14.59119496855346, "grad_norm": 18.810495376586914, "learning_rate": 9.014675052410902e-06, "loss": 0.6633, "step": 85840 }, { "epoch": 14.592894781574026, "grad_norm": 11.116935729980469, "learning_rate": 9.011842030709954e-06, "loss": 0.7884, "step": 85850 }, { "epoch": 14.594594594594595, "grad_norm": 14.924040794372559, "learning_rate": 9.00900900900901e-06, "loss": 0.8716, "step": 85860 }, { "epoch": 14.596294407615162, "grad_norm": 15.538790702819824, "learning_rate": 9.006175987308063e-06, "loss": 1.0261, "step": 85870 }, { "epoch": 14.59799422063573, "grad_norm": 13.389365196228027, "learning_rate": 9.003342965607117e-06, "loss": 1.0551, "step": 85880 }, { "epoch": 14.599694033656299, "grad_norm": 18.208236694335938, "learning_rate": 9.00050994390617e-06, "loss": 0.8868, "step": 85890 }, { "epoch": 14.601393846676865, "grad_norm": 14.046669960021973, "learning_rate": 8.997676922205225e-06, "loss": 0.7711, "step": 85900 }, { "epoch": 14.603093659697434, "grad_norm": 13.28429126739502, "learning_rate": 8.994843900504277e-06, "loss": 0.9264, "step": 85910 }, { "epoch": 14.604793472718, "grad_norm": 12.814798355102539, "learning_rate": 8.992010878803332e-06, "loss": 0.8902, "step": 85920 }, { "epoch": 14.60649328573857, "grad_norm": 20.714557647705078, "learning_rate": 8.989177857102386e-06, "loss": 0.9296, "step": 85930 }, { "epoch": 14.608193098759136, "grad_norm": 16.62053680419922, "learning_rate": 8.98634483540144e-06, "loss": 0.7444, "step": 85940 }, { "epoch": 14.609892911779705, "grad_norm": 18.62727165222168, "learning_rate": 8.983511813700493e-06, "loss": 0.7188, "step": 85950 }, { "epoch": 14.611592724800271, "grad_norm": 12.613456726074219, "learning_rate": 8.980678791999548e-06, "loss": 0.7646, "step": 85960 }, { "epoch": 14.61329253782084, "grad_norm": 20.842370986938477, "learning_rate": 8.9778457702986e-06, "loss": 0.7333, "step": 85970 }, { "epoch": 14.614992350841408, "grad_norm": 35.26179504394531, "learning_rate": 8.975012748597655e-06, "loss": 0.6616, "step": 85980 }, { "epoch": 14.616692163861975, "grad_norm": 11.608023643493652, "learning_rate": 8.972179726896709e-06, "loss": 0.9609, "step": 85990 }, { "epoch": 14.618391976882544, "grad_norm": 9.719439506530762, "learning_rate": 8.969346705195762e-06, "loss": 0.8104, "step": 86000 }, { "epoch": 14.62009178990311, "grad_norm": 13.207093238830566, "learning_rate": 8.966513683494816e-06, "loss": 0.8108, "step": 86010 }, { "epoch": 14.621791602923679, "grad_norm": 17.137981414794922, "learning_rate": 8.96368066179387e-06, "loss": 0.84, "step": 86020 }, { "epoch": 14.623491415944246, "grad_norm": 17.04666519165039, "learning_rate": 8.960847640092923e-06, "loss": 0.8352, "step": 86030 }, { "epoch": 14.625191228964814, "grad_norm": 13.203399658203125, "learning_rate": 8.958014618391976e-06, "loss": 0.9672, "step": 86040 }, { "epoch": 14.626891041985381, "grad_norm": 16.395618438720703, "learning_rate": 8.955181596691031e-06, "loss": 0.9047, "step": 86050 }, { "epoch": 14.62859085500595, "grad_norm": 15.304118156433105, "learning_rate": 8.952348574990083e-06, "loss": 0.9583, "step": 86060 }, { "epoch": 14.630290668026516, "grad_norm": 70.6300048828125, "learning_rate": 8.949515553289139e-06, "loss": 0.7869, "step": 86070 }, { "epoch": 14.631990481047085, "grad_norm": 13.127213478088379, "learning_rate": 8.946682531588192e-06, "loss": 0.9261, "step": 86080 }, { "epoch": 14.633690294067652, "grad_norm": 27.33932876586914, "learning_rate": 8.943849509887246e-06, "loss": 0.7942, "step": 86090 }, { "epoch": 14.63539010708822, "grad_norm": 12.457124710083008, "learning_rate": 8.941016488186299e-06, "loss": 0.8351, "step": 86100 }, { "epoch": 14.637089920108789, "grad_norm": 11.28414249420166, "learning_rate": 8.938183466485354e-06, "loss": 0.8039, "step": 86110 }, { "epoch": 14.638789733129355, "grad_norm": 18.079742431640625, "learning_rate": 8.935350444784408e-06, "loss": 0.8826, "step": 86120 }, { "epoch": 14.640489546149924, "grad_norm": 14.466354370117188, "learning_rate": 8.932517423083461e-06, "loss": 0.8277, "step": 86130 }, { "epoch": 14.64218935917049, "grad_norm": 15.201949119567871, "learning_rate": 8.929684401382515e-06, "loss": 0.8842, "step": 86140 }, { "epoch": 14.64388917219106, "grad_norm": 14.16309642791748, "learning_rate": 8.92685137968157e-06, "loss": 1.1401, "step": 86150 }, { "epoch": 14.645588985211626, "grad_norm": 10.140676498413086, "learning_rate": 8.924018357980622e-06, "loss": 0.8999, "step": 86160 }, { "epoch": 14.647288798232195, "grad_norm": 12.648844718933105, "learning_rate": 8.921185336279677e-06, "loss": 0.8725, "step": 86170 }, { "epoch": 14.648988611252761, "grad_norm": 41.14336395263672, "learning_rate": 8.91835231457873e-06, "loss": 0.927, "step": 86180 }, { "epoch": 14.65068842427333, "grad_norm": 22.729511260986328, "learning_rate": 8.915519292877782e-06, "loss": 0.8007, "step": 86190 }, { "epoch": 14.652388237293898, "grad_norm": 14.636481285095215, "learning_rate": 8.912686271176838e-06, "loss": 1.0642, "step": 86200 }, { "epoch": 14.654088050314465, "grad_norm": 16.62000846862793, "learning_rate": 8.909853249475891e-06, "loss": 0.826, "step": 86210 }, { "epoch": 14.655787863335034, "grad_norm": 11.700467109680176, "learning_rate": 8.907020227774945e-06, "loss": 0.6102, "step": 86220 }, { "epoch": 14.6574876763556, "grad_norm": 12.512511253356934, "learning_rate": 8.904187206073998e-06, "loss": 0.8799, "step": 86230 }, { "epoch": 14.659187489376169, "grad_norm": 14.054780006408691, "learning_rate": 8.901354184373053e-06, "loss": 0.6849, "step": 86240 }, { "epoch": 14.660887302396736, "grad_norm": 15.243498802185059, "learning_rate": 8.898521162672105e-06, "loss": 0.7152, "step": 86250 }, { "epoch": 14.662587115417304, "grad_norm": 13.792794227600098, "learning_rate": 8.89568814097116e-06, "loss": 0.8909, "step": 86260 }, { "epoch": 14.664286928437871, "grad_norm": 13.28856372833252, "learning_rate": 8.892855119270214e-06, "loss": 0.8921, "step": 86270 }, { "epoch": 14.66598674145844, "grad_norm": 11.912264823913574, "learning_rate": 8.890022097569268e-06, "loss": 1.054, "step": 86280 }, { "epoch": 14.667686554479006, "grad_norm": 23.732749938964844, "learning_rate": 8.887189075868321e-06, "loss": 0.7703, "step": 86290 }, { "epoch": 14.669386367499575, "grad_norm": 12.592512130737305, "learning_rate": 8.884356054167376e-06, "loss": 0.8853, "step": 86300 }, { "epoch": 14.671086180520144, "grad_norm": 38.640480041503906, "learning_rate": 8.881523032466428e-06, "loss": 1.0284, "step": 86310 }, { "epoch": 14.67278599354071, "grad_norm": 15.789416313171387, "learning_rate": 8.878690010765483e-06, "loss": 1.0272, "step": 86320 }, { "epoch": 14.674485806561279, "grad_norm": 12.96994686126709, "learning_rate": 8.875856989064537e-06, "loss": 0.7677, "step": 86330 }, { "epoch": 14.676185619581846, "grad_norm": 12.729997634887695, "learning_rate": 8.87302396736359e-06, "loss": 0.9056, "step": 86340 }, { "epoch": 14.677885432602414, "grad_norm": 24.5263614654541, "learning_rate": 8.870190945662644e-06, "loss": 0.9204, "step": 86350 }, { "epoch": 14.679585245622981, "grad_norm": 11.835152626037598, "learning_rate": 8.867357923961697e-06, "loss": 0.8321, "step": 86360 }, { "epoch": 14.68128505864355, "grad_norm": 9.9887056350708, "learning_rate": 8.864524902260751e-06, "loss": 0.8136, "step": 86370 }, { "epoch": 14.682984871664116, "grad_norm": 14.663159370422363, "learning_rate": 8.861691880559804e-06, "loss": 0.9603, "step": 86380 }, { "epoch": 14.684684684684685, "grad_norm": 20.255401611328125, "learning_rate": 8.85885885885886e-06, "loss": 0.9078, "step": 86390 }, { "epoch": 14.686384497705252, "grad_norm": 15.472257614135742, "learning_rate": 8.856025837157912e-06, "loss": 0.8437, "step": 86400 }, { "epoch": 14.68808431072582, "grad_norm": 19.83836555480957, "learning_rate": 8.853192815456967e-06, "loss": 0.9879, "step": 86410 }, { "epoch": 14.689784123746389, "grad_norm": 12.498658180236816, "learning_rate": 8.85035979375602e-06, "loss": 0.9027, "step": 86420 }, { "epoch": 14.691483936766955, "grad_norm": 12.966169357299805, "learning_rate": 8.847526772055074e-06, "loss": 0.9762, "step": 86430 }, { "epoch": 14.693183749787524, "grad_norm": 13.813613891601562, "learning_rate": 8.844693750354127e-06, "loss": 0.8415, "step": 86440 }, { "epoch": 14.69488356280809, "grad_norm": 14.795605659484863, "learning_rate": 8.841860728653183e-06, "loss": 0.8056, "step": 86450 }, { "epoch": 14.69658337582866, "grad_norm": 21.978296279907227, "learning_rate": 8.839027706952234e-06, "loss": 1.0955, "step": 86460 }, { "epoch": 14.698283188849226, "grad_norm": 11.779158592224121, "learning_rate": 8.83619468525129e-06, "loss": 1.0869, "step": 86470 }, { "epoch": 14.699983001869795, "grad_norm": 12.420978546142578, "learning_rate": 8.833361663550343e-06, "loss": 0.7352, "step": 86480 }, { "epoch": 14.701682814890361, "grad_norm": 17.58739471435547, "learning_rate": 8.830528641849398e-06, "loss": 0.8452, "step": 86490 }, { "epoch": 14.70338262791093, "grad_norm": 18.817913055419922, "learning_rate": 8.82769562014845e-06, "loss": 0.8108, "step": 86500 }, { "epoch": 14.705082440931498, "grad_norm": 12.961976051330566, "learning_rate": 8.824862598447505e-06, "loss": 0.7938, "step": 86510 }, { "epoch": 14.706782253952065, "grad_norm": 11.256193161010742, "learning_rate": 8.822029576746559e-06, "loss": 0.8545, "step": 86520 }, { "epoch": 14.708482066972634, "grad_norm": 11.70174503326416, "learning_rate": 8.81919655504561e-06, "loss": 0.8562, "step": 86530 }, { "epoch": 14.7101818799932, "grad_norm": 53.276668548583984, "learning_rate": 8.816363533344666e-06, "loss": 0.7564, "step": 86540 }, { "epoch": 14.711881693013769, "grad_norm": 17.75122833251953, "learning_rate": 8.81353051164372e-06, "loss": 0.8165, "step": 86550 }, { "epoch": 14.713581506034336, "grad_norm": 16.53954315185547, "learning_rate": 8.810697489942773e-06, "loss": 1.0885, "step": 86560 }, { "epoch": 14.715281319054904, "grad_norm": 18.237871170043945, "learning_rate": 8.807864468241826e-06, "loss": 0.7704, "step": 86570 }, { "epoch": 14.716981132075471, "grad_norm": 16.048492431640625, "learning_rate": 8.805031446540882e-06, "loss": 0.6448, "step": 86580 }, { "epoch": 14.71868094509604, "grad_norm": 12.665371894836426, "learning_rate": 8.802198424839934e-06, "loss": 0.9116, "step": 86590 }, { "epoch": 14.720380758116606, "grad_norm": 34.76811599731445, "learning_rate": 8.799365403138989e-06, "loss": 0.8959, "step": 86600 }, { "epoch": 14.722080571137175, "grad_norm": 10.893799781799316, "learning_rate": 8.796532381438042e-06, "loss": 0.9906, "step": 86610 }, { "epoch": 14.723780384157742, "grad_norm": 11.567313194274902, "learning_rate": 8.793699359737096e-06, "loss": 0.7435, "step": 86620 }, { "epoch": 14.72548019717831, "grad_norm": 17.8502140045166, "learning_rate": 8.79086633803615e-06, "loss": 0.9351, "step": 86630 }, { "epoch": 14.727180010198879, "grad_norm": 11.306095123291016, "learning_rate": 8.788033316335205e-06, "loss": 0.6993, "step": 86640 }, { "epoch": 14.728879823219446, "grad_norm": 14.556936264038086, "learning_rate": 8.785200294634256e-06, "loss": 0.8987, "step": 86650 }, { "epoch": 14.730579636240014, "grad_norm": 15.109381675720215, "learning_rate": 8.782367272933312e-06, "loss": 0.9209, "step": 86660 }, { "epoch": 14.73227944926058, "grad_norm": 14.640939712524414, "learning_rate": 8.779534251232365e-06, "loss": 1.0116, "step": 86670 }, { "epoch": 14.73397926228115, "grad_norm": 44.1358642578125, "learning_rate": 8.776701229531419e-06, "loss": 0.8036, "step": 86680 }, { "epoch": 14.735679075301716, "grad_norm": 13.55788516998291, "learning_rate": 8.773868207830472e-06, "loss": 0.6877, "step": 86690 }, { "epoch": 14.737378888322285, "grad_norm": 10.925374984741211, "learning_rate": 8.771035186129526e-06, "loss": 0.8543, "step": 86700 }, { "epoch": 14.739078701342851, "grad_norm": 11.396653175354004, "learning_rate": 8.76820216442858e-06, "loss": 0.905, "step": 86710 }, { "epoch": 14.74077851436342, "grad_norm": 17.630413055419922, "learning_rate": 8.765369142727633e-06, "loss": 0.7159, "step": 86720 }, { "epoch": 14.742478327383989, "grad_norm": 12.592290878295898, "learning_rate": 8.762536121026688e-06, "loss": 0.7748, "step": 86730 }, { "epoch": 14.744178140404555, "grad_norm": 19.41526222229004, "learning_rate": 8.75970309932574e-06, "loss": 0.8381, "step": 86740 }, { "epoch": 14.745877953425124, "grad_norm": 11.97326374053955, "learning_rate": 8.756870077624795e-06, "loss": 0.93, "step": 86750 }, { "epoch": 14.74757776644569, "grad_norm": 16.227041244506836, "learning_rate": 8.754037055923848e-06, "loss": 0.8362, "step": 86760 }, { "epoch": 14.74927757946626, "grad_norm": 11.94321060180664, "learning_rate": 8.751204034222902e-06, "loss": 0.7673, "step": 86770 }, { "epoch": 14.750977392486826, "grad_norm": 14.3949613571167, "learning_rate": 8.748371012521956e-06, "loss": 0.858, "step": 86780 }, { "epoch": 14.752677205507394, "grad_norm": 13.872739791870117, "learning_rate": 8.74553799082101e-06, "loss": 0.7215, "step": 86790 }, { "epoch": 14.754377018527961, "grad_norm": 11.832123756408691, "learning_rate": 8.742704969120063e-06, "loss": 0.9824, "step": 86800 }, { "epoch": 14.75607683154853, "grad_norm": 68.45249938964844, "learning_rate": 8.739871947419118e-06, "loss": 0.9078, "step": 86810 }, { "epoch": 14.757776644569098, "grad_norm": 13.47023868560791, "learning_rate": 8.737038925718171e-06, "loss": 0.9845, "step": 86820 }, { "epoch": 14.759476457589665, "grad_norm": 15.223406791687012, "learning_rate": 8.734205904017225e-06, "loss": 0.8408, "step": 86830 }, { "epoch": 14.761176270610234, "grad_norm": 19.42745590209961, "learning_rate": 8.731372882316278e-06, "loss": 1.0553, "step": 86840 }, { "epoch": 14.7628760836308, "grad_norm": 18.73438262939453, "learning_rate": 8.728539860615334e-06, "loss": 0.9348, "step": 86850 }, { "epoch": 14.764575896651369, "grad_norm": 13.564610481262207, "learning_rate": 8.725706838914387e-06, "loss": 0.8473, "step": 86860 }, { "epoch": 14.766275709671936, "grad_norm": 10.948921203613281, "learning_rate": 8.72287381721344e-06, "loss": 0.7734, "step": 86870 }, { "epoch": 14.767975522692504, "grad_norm": 15.215028762817383, "learning_rate": 8.720040795512494e-06, "loss": 0.874, "step": 86880 }, { "epoch": 14.769675335713071, "grad_norm": 14.038854598999023, "learning_rate": 8.717207773811548e-06, "loss": 0.9475, "step": 86890 }, { "epoch": 14.77137514873364, "grad_norm": 14.495613098144531, "learning_rate": 8.714374752110601e-06, "loss": 0.8265, "step": 86900 }, { "epoch": 14.773074961754206, "grad_norm": 18.95753288269043, "learning_rate": 8.711541730409655e-06, "loss": 1.0202, "step": 86910 }, { "epoch": 14.774774774774775, "grad_norm": 16.550199508666992, "learning_rate": 8.70870870870871e-06, "loss": 0.8078, "step": 86920 }, { "epoch": 14.776474587795342, "grad_norm": 11.497336387634277, "learning_rate": 8.705875687007762e-06, "loss": 0.8871, "step": 86930 }, { "epoch": 14.77817440081591, "grad_norm": 11.790806770324707, "learning_rate": 8.703042665306817e-06, "loss": 0.8632, "step": 86940 }, { "epoch": 14.779874213836479, "grad_norm": 13.444690704345703, "learning_rate": 8.70020964360587e-06, "loss": 0.8793, "step": 86950 }, { "epoch": 14.781574026857045, "grad_norm": 15.045082092285156, "learning_rate": 8.697376621904924e-06, "loss": 0.9013, "step": 86960 }, { "epoch": 14.783273839877614, "grad_norm": 13.466248512268066, "learning_rate": 8.694543600203978e-06, "loss": 0.8175, "step": 86970 }, { "epoch": 14.78497365289818, "grad_norm": 33.242645263671875, "learning_rate": 8.691710578503033e-06, "loss": 0.8804, "step": 86980 }, { "epoch": 14.78667346591875, "grad_norm": 11.116703987121582, "learning_rate": 8.688877556802085e-06, "loss": 0.8349, "step": 86990 }, { "epoch": 14.788373278939316, "grad_norm": 10.305861473083496, "learning_rate": 8.68604453510114e-06, "loss": 0.7388, "step": 87000 }, { "epoch": 14.790073091959885, "grad_norm": 19.048112869262695, "learning_rate": 8.683211513400193e-06, "loss": 0.9017, "step": 87010 }, { "epoch": 14.791772904980451, "grad_norm": 16.912691116333008, "learning_rate": 8.680378491699247e-06, "loss": 0.8944, "step": 87020 }, { "epoch": 14.79347271800102, "grad_norm": 16.237733840942383, "learning_rate": 8.6775454699983e-06, "loss": 0.8134, "step": 87030 }, { "epoch": 14.795172531021588, "grad_norm": 14.27161693572998, "learning_rate": 8.674712448297356e-06, "loss": 0.9269, "step": 87040 }, { "epoch": 14.796872344042155, "grad_norm": 10.463380813598633, "learning_rate": 8.671879426596407e-06, "loss": 0.859, "step": 87050 }, { "epoch": 14.798572157062724, "grad_norm": 13.46951961517334, "learning_rate": 8.669046404895461e-06, "loss": 0.8831, "step": 87060 }, { "epoch": 14.80027197008329, "grad_norm": 11.37551212310791, "learning_rate": 8.666213383194516e-06, "loss": 0.7831, "step": 87070 }, { "epoch": 14.801971783103859, "grad_norm": 11.806388854980469, "learning_rate": 8.663380361493568e-06, "loss": 0.9787, "step": 87080 }, { "epoch": 14.803671596124426, "grad_norm": 17.938812255859375, "learning_rate": 8.660547339792623e-06, "loss": 1.267, "step": 87090 }, { "epoch": 14.805371409144994, "grad_norm": 11.950303077697754, "learning_rate": 8.657714318091677e-06, "loss": 0.9436, "step": 87100 }, { "epoch": 14.807071222165561, "grad_norm": 17.711950302124023, "learning_rate": 8.65488129639073e-06, "loss": 0.9108, "step": 87110 }, { "epoch": 14.80877103518613, "grad_norm": 13.583677291870117, "learning_rate": 8.652048274689784e-06, "loss": 0.8396, "step": 87120 }, { "epoch": 14.810470848206696, "grad_norm": 17.741640090942383, "learning_rate": 8.649215252988839e-06, "loss": 0.8204, "step": 87130 }, { "epoch": 14.812170661227265, "grad_norm": 11.397531509399414, "learning_rate": 8.64638223128789e-06, "loss": 1.0108, "step": 87140 }, { "epoch": 14.813870474247834, "grad_norm": 12.614123344421387, "learning_rate": 8.643549209586946e-06, "loss": 0.696, "step": 87150 }, { "epoch": 14.8155702872684, "grad_norm": 14.3191556930542, "learning_rate": 8.640716187886e-06, "loss": 0.9436, "step": 87160 }, { "epoch": 14.817270100288969, "grad_norm": 14.56442642211914, "learning_rate": 8.637883166185053e-06, "loss": 0.8834, "step": 87170 }, { "epoch": 14.818969913309536, "grad_norm": 22.170034408569336, "learning_rate": 8.635050144484107e-06, "loss": 1.1285, "step": 87180 }, { "epoch": 14.820669726330104, "grad_norm": 14.996209144592285, "learning_rate": 8.632217122783162e-06, "loss": 0.9116, "step": 87190 }, { "epoch": 14.822369539350671, "grad_norm": 21.877025604248047, "learning_rate": 8.629384101082214e-06, "loss": 0.7847, "step": 87200 }, { "epoch": 14.82406935237124, "grad_norm": 12.563082695007324, "learning_rate": 8.626551079381269e-06, "loss": 0.8531, "step": 87210 }, { "epoch": 14.825769165391806, "grad_norm": 16.191749572753906, "learning_rate": 8.623718057680322e-06, "loss": 0.8461, "step": 87220 }, { "epoch": 14.827468978412375, "grad_norm": 10.78868579864502, "learning_rate": 8.620885035979376e-06, "loss": 0.8729, "step": 87230 }, { "epoch": 14.829168791432942, "grad_norm": 14.597407341003418, "learning_rate": 8.61805201427843e-06, "loss": 0.746, "step": 87240 }, { "epoch": 14.83086860445351, "grad_norm": 19.975250244140625, "learning_rate": 8.615218992577483e-06, "loss": 0.8499, "step": 87250 }, { "epoch": 14.832568417474079, "grad_norm": 11.803906440734863, "learning_rate": 8.612385970876538e-06, "loss": 0.8772, "step": 87260 }, { "epoch": 14.834268230494645, "grad_norm": 21.75547981262207, "learning_rate": 8.60955294917559e-06, "loss": 0.9098, "step": 87270 }, { "epoch": 14.835968043515214, "grad_norm": 14.817829132080078, "learning_rate": 8.606719927474645e-06, "loss": 0.8468, "step": 87280 }, { "epoch": 14.83766785653578, "grad_norm": 16.398984909057617, "learning_rate": 8.603886905773699e-06, "loss": 0.8703, "step": 87290 }, { "epoch": 14.83936766955635, "grad_norm": 21.528732299804688, "learning_rate": 8.601053884072752e-06, "loss": 0.7318, "step": 87300 }, { "epoch": 14.841067482576916, "grad_norm": 9.636054039001465, "learning_rate": 8.598220862371806e-06, "loss": 0.783, "step": 87310 }, { "epoch": 14.842767295597485, "grad_norm": 59.645843505859375, "learning_rate": 8.595387840670861e-06, "loss": 0.728, "step": 87320 }, { "epoch": 14.844467108618051, "grad_norm": 14.228190422058105, "learning_rate": 8.592554818969913e-06, "loss": 0.7627, "step": 87330 }, { "epoch": 14.84616692163862, "grad_norm": 14.15938663482666, "learning_rate": 8.589721797268968e-06, "loss": 0.8648, "step": 87340 }, { "epoch": 14.847866734659188, "grad_norm": 14.653132438659668, "learning_rate": 8.586888775568022e-06, "loss": 0.7178, "step": 87350 }, { "epoch": 14.849566547679755, "grad_norm": 10.229172706604004, "learning_rate": 8.584055753867075e-06, "loss": 0.9146, "step": 87360 }, { "epoch": 14.851266360700324, "grad_norm": 15.968432426452637, "learning_rate": 8.581222732166129e-06, "loss": 0.9219, "step": 87370 }, { "epoch": 14.85296617372089, "grad_norm": 11.611855506896973, "learning_rate": 8.578389710465184e-06, "loss": 0.7424, "step": 87380 }, { "epoch": 14.854665986741459, "grad_norm": 9.540007591247559, "learning_rate": 8.575556688764236e-06, "loss": 0.7797, "step": 87390 }, { "epoch": 14.856365799762026, "grad_norm": 15.137228965759277, "learning_rate": 8.572723667063289e-06, "loss": 0.8979, "step": 87400 }, { "epoch": 14.858065612782594, "grad_norm": 23.612607955932617, "learning_rate": 8.569890645362344e-06, "loss": 0.9103, "step": 87410 }, { "epoch": 14.859765425803161, "grad_norm": 13.15592098236084, "learning_rate": 8.567057623661396e-06, "loss": 0.8906, "step": 87420 }, { "epoch": 14.86146523882373, "grad_norm": 12.358973503112793, "learning_rate": 8.564224601960451e-06, "loss": 0.8778, "step": 87430 }, { "epoch": 14.863165051844296, "grad_norm": 14.691405296325684, "learning_rate": 8.561391580259505e-06, "loss": 0.7766, "step": 87440 }, { "epoch": 14.864864864864865, "grad_norm": 18.16875457763672, "learning_rate": 8.558558558558558e-06, "loss": 0.7724, "step": 87450 }, { "epoch": 14.866564677885432, "grad_norm": 15.340714454650879, "learning_rate": 8.555725536857612e-06, "loss": 0.8727, "step": 87460 }, { "epoch": 14.868264490906, "grad_norm": 14.152644157409668, "learning_rate": 8.552892515156667e-06, "loss": 1.0088, "step": 87470 }, { "epoch": 14.869964303926569, "grad_norm": 13.614863395690918, "learning_rate": 8.550059493455719e-06, "loss": 0.7996, "step": 87480 }, { "epoch": 14.871664116947136, "grad_norm": 10.7886381149292, "learning_rate": 8.547226471754774e-06, "loss": 0.9453, "step": 87490 }, { "epoch": 14.873363929967704, "grad_norm": 50.88521194458008, "learning_rate": 8.544393450053828e-06, "loss": 0.9114, "step": 87500 }, { "epoch": 14.87506374298827, "grad_norm": 10.527771949768066, "learning_rate": 8.541560428352881e-06, "loss": 0.9377, "step": 87510 }, { "epoch": 14.87676355600884, "grad_norm": 13.785600662231445, "learning_rate": 8.538727406651935e-06, "loss": 0.8164, "step": 87520 }, { "epoch": 14.878463369029406, "grad_norm": 17.309621810913086, "learning_rate": 8.53589438495099e-06, "loss": 0.8661, "step": 87530 }, { "epoch": 14.880163182049975, "grad_norm": 16.42005729675293, "learning_rate": 8.533061363250042e-06, "loss": 0.802, "step": 87540 }, { "epoch": 14.881862995070541, "grad_norm": 14.329578399658203, "learning_rate": 8.530228341549097e-06, "loss": 0.8052, "step": 87550 }, { "epoch": 14.88356280809111, "grad_norm": 13.2632474899292, "learning_rate": 8.52739531984815e-06, "loss": 0.8201, "step": 87560 }, { "epoch": 14.885262621111679, "grad_norm": 14.310626983642578, "learning_rate": 8.524562298147204e-06, "loss": 1.0082, "step": 87570 }, { "epoch": 14.886962434132245, "grad_norm": 13.768381118774414, "learning_rate": 8.521729276446258e-06, "loss": 1.0014, "step": 87580 }, { "epoch": 14.888662247152814, "grad_norm": 14.997482299804688, "learning_rate": 8.518896254745311e-06, "loss": 0.9107, "step": 87590 }, { "epoch": 14.89036206017338, "grad_norm": 11.871623039245605, "learning_rate": 8.516063233044366e-06, "loss": 0.6486, "step": 87600 }, { "epoch": 14.89206187319395, "grad_norm": 15.371241569519043, "learning_rate": 8.513230211343418e-06, "loss": 0.8413, "step": 87610 }, { "epoch": 14.893761686214516, "grad_norm": 18.515451431274414, "learning_rate": 8.510397189642473e-06, "loss": 0.7679, "step": 87620 }, { "epoch": 14.895461499235084, "grad_norm": 13.33566665649414, "learning_rate": 8.507564167941527e-06, "loss": 0.7212, "step": 87630 }, { "epoch": 14.897161312255651, "grad_norm": 18.706741333007812, "learning_rate": 8.50473114624058e-06, "loss": 0.9303, "step": 87640 }, { "epoch": 14.89886112527622, "grad_norm": 18.703004837036133, "learning_rate": 8.501898124539634e-06, "loss": 0.899, "step": 87650 }, { "epoch": 14.900560938296788, "grad_norm": 21.240318298339844, "learning_rate": 8.49906510283869e-06, "loss": 1.0756, "step": 87660 }, { "epoch": 14.902260751317355, "grad_norm": 14.717246055603027, "learning_rate": 8.496232081137741e-06, "loss": 0.9177, "step": 87670 }, { "epoch": 14.903960564337924, "grad_norm": 14.008061408996582, "learning_rate": 8.493399059436796e-06, "loss": 0.826, "step": 87680 }, { "epoch": 14.90566037735849, "grad_norm": 13.007107734680176, "learning_rate": 8.49056603773585e-06, "loss": 1.0015, "step": 87690 }, { "epoch": 14.907360190379059, "grad_norm": 20.513307571411133, "learning_rate": 8.487733016034903e-06, "loss": 1.0894, "step": 87700 }, { "epoch": 14.909060003399626, "grad_norm": 12.344270706176758, "learning_rate": 8.484899994333957e-06, "loss": 0.8092, "step": 87710 }, { "epoch": 14.910759816420194, "grad_norm": 12.106184005737305, "learning_rate": 8.482066972633012e-06, "loss": 0.8197, "step": 87720 }, { "epoch": 14.912459629440761, "grad_norm": 15.80951976776123, "learning_rate": 8.479233950932064e-06, "loss": 0.8001, "step": 87730 }, { "epoch": 14.91415944246133, "grad_norm": 27.052818298339844, "learning_rate": 8.476400929231117e-06, "loss": 0.7346, "step": 87740 }, { "epoch": 14.915859255481896, "grad_norm": 11.306713104248047, "learning_rate": 8.473567907530173e-06, "loss": 0.8528, "step": 87750 }, { "epoch": 14.917559068502465, "grad_norm": 13.776017189025879, "learning_rate": 8.470734885829224e-06, "loss": 0.7918, "step": 87760 }, { "epoch": 14.919258881523032, "grad_norm": 16.460927963256836, "learning_rate": 8.46790186412828e-06, "loss": 0.9395, "step": 87770 }, { "epoch": 14.9209586945436, "grad_norm": 14.666077613830566, "learning_rate": 8.465068842427333e-06, "loss": 1.0962, "step": 87780 }, { "epoch": 14.922658507564169, "grad_norm": 11.25165843963623, "learning_rate": 8.462235820726387e-06, "loss": 0.9106, "step": 87790 }, { "epoch": 14.924358320584735, "grad_norm": 15.30734920501709, "learning_rate": 8.45940279902544e-06, "loss": 0.8071, "step": 87800 }, { "epoch": 14.926058133605304, "grad_norm": 11.780189514160156, "learning_rate": 8.456569777324495e-06, "loss": 1.1246, "step": 87810 }, { "epoch": 14.92775794662587, "grad_norm": 25.871347427368164, "learning_rate": 8.453736755623547e-06, "loss": 0.8641, "step": 87820 }, { "epoch": 14.92945775964644, "grad_norm": 14.696878433227539, "learning_rate": 8.450903733922602e-06, "loss": 0.7758, "step": 87830 }, { "epoch": 14.931157572667006, "grad_norm": 19.899564743041992, "learning_rate": 8.448070712221656e-06, "loss": 0.9695, "step": 87840 }, { "epoch": 14.932857385687575, "grad_norm": 14.415620803833008, "learning_rate": 8.44523769052071e-06, "loss": 0.7918, "step": 87850 }, { "epoch": 14.934557198708141, "grad_norm": 16.16461753845215, "learning_rate": 8.442404668819763e-06, "loss": 0.8254, "step": 87860 }, { "epoch": 14.93625701172871, "grad_norm": 13.260046005249023, "learning_rate": 8.439571647118818e-06, "loss": 0.8515, "step": 87870 }, { "epoch": 14.937956824749278, "grad_norm": 19.691192626953125, "learning_rate": 8.43673862541787e-06, "loss": 0.7842, "step": 87880 }, { "epoch": 14.939656637769845, "grad_norm": 16.359119415283203, "learning_rate": 8.433905603716925e-06, "loss": 0.8041, "step": 87890 }, { "epoch": 14.941356450790414, "grad_norm": 10.623122215270996, "learning_rate": 8.431072582015979e-06, "loss": 1.0006, "step": 87900 }, { "epoch": 14.94305626381098, "grad_norm": 12.844754219055176, "learning_rate": 8.42823956031503e-06, "loss": 0.8065, "step": 87910 }, { "epoch": 14.944756076831549, "grad_norm": 10.3287353515625, "learning_rate": 8.425406538614086e-06, "loss": 1.0839, "step": 87920 }, { "epoch": 14.946455889852116, "grad_norm": 12.46136474609375, "learning_rate": 8.42257351691314e-06, "loss": 0.9273, "step": 87930 }, { "epoch": 14.948155702872684, "grad_norm": 11.759668350219727, "learning_rate": 8.419740495212193e-06, "loss": 0.8532, "step": 87940 }, { "epoch": 14.949855515893251, "grad_norm": 12.269769668579102, "learning_rate": 8.416907473511246e-06, "loss": 0.7895, "step": 87950 }, { "epoch": 14.95155532891382, "grad_norm": 13.273245811462402, "learning_rate": 8.414074451810302e-06, "loss": 0.8669, "step": 87960 }, { "epoch": 14.953255141934386, "grad_norm": 11.061623573303223, "learning_rate": 8.411241430109355e-06, "loss": 0.9142, "step": 87970 }, { "epoch": 14.954954954954955, "grad_norm": 20.926374435424805, "learning_rate": 8.408408408408409e-06, "loss": 0.8533, "step": 87980 }, { "epoch": 14.956654767975524, "grad_norm": 16.502901077270508, "learning_rate": 8.405575386707462e-06, "loss": 0.8956, "step": 87990 }, { "epoch": 14.95835458099609, "grad_norm": 14.390917778015137, "learning_rate": 8.402742365006517e-06, "loss": 0.7804, "step": 88000 }, { "epoch": 14.960054394016659, "grad_norm": 17.60472869873047, "learning_rate": 8.39990934330557e-06, "loss": 0.8009, "step": 88010 }, { "epoch": 14.961754207037226, "grad_norm": 17.445417404174805, "learning_rate": 8.397076321604624e-06, "loss": 0.7534, "step": 88020 }, { "epoch": 14.963454020057794, "grad_norm": 14.447041511535645, "learning_rate": 8.394243299903678e-06, "loss": 0.89, "step": 88030 }, { "epoch": 14.965153833078361, "grad_norm": 12.601644515991211, "learning_rate": 8.391410278202731e-06, "loss": 0.7574, "step": 88040 }, { "epoch": 14.96685364609893, "grad_norm": 16.89458656311035, "learning_rate": 8.388577256501785e-06, "loss": 0.7959, "step": 88050 }, { "epoch": 14.968553459119496, "grad_norm": 16.046178817749023, "learning_rate": 8.38574423480084e-06, "loss": 1.0494, "step": 88060 }, { "epoch": 14.970253272140065, "grad_norm": 16.358083724975586, "learning_rate": 8.382911213099892e-06, "loss": 0.9272, "step": 88070 }, { "epoch": 14.971953085160632, "grad_norm": 14.718915939331055, "learning_rate": 8.380078191398947e-06, "loss": 0.9495, "step": 88080 }, { "epoch": 14.9736528981812, "grad_norm": 18.348114013671875, "learning_rate": 8.377245169698e-06, "loss": 0.8761, "step": 88090 }, { "epoch": 14.975352711201769, "grad_norm": 14.533824920654297, "learning_rate": 8.374412147997053e-06, "loss": 0.807, "step": 88100 }, { "epoch": 14.977052524222335, "grad_norm": 19.134658813476562, "learning_rate": 8.371579126296108e-06, "loss": 0.8158, "step": 88110 }, { "epoch": 14.978752337242904, "grad_norm": 14.256097793579102, "learning_rate": 8.368746104595161e-06, "loss": 0.9047, "step": 88120 }, { "epoch": 14.98045215026347, "grad_norm": 13.364097595214844, "learning_rate": 8.365913082894215e-06, "loss": 0.9746, "step": 88130 }, { "epoch": 14.98215196328404, "grad_norm": 14.783742904663086, "learning_rate": 8.363080061193268e-06, "loss": 0.7863, "step": 88140 }, { "epoch": 14.983851776304606, "grad_norm": 20.11029624938965, "learning_rate": 8.360247039492324e-06, "loss": 0.9593, "step": 88150 }, { "epoch": 14.985551589325175, "grad_norm": 13.384900093078613, "learning_rate": 8.357414017791375e-06, "loss": 0.8357, "step": 88160 }, { "epoch": 14.987251402345741, "grad_norm": 12.86650276184082, "learning_rate": 8.35458099609043e-06, "loss": 0.9482, "step": 88170 }, { "epoch": 14.98895121536631, "grad_norm": 14.692614555358887, "learning_rate": 8.351747974389484e-06, "loss": 0.7699, "step": 88180 }, { "epoch": 14.990651028386878, "grad_norm": 16.060449600219727, "learning_rate": 8.348914952688538e-06, "loss": 0.9345, "step": 88190 }, { "epoch": 14.992350841407445, "grad_norm": 17.277923583984375, "learning_rate": 8.346081930987591e-06, "loss": 0.7238, "step": 88200 }, { "epoch": 14.994050654428014, "grad_norm": 18.842805862426758, "learning_rate": 8.343248909286646e-06, "loss": 0.8333, "step": 88210 }, { "epoch": 14.99575046744858, "grad_norm": 18.0903377532959, "learning_rate": 8.340415887585698e-06, "loss": 1.0197, "step": 88220 }, { "epoch": 14.997450280469149, "grad_norm": 16.80282974243164, "learning_rate": 8.337582865884753e-06, "loss": 1.003, "step": 88230 }, { "epoch": 14.999150093489716, "grad_norm": 11.943648338317871, "learning_rate": 8.334749844183807e-06, "loss": 1.0472, "step": 88240 }, { "epoch": 15.0, "eval_cer": 1.0, "eval_loss": 2.6513092517852783, "eval_runtime": 2036.0981, "eval_samples_per_second": 0.231, "eval_steps_per_second": 0.231, "step": 88245 }, { "epoch": 15.000849906510284, "grad_norm": 13.920328140258789, "learning_rate": 8.33191682248286e-06, "loss": 0.6254, "step": 88250 }, { "epoch": 15.002549719530851, "grad_norm": 10.461517333984375, "learning_rate": 8.329083800781914e-06, "loss": 0.8228, "step": 88260 }, { "epoch": 15.00424953255142, "grad_norm": 14.939767837524414, "learning_rate": 8.326250779080968e-06, "loss": 0.7809, "step": 88270 }, { "epoch": 15.005949345571986, "grad_norm": 22.66014289855957, "learning_rate": 8.323417757380021e-06, "loss": 0.6488, "step": 88280 }, { "epoch": 15.007649158592555, "grad_norm": 11.16942024230957, "learning_rate": 8.320584735679075e-06, "loss": 0.9014, "step": 88290 }, { "epoch": 15.009348971613123, "grad_norm": 11.689493179321289, "learning_rate": 8.31775171397813e-06, "loss": 0.7766, "step": 88300 }, { "epoch": 15.01104878463369, "grad_norm": 9.967001914978027, "learning_rate": 8.314918692277183e-06, "loss": 0.699, "step": 88310 }, { "epoch": 15.012748597654259, "grad_norm": 24.901674270629883, "learning_rate": 8.312085670576237e-06, "loss": 0.6842, "step": 88320 }, { "epoch": 15.014448410674825, "grad_norm": 10.996285438537598, "learning_rate": 8.30925264887529e-06, "loss": 0.7969, "step": 88330 }, { "epoch": 15.016148223695394, "grad_norm": 14.313968658447266, "learning_rate": 8.306419627174346e-06, "loss": 0.8353, "step": 88340 }, { "epoch": 15.01784803671596, "grad_norm": 12.059111595153809, "learning_rate": 8.303586605473397e-06, "loss": 0.7843, "step": 88350 }, { "epoch": 15.01954784973653, "grad_norm": 11.439994812011719, "learning_rate": 8.300753583772453e-06, "loss": 0.7442, "step": 88360 }, { "epoch": 15.021247662757096, "grad_norm": 21.76095962524414, "learning_rate": 8.297920562071506e-06, "loss": 0.7534, "step": 88370 }, { "epoch": 15.022947475777665, "grad_norm": 12.229230880737305, "learning_rate": 8.29508754037056e-06, "loss": 0.7328, "step": 88380 }, { "epoch": 15.024647288798231, "grad_norm": 12.076310157775879, "learning_rate": 8.292254518669613e-06, "loss": 0.7849, "step": 88390 }, { "epoch": 15.0263471018188, "grad_norm": 8.356978416442871, "learning_rate": 8.289421496968668e-06, "loss": 0.7118, "step": 88400 }, { "epoch": 15.028046914839368, "grad_norm": 15.826127052307129, "learning_rate": 8.28658847526772e-06, "loss": 0.8257, "step": 88410 }, { "epoch": 15.029746727859935, "grad_norm": 12.59960651397705, "learning_rate": 8.283755453566775e-06, "loss": 0.8256, "step": 88420 }, { "epoch": 15.031446540880504, "grad_norm": 13.147482872009277, "learning_rate": 8.280922431865829e-06, "loss": 0.7069, "step": 88430 }, { "epoch": 15.03314635390107, "grad_norm": 14.219442367553711, "learning_rate": 8.27808941016488e-06, "loss": 0.9012, "step": 88440 }, { "epoch": 15.034846166921639, "grad_norm": 10.282154083251953, "learning_rate": 8.275256388463936e-06, "loss": 0.8161, "step": 88450 }, { "epoch": 15.036545979942206, "grad_norm": 14.594353675842285, "learning_rate": 8.27242336676299e-06, "loss": 0.6393, "step": 88460 }, { "epoch": 15.038245792962774, "grad_norm": 15.544693946838379, "learning_rate": 8.269590345062043e-06, "loss": 0.8354, "step": 88470 }, { "epoch": 15.039945605983341, "grad_norm": 16.174715042114258, "learning_rate": 8.266757323361097e-06, "loss": 0.7657, "step": 88480 }, { "epoch": 15.04164541900391, "grad_norm": 15.174674987792969, "learning_rate": 8.263924301660152e-06, "loss": 0.7708, "step": 88490 }, { "epoch": 15.043345232024476, "grad_norm": 11.26712417602539, "learning_rate": 8.261091279959204e-06, "loss": 0.7906, "step": 88500 }, { "epoch": 15.045045045045045, "grad_norm": 14.102855682373047, "learning_rate": 8.258258258258259e-06, "loss": 0.6664, "step": 88510 }, { "epoch": 15.046744858065614, "grad_norm": 13.267807960510254, "learning_rate": 8.255425236557312e-06, "loss": 0.8104, "step": 88520 }, { "epoch": 15.04844467108618, "grad_norm": 13.622316360473633, "learning_rate": 8.252592214856366e-06, "loss": 0.8242, "step": 88530 }, { "epoch": 15.050144484106749, "grad_norm": 12.750822067260742, "learning_rate": 8.24975919315542e-06, "loss": 0.7838, "step": 88540 }, { "epoch": 15.051844297127316, "grad_norm": 10.557108879089355, "learning_rate": 8.246926171454475e-06, "loss": 0.8845, "step": 88550 }, { "epoch": 15.053544110147884, "grad_norm": 19.843215942382812, "learning_rate": 8.244093149753526e-06, "loss": 0.822, "step": 88560 }, { "epoch": 15.055243923168451, "grad_norm": 9.347963333129883, "learning_rate": 8.241260128052582e-06, "loss": 0.7307, "step": 88570 }, { "epoch": 15.05694373618902, "grad_norm": 13.621919631958008, "learning_rate": 8.238427106351635e-06, "loss": 0.7705, "step": 88580 }, { "epoch": 15.058643549209586, "grad_norm": 11.59322452545166, "learning_rate": 8.235594084650689e-06, "loss": 0.9515, "step": 88590 }, { "epoch": 15.060343362230155, "grad_norm": 20.26099967956543, "learning_rate": 8.232761062949742e-06, "loss": 0.897, "step": 88600 }, { "epoch": 15.062043175250722, "grad_norm": 15.706406593322754, "learning_rate": 8.229928041248796e-06, "loss": 0.7102, "step": 88610 }, { "epoch": 15.06374298827129, "grad_norm": 11.809913635253906, "learning_rate": 8.22709501954785e-06, "loss": 0.7334, "step": 88620 }, { "epoch": 15.065442801291859, "grad_norm": 16.98553466796875, "learning_rate": 8.224261997846903e-06, "loss": 0.7571, "step": 88630 }, { "epoch": 15.067142614312425, "grad_norm": 20.8031005859375, "learning_rate": 8.221428976145958e-06, "loss": 0.6514, "step": 88640 }, { "epoch": 15.068842427332994, "grad_norm": 12.497105598449707, "learning_rate": 8.21859595444501e-06, "loss": 0.7063, "step": 88650 }, { "epoch": 15.07054224035356, "grad_norm": 16.918296813964844, "learning_rate": 8.215762932744065e-06, "loss": 0.628, "step": 88660 }, { "epoch": 15.07224205337413, "grad_norm": 17.088417053222656, "learning_rate": 8.212929911043119e-06, "loss": 0.8694, "step": 88670 }, { "epoch": 15.073941866394696, "grad_norm": 12.697280883789062, "learning_rate": 8.210096889342174e-06, "loss": 0.7808, "step": 88680 }, { "epoch": 15.075641679415265, "grad_norm": 16.535547256469727, "learning_rate": 8.207263867641226e-06, "loss": 0.8248, "step": 88690 }, { "epoch": 15.077341492435831, "grad_norm": 15.365165710449219, "learning_rate": 8.20443084594028e-06, "loss": 0.7953, "step": 88700 }, { "epoch": 15.0790413054564, "grad_norm": 17.44661521911621, "learning_rate": 8.201597824239334e-06, "loss": 0.867, "step": 88710 }, { "epoch": 15.080741118476968, "grad_norm": 17.700674057006836, "learning_rate": 8.198764802538388e-06, "loss": 0.8429, "step": 88720 }, { "epoch": 15.082440931497535, "grad_norm": 14.765105247497559, "learning_rate": 8.195931780837441e-06, "loss": 0.7258, "step": 88730 }, { "epoch": 15.084140744518104, "grad_norm": 12.630170822143555, "learning_rate": 8.193098759136497e-06, "loss": 0.8545, "step": 88740 }, { "epoch": 15.08584055753867, "grad_norm": 17.18745231628418, "learning_rate": 8.190265737435548e-06, "loss": 0.7142, "step": 88750 }, { "epoch": 15.087540370559239, "grad_norm": 15.276220321655273, "learning_rate": 8.187432715734604e-06, "loss": 0.7257, "step": 88760 }, { "epoch": 15.089240183579806, "grad_norm": 13.62463665008545, "learning_rate": 8.184599694033657e-06, "loss": 0.7332, "step": 88770 }, { "epoch": 15.090939996600374, "grad_norm": 11.929555892944336, "learning_rate": 8.181766672332709e-06, "loss": 0.7761, "step": 88780 }, { "epoch": 15.092639809620941, "grad_norm": 11.658105850219727, "learning_rate": 8.178933650631764e-06, "loss": 0.8499, "step": 88790 }, { "epoch": 15.09433962264151, "grad_norm": 16.08230209350586, "learning_rate": 8.176100628930818e-06, "loss": 0.8668, "step": 88800 }, { "epoch": 15.096039435662076, "grad_norm": 13.421504020690918, "learning_rate": 8.173267607229871e-06, "loss": 0.8474, "step": 88810 }, { "epoch": 15.097739248682645, "grad_norm": 12.891195297241211, "learning_rate": 8.170434585528925e-06, "loss": 0.6538, "step": 88820 }, { "epoch": 15.099439061703213, "grad_norm": 13.459688186645508, "learning_rate": 8.16760156382798e-06, "loss": 0.6427, "step": 88830 }, { "epoch": 15.10113887472378, "grad_norm": 14.282980918884277, "learning_rate": 8.164768542127032e-06, "loss": 0.7824, "step": 88840 }, { "epoch": 15.102838687744349, "grad_norm": 13.526939392089844, "learning_rate": 8.161935520426087e-06, "loss": 0.7912, "step": 88850 }, { "epoch": 15.104538500764916, "grad_norm": 11.58427619934082, "learning_rate": 8.15910249872514e-06, "loss": 0.7885, "step": 88860 }, { "epoch": 15.106238313785484, "grad_norm": 18.754850387573242, "learning_rate": 8.156269477024194e-06, "loss": 0.9389, "step": 88870 }, { "epoch": 15.10793812680605, "grad_norm": 18.2018985748291, "learning_rate": 8.153436455323248e-06, "loss": 0.7774, "step": 88880 }, { "epoch": 15.10963793982662, "grad_norm": 14.163029670715332, "learning_rate": 8.150603433622303e-06, "loss": 0.7321, "step": 88890 }, { "epoch": 15.111337752847186, "grad_norm": 19.74342918395996, "learning_rate": 8.147770411921355e-06, "loss": 0.6908, "step": 88900 }, { "epoch": 15.113037565867755, "grad_norm": 14.430459022521973, "learning_rate": 8.14493739022041e-06, "loss": 0.7669, "step": 88910 }, { "epoch": 15.114737378888321, "grad_norm": 12.63357925415039, "learning_rate": 8.142104368519463e-06, "loss": 0.6383, "step": 88920 }, { "epoch": 15.11643719190889, "grad_norm": 14.814640045166016, "learning_rate": 8.139271346818517e-06, "loss": 0.9217, "step": 88930 }, { "epoch": 15.118137004929459, "grad_norm": 12.03897762298584, "learning_rate": 8.13643832511757e-06, "loss": 0.8177, "step": 88940 }, { "epoch": 15.119836817950025, "grad_norm": 21.362979888916016, "learning_rate": 8.133605303416624e-06, "loss": 0.9134, "step": 88950 }, { "epoch": 15.121536630970594, "grad_norm": 10.485945701599121, "learning_rate": 8.130772281715677e-06, "loss": 0.7088, "step": 88960 }, { "epoch": 15.12323644399116, "grad_norm": 12.674934387207031, "learning_rate": 8.127939260014731e-06, "loss": 0.8281, "step": 88970 }, { "epoch": 15.12493625701173, "grad_norm": 13.152344703674316, "learning_rate": 8.125106238313786e-06, "loss": 0.8224, "step": 88980 }, { "epoch": 15.126636070032296, "grad_norm": 12.132697105407715, "learning_rate": 8.122273216612838e-06, "loss": 0.897, "step": 88990 }, { "epoch": 15.128335883052864, "grad_norm": 72.98114776611328, "learning_rate": 8.119440194911893e-06, "loss": 0.8281, "step": 89000 }, { "epoch": 15.130035696073431, "grad_norm": 17.84326171875, "learning_rate": 8.116607173210947e-06, "loss": 0.6856, "step": 89010 }, { "epoch": 15.131735509094, "grad_norm": 14.120779991149902, "learning_rate": 8.11377415151e-06, "loss": 0.9345, "step": 89020 }, { "epoch": 15.133435322114567, "grad_norm": 16.068452835083008, "learning_rate": 8.110941129809054e-06, "loss": 0.9205, "step": 89030 }, { "epoch": 15.135135135135135, "grad_norm": 12.981058120727539, "learning_rate": 8.108108108108109e-06, "loss": 0.7276, "step": 89040 }, { "epoch": 15.136834948155704, "grad_norm": 16.715641021728516, "learning_rate": 8.105275086407163e-06, "loss": 0.8146, "step": 89050 }, { "epoch": 15.13853476117627, "grad_norm": 11.84889030456543, "learning_rate": 8.102442064706216e-06, "loss": 0.8373, "step": 89060 }, { "epoch": 15.140234574196839, "grad_norm": 16.263957977294922, "learning_rate": 8.09960904300527e-06, "loss": 0.7637, "step": 89070 }, { "epoch": 15.141934387217406, "grad_norm": 13.567219734191895, "learning_rate": 8.096776021304325e-06, "loss": 0.838, "step": 89080 }, { "epoch": 15.143634200237974, "grad_norm": 14.896403312683105, "learning_rate": 8.093942999603377e-06, "loss": 0.7127, "step": 89090 }, { "epoch": 15.145334013258541, "grad_norm": 13.384915351867676, "learning_rate": 8.091109977902432e-06, "loss": 1.0015, "step": 89100 }, { "epoch": 15.14703382627911, "grad_norm": 12.35360336303711, "learning_rate": 8.088276956201485e-06, "loss": 0.6113, "step": 89110 }, { "epoch": 15.148733639299676, "grad_norm": 19.02676010131836, "learning_rate": 8.085443934500537e-06, "loss": 0.7683, "step": 89120 }, { "epoch": 15.150433452320245, "grad_norm": 17.792774200439453, "learning_rate": 8.082610912799592e-06, "loss": 0.7873, "step": 89130 }, { "epoch": 15.152133265340812, "grad_norm": 21.376012802124023, "learning_rate": 8.079777891098646e-06, "loss": 0.7752, "step": 89140 }, { "epoch": 15.15383307836138, "grad_norm": 17.83869171142578, "learning_rate": 8.0769448693977e-06, "loss": 0.8655, "step": 89150 }, { "epoch": 15.155532891381949, "grad_norm": 42.67610168457031, "learning_rate": 8.074111847696753e-06, "loss": 0.9654, "step": 89160 }, { "epoch": 15.157232704402515, "grad_norm": 13.583166122436523, "learning_rate": 8.071278825995808e-06, "loss": 0.7879, "step": 89170 }, { "epoch": 15.158932517423084, "grad_norm": 13.530864715576172, "learning_rate": 8.06844580429486e-06, "loss": 0.7283, "step": 89180 }, { "epoch": 15.16063233044365, "grad_norm": 10.89435863494873, "learning_rate": 8.065612782593915e-06, "loss": 0.7079, "step": 89190 }, { "epoch": 15.16233214346422, "grad_norm": 17.607553482055664, "learning_rate": 8.062779760892969e-06, "loss": 1.0766, "step": 89200 }, { "epoch": 15.164031956484786, "grad_norm": 20.90250587463379, "learning_rate": 8.059946739192022e-06, "loss": 0.8631, "step": 89210 }, { "epoch": 15.165731769505355, "grad_norm": 11.259592056274414, "learning_rate": 8.057113717491076e-06, "loss": 0.6817, "step": 89220 }, { "epoch": 15.167431582525921, "grad_norm": 14.165058135986328, "learning_rate": 8.054280695790131e-06, "loss": 0.7733, "step": 89230 }, { "epoch": 15.16913139554649, "grad_norm": 16.26675796508789, "learning_rate": 8.051447674089183e-06, "loss": 0.6657, "step": 89240 }, { "epoch": 15.170831208567058, "grad_norm": 22.585817337036133, "learning_rate": 8.048614652388238e-06, "loss": 0.611, "step": 89250 }, { "epoch": 15.172531021587625, "grad_norm": 12.722939491271973, "learning_rate": 8.045781630687292e-06, "loss": 0.7958, "step": 89260 }, { "epoch": 15.174230834608194, "grad_norm": 11.918404579162598, "learning_rate": 8.042948608986345e-06, "loss": 0.9131, "step": 89270 }, { "epoch": 15.17593064762876, "grad_norm": 10.94432258605957, "learning_rate": 8.040115587285399e-06, "loss": 0.6975, "step": 89280 }, { "epoch": 15.177630460649329, "grad_norm": 13.611042976379395, "learning_rate": 8.037282565584454e-06, "loss": 0.7879, "step": 89290 }, { "epoch": 15.179330273669896, "grad_norm": 15.457191467285156, "learning_rate": 8.034449543883506e-06, "loss": 0.799, "step": 89300 }, { "epoch": 15.181030086690464, "grad_norm": 11.844568252563477, "learning_rate": 8.03161652218256e-06, "loss": 0.6759, "step": 89310 }, { "epoch": 15.182729899711031, "grad_norm": 19.1539306640625, "learning_rate": 8.028783500481614e-06, "loss": 0.8818, "step": 89320 }, { "epoch": 15.1844297127316, "grad_norm": 14.966703414916992, "learning_rate": 8.025950478780666e-06, "loss": 0.7526, "step": 89330 }, { "epoch": 15.186129525752166, "grad_norm": 12.611647605895996, "learning_rate": 8.023117457079721e-06, "loss": 0.8215, "step": 89340 }, { "epoch": 15.187829338772735, "grad_norm": 24.70041275024414, "learning_rate": 8.020284435378775e-06, "loss": 1.077, "step": 89350 }, { "epoch": 15.189529151793304, "grad_norm": 17.87723159790039, "learning_rate": 8.017451413677829e-06, "loss": 0.8796, "step": 89360 }, { "epoch": 15.19122896481387, "grad_norm": 16.138586044311523, "learning_rate": 8.014618391976882e-06, "loss": 0.8886, "step": 89370 }, { "epoch": 15.192928777834439, "grad_norm": 10.678848266601562, "learning_rate": 8.011785370275937e-06, "loss": 0.8285, "step": 89380 }, { "epoch": 15.194628590855006, "grad_norm": 20.223270416259766, "learning_rate": 8.008952348574989e-06, "loss": 0.8699, "step": 89390 }, { "epoch": 15.196328403875574, "grad_norm": 11.723883628845215, "learning_rate": 8.006119326874044e-06, "loss": 0.9005, "step": 89400 }, { "epoch": 15.198028216896141, "grad_norm": 13.019248962402344, "learning_rate": 8.003286305173098e-06, "loss": 0.8471, "step": 89410 }, { "epoch": 15.19972802991671, "grad_norm": 12.988792419433594, "learning_rate": 8.000453283472153e-06, "loss": 0.6517, "step": 89420 }, { "epoch": 15.201427842937276, "grad_norm": 12.149751663208008, "learning_rate": 7.997620261771205e-06, "loss": 0.9472, "step": 89430 }, { "epoch": 15.203127655957845, "grad_norm": 15.398514747619629, "learning_rate": 7.99478724007026e-06, "loss": 0.8253, "step": 89440 }, { "epoch": 15.204827468978412, "grad_norm": 14.307899475097656, "learning_rate": 7.991954218369314e-06, "loss": 0.8701, "step": 89450 }, { "epoch": 15.20652728199898, "grad_norm": 12.998059272766113, "learning_rate": 7.989121196668367e-06, "loss": 0.7402, "step": 89460 }, { "epoch": 15.208227095019549, "grad_norm": 20.158443450927734, "learning_rate": 7.98628817496742e-06, "loss": 0.8672, "step": 89470 }, { "epoch": 15.209926908040115, "grad_norm": 12.994078636169434, "learning_rate": 7.983455153266474e-06, "loss": 0.7538, "step": 89480 }, { "epoch": 15.211626721060684, "grad_norm": 12.760147094726562, "learning_rate": 7.980622131565528e-06, "loss": 0.9366, "step": 89490 }, { "epoch": 15.21332653408125, "grad_norm": 16.214263916015625, "learning_rate": 7.977789109864581e-06, "loss": 0.8627, "step": 89500 }, { "epoch": 15.21502634710182, "grad_norm": 21.26758575439453, "learning_rate": 7.974956088163636e-06, "loss": 0.759, "step": 89510 }, { "epoch": 15.216726160122386, "grad_norm": 20.61347770690918, "learning_rate": 7.972123066462688e-06, "loss": 0.7639, "step": 89520 }, { "epoch": 15.218425973142955, "grad_norm": 24.410669326782227, "learning_rate": 7.969290044761743e-06, "loss": 0.8323, "step": 89530 }, { "epoch": 15.220125786163521, "grad_norm": 12.09333324432373, "learning_rate": 7.966457023060797e-06, "loss": 0.7655, "step": 89540 }, { "epoch": 15.22182559918409, "grad_norm": 14.158031463623047, "learning_rate": 7.96362400135985e-06, "loss": 0.6732, "step": 89550 }, { "epoch": 15.223525412204658, "grad_norm": 17.32318115234375, "learning_rate": 7.960790979658904e-06, "loss": 0.8421, "step": 89560 }, { "epoch": 15.225225225225225, "grad_norm": 11.047408103942871, "learning_rate": 7.95795795795796e-06, "loss": 0.838, "step": 89570 }, { "epoch": 15.226925038245794, "grad_norm": 15.419341087341309, "learning_rate": 7.955124936257011e-06, "loss": 0.7749, "step": 89580 }, { "epoch": 15.22862485126636, "grad_norm": 13.793651580810547, "learning_rate": 7.952291914556066e-06, "loss": 0.761, "step": 89590 }, { "epoch": 15.230324664286929, "grad_norm": 12.201021194458008, "learning_rate": 7.94945889285512e-06, "loss": 0.8003, "step": 89600 }, { "epoch": 15.232024477307496, "grad_norm": 17.049833297729492, "learning_rate": 7.946625871154173e-06, "loss": 0.9123, "step": 89610 }, { "epoch": 15.233724290328064, "grad_norm": 9.602704048156738, "learning_rate": 7.943792849453227e-06, "loss": 0.7878, "step": 89620 }, { "epoch": 15.235424103348631, "grad_norm": 10.613449096679688, "learning_rate": 7.940959827752282e-06, "loss": 0.7499, "step": 89630 }, { "epoch": 15.2371239163692, "grad_norm": 11.00075912475586, "learning_rate": 7.938126806051334e-06, "loss": 0.7265, "step": 89640 }, { "epoch": 15.238823729389766, "grad_norm": 15.710715293884277, "learning_rate": 7.935293784350387e-06, "loss": 1.0102, "step": 89650 }, { "epoch": 15.240523542410335, "grad_norm": 13.108120918273926, "learning_rate": 7.932460762649443e-06, "loss": 0.8159, "step": 89660 }, { "epoch": 15.242223355430903, "grad_norm": 16.206132888793945, "learning_rate": 7.929627740948494e-06, "loss": 0.7566, "step": 89670 }, { "epoch": 15.24392316845147, "grad_norm": 20.691205978393555, "learning_rate": 7.92679471924755e-06, "loss": 0.9217, "step": 89680 }, { "epoch": 15.245622981472039, "grad_norm": 17.78836441040039, "learning_rate": 7.923961697546603e-06, "loss": 0.9146, "step": 89690 }, { "epoch": 15.247322794492606, "grad_norm": 14.99380874633789, "learning_rate": 7.921128675845657e-06, "loss": 0.853, "step": 89700 }, { "epoch": 15.249022607513174, "grad_norm": 11.420299530029297, "learning_rate": 7.91829565414471e-06, "loss": 0.6481, "step": 89710 }, { "epoch": 15.25072242053374, "grad_norm": 25.32341766357422, "learning_rate": 7.915462632443765e-06, "loss": 0.5776, "step": 89720 }, { "epoch": 15.25242223355431, "grad_norm": 11.022881507873535, "learning_rate": 7.912629610742817e-06, "loss": 0.7488, "step": 89730 }, { "epoch": 15.254122046574876, "grad_norm": 10.799932479858398, "learning_rate": 7.909796589041873e-06, "loss": 0.7247, "step": 89740 }, { "epoch": 15.255821859595445, "grad_norm": 12.36208724975586, "learning_rate": 7.906963567340926e-06, "loss": 0.5278, "step": 89750 }, { "epoch": 15.257521672616011, "grad_norm": 14.202613830566406, "learning_rate": 7.90413054563998e-06, "loss": 0.7334, "step": 89760 }, { "epoch": 15.25922148563658, "grad_norm": 13.327995300292969, "learning_rate": 7.901297523939033e-06, "loss": 0.8116, "step": 89770 }, { "epoch": 15.260921298657149, "grad_norm": 17.903167724609375, "learning_rate": 7.898464502238088e-06, "loss": 0.8474, "step": 89780 }, { "epoch": 15.262621111677715, "grad_norm": 13.217408180236816, "learning_rate": 7.895631480537142e-06, "loss": 0.8449, "step": 89790 }, { "epoch": 15.264320924698284, "grad_norm": 32.25243377685547, "learning_rate": 7.892798458836195e-06, "loss": 0.8224, "step": 89800 }, { "epoch": 15.26602073771885, "grad_norm": 10.961554527282715, "learning_rate": 7.889965437135249e-06, "loss": 0.7855, "step": 89810 }, { "epoch": 15.26772055073942, "grad_norm": 9.733016967773438, "learning_rate": 7.887132415434302e-06, "loss": 0.8541, "step": 89820 }, { "epoch": 15.269420363759986, "grad_norm": 11.441927909851074, "learning_rate": 7.884299393733356e-06, "loss": 0.7263, "step": 89830 }, { "epoch": 15.271120176780554, "grad_norm": 14.275162696838379, "learning_rate": 7.88146637203241e-06, "loss": 0.8056, "step": 89840 }, { "epoch": 15.272819989801121, "grad_norm": 15.733664512634277, "learning_rate": 7.878633350331465e-06, "loss": 0.8439, "step": 89850 }, { "epoch": 15.27451980282169, "grad_norm": 14.577089309692383, "learning_rate": 7.875800328630516e-06, "loss": 0.6732, "step": 89860 }, { "epoch": 15.276219615842257, "grad_norm": 17.352373123168945, "learning_rate": 7.872967306929572e-06, "loss": 0.8473, "step": 89870 }, { "epoch": 15.277919428862825, "grad_norm": 16.511465072631836, "learning_rate": 7.870134285228625e-06, "loss": 0.8898, "step": 89880 }, { "epoch": 15.279619241883394, "grad_norm": 11.1054105758667, "learning_rate": 7.867301263527679e-06, "loss": 0.7254, "step": 89890 }, { "epoch": 15.28131905490396, "grad_norm": 13.78614616394043, "learning_rate": 7.864468241826732e-06, "loss": 0.6851, "step": 89900 }, { "epoch": 15.283018867924529, "grad_norm": 13.506884574890137, "learning_rate": 7.861635220125787e-06, "loss": 0.7392, "step": 89910 }, { "epoch": 15.284718680945096, "grad_norm": 12.439315795898438, "learning_rate": 7.85880219842484e-06, "loss": 0.7287, "step": 89920 }, { "epoch": 15.286418493965664, "grad_norm": 27.912826538085938, "learning_rate": 7.855969176723895e-06, "loss": 0.8984, "step": 89930 }, { "epoch": 15.288118306986231, "grad_norm": 12.227807998657227, "learning_rate": 7.853136155022948e-06, "loss": 0.8264, "step": 89940 }, { "epoch": 15.2898181200068, "grad_norm": 14.104509353637695, "learning_rate": 7.850303133322002e-06, "loss": 0.5834, "step": 89950 }, { "epoch": 15.291517933027366, "grad_norm": 86.50293731689453, "learning_rate": 7.847470111621055e-06, "loss": 0.758, "step": 89960 }, { "epoch": 15.293217746047935, "grad_norm": 11.904581069946289, "learning_rate": 7.84463708992011e-06, "loss": 0.7897, "step": 89970 }, { "epoch": 15.294917559068502, "grad_norm": 13.979310035705566, "learning_rate": 7.841804068219162e-06, "loss": 0.8007, "step": 89980 }, { "epoch": 15.29661737208907, "grad_norm": 11.815821647644043, "learning_rate": 7.838971046518216e-06, "loss": 0.6927, "step": 89990 }, { "epoch": 15.298317185109639, "grad_norm": 10.428696632385254, "learning_rate": 7.836138024817271e-06, "loss": 0.706, "step": 90000 }, { "epoch": 15.300016998130205, "grad_norm": 21.982587814331055, "learning_rate": 7.833305003116323e-06, "loss": 0.9229, "step": 90010 }, { "epoch": 15.301716811150774, "grad_norm": 10.493045806884766, "learning_rate": 7.830471981415378e-06, "loss": 0.9066, "step": 90020 }, { "epoch": 15.30341662417134, "grad_norm": 12.070223808288574, "learning_rate": 7.827638959714431e-06, "loss": 0.8495, "step": 90030 }, { "epoch": 15.30511643719191, "grad_norm": 14.145990371704102, "learning_rate": 7.824805938013485e-06, "loss": 0.987, "step": 90040 }, { "epoch": 15.306816250212476, "grad_norm": 15.365336418151855, "learning_rate": 7.821972916312538e-06, "loss": 0.8178, "step": 90050 }, { "epoch": 15.308516063233045, "grad_norm": 17.435165405273438, "learning_rate": 7.819139894611594e-06, "loss": 0.7857, "step": 90060 }, { "epoch": 15.310215876253611, "grad_norm": 20.402313232421875, "learning_rate": 7.816306872910646e-06, "loss": 0.8813, "step": 90070 }, { "epoch": 15.31191568927418, "grad_norm": 39.71131134033203, "learning_rate": 7.8134738512097e-06, "loss": 0.6917, "step": 90080 }, { "epoch": 15.313615502294748, "grad_norm": 16.054834365844727, "learning_rate": 7.810640829508754e-06, "loss": 0.8031, "step": 90090 }, { "epoch": 15.315315315315315, "grad_norm": 13.915870666503906, "learning_rate": 7.807807807807808e-06, "loss": 0.8078, "step": 90100 }, { "epoch": 15.317015128335884, "grad_norm": 16.400785446166992, "learning_rate": 7.804974786106861e-06, "loss": 0.9237, "step": 90110 }, { "epoch": 15.31871494135645, "grad_norm": 13.120119094848633, "learning_rate": 7.802141764405917e-06, "loss": 0.8281, "step": 90120 }, { "epoch": 15.320414754377019, "grad_norm": 23.87560272216797, "learning_rate": 7.79930874270497e-06, "loss": 0.7925, "step": 90130 }, { "epoch": 15.322114567397586, "grad_norm": 14.576772689819336, "learning_rate": 7.796475721004024e-06, "loss": 0.8099, "step": 90140 }, { "epoch": 15.323814380418154, "grad_norm": 15.644466400146484, "learning_rate": 7.793642699303077e-06, "loss": 0.7264, "step": 90150 }, { "epoch": 15.325514193438721, "grad_norm": 9.95335578918457, "learning_rate": 7.79080967760213e-06, "loss": 0.6594, "step": 90160 }, { "epoch": 15.32721400645929, "grad_norm": 11.868425369262695, "learning_rate": 7.787976655901184e-06, "loss": 0.6693, "step": 90170 }, { "epoch": 15.328913819479856, "grad_norm": 14.652693748474121, "learning_rate": 7.785143634200238e-06, "loss": 0.884, "step": 90180 }, { "epoch": 15.330613632500425, "grad_norm": 11.631962776184082, "learning_rate": 7.782310612499293e-06, "loss": 0.6759, "step": 90190 }, { "epoch": 15.332313445520994, "grad_norm": 15.686070442199707, "learning_rate": 7.779477590798345e-06, "loss": 0.6768, "step": 90200 }, { "epoch": 15.33401325854156, "grad_norm": 17.660982131958008, "learning_rate": 7.7766445690974e-06, "loss": 0.7678, "step": 90210 }, { "epoch": 15.335713071562129, "grad_norm": 27.886777877807617, "learning_rate": 7.773811547396453e-06, "loss": 0.9201, "step": 90220 }, { "epoch": 15.337412884582696, "grad_norm": 13.692541122436523, "learning_rate": 7.770978525695507e-06, "loss": 0.8557, "step": 90230 }, { "epoch": 15.339112697603264, "grad_norm": 12.83714485168457, "learning_rate": 7.76814550399456e-06, "loss": 0.697, "step": 90240 }, { "epoch": 15.340812510623831, "grad_norm": 12.892491340637207, "learning_rate": 7.765312482293616e-06, "loss": 0.8582, "step": 90250 }, { "epoch": 15.3425123236444, "grad_norm": 18.57016372680664, "learning_rate": 7.762479460592668e-06, "loss": 0.9192, "step": 90260 }, { "epoch": 15.344212136664966, "grad_norm": 11.204704284667969, "learning_rate": 7.759646438891723e-06, "loss": 0.7853, "step": 90270 }, { "epoch": 15.345911949685535, "grad_norm": 18.137296676635742, "learning_rate": 7.756813417190776e-06, "loss": 0.8164, "step": 90280 }, { "epoch": 15.347611762706102, "grad_norm": 16.39504623413086, "learning_rate": 7.75398039548983e-06, "loss": 0.7229, "step": 90290 }, { "epoch": 15.34931157572667, "grad_norm": 14.218488693237305, "learning_rate": 7.751147373788883e-06, "loss": 0.7047, "step": 90300 }, { "epoch": 15.351011388747239, "grad_norm": 11.855220794677734, "learning_rate": 7.748314352087939e-06, "loss": 0.8124, "step": 90310 }, { "epoch": 15.352711201767805, "grad_norm": 21.860124588012695, "learning_rate": 7.74548133038699e-06, "loss": 0.849, "step": 90320 }, { "epoch": 15.354411014788374, "grad_norm": 19.267091751098633, "learning_rate": 7.742648308686046e-06, "loss": 0.7442, "step": 90330 }, { "epoch": 15.35611082780894, "grad_norm": 12.34703540802002, "learning_rate": 7.739815286985099e-06, "loss": 0.6529, "step": 90340 }, { "epoch": 15.35781064082951, "grad_norm": 14.071778297424316, "learning_rate": 7.736982265284151e-06, "loss": 1.0337, "step": 90350 }, { "epoch": 15.359510453850076, "grad_norm": 29.688526153564453, "learning_rate": 7.734149243583206e-06, "loss": 0.7704, "step": 90360 }, { "epoch": 15.361210266870645, "grad_norm": 23.397485733032227, "learning_rate": 7.73131622188226e-06, "loss": 0.6196, "step": 90370 }, { "epoch": 15.362910079891211, "grad_norm": 14.919821739196777, "learning_rate": 7.728483200181313e-06, "loss": 0.8531, "step": 90380 }, { "epoch": 15.36460989291178, "grad_norm": 13.773100852966309, "learning_rate": 7.725650178480367e-06, "loss": 0.7677, "step": 90390 }, { "epoch": 15.366309705932348, "grad_norm": 19.730833053588867, "learning_rate": 7.722817156779422e-06, "loss": 0.7534, "step": 90400 }, { "epoch": 15.368009518952915, "grad_norm": 15.135029792785645, "learning_rate": 7.719984135078474e-06, "loss": 0.8848, "step": 90410 }, { "epoch": 15.369709331973484, "grad_norm": 18.471616744995117, "learning_rate": 7.717151113377529e-06, "loss": 0.9349, "step": 90420 }, { "epoch": 15.37140914499405, "grad_norm": 15.069914817810059, "learning_rate": 7.714318091676582e-06, "loss": 0.9923, "step": 90430 }, { "epoch": 15.373108958014619, "grad_norm": 10.441474914550781, "learning_rate": 7.711485069975636e-06, "loss": 0.7155, "step": 90440 }, { "epoch": 15.374808771035186, "grad_norm": 14.953472137451172, "learning_rate": 7.70865204827469e-06, "loss": 0.8628, "step": 90450 }, { "epoch": 15.376508584055754, "grad_norm": 15.106942176818848, "learning_rate": 7.705819026573745e-06, "loss": 0.7391, "step": 90460 }, { "epoch": 15.378208397076321, "grad_norm": 13.193068504333496, "learning_rate": 7.702986004872797e-06, "loss": 0.6964, "step": 90470 }, { "epoch": 15.37990821009689, "grad_norm": 12.38344669342041, "learning_rate": 7.700152983171852e-06, "loss": 0.9165, "step": 90480 }, { "epoch": 15.381608023117456, "grad_norm": 16.895431518554688, "learning_rate": 7.697319961470905e-06, "loss": 0.7298, "step": 90490 }, { "epoch": 15.383307836138025, "grad_norm": 13.272706031799316, "learning_rate": 7.694486939769959e-06, "loss": 0.8157, "step": 90500 }, { "epoch": 15.385007649158592, "grad_norm": 12.3623046875, "learning_rate": 7.691653918069012e-06, "loss": 0.7191, "step": 90510 }, { "epoch": 15.38670746217916, "grad_norm": 14.924365997314453, "learning_rate": 7.688820896368066e-06, "loss": 0.9085, "step": 90520 }, { "epoch": 15.388407275199729, "grad_norm": 13.300880432128906, "learning_rate": 7.685987874667121e-06, "loss": 0.8314, "step": 90530 }, { "epoch": 15.390107088220295, "grad_norm": 14.279322624206543, "learning_rate": 7.683154852966173e-06, "loss": 0.7012, "step": 90540 }, { "epoch": 15.391806901240864, "grad_norm": 15.712539672851562, "learning_rate": 7.680321831265228e-06, "loss": 0.9551, "step": 90550 }, { "epoch": 15.39350671426143, "grad_norm": 18.081092834472656, "learning_rate": 7.677488809564282e-06, "loss": 0.718, "step": 90560 }, { "epoch": 15.395206527282, "grad_norm": 12.653717994689941, "learning_rate": 7.674655787863335e-06, "loss": 0.6981, "step": 90570 }, { "epoch": 15.396906340302566, "grad_norm": 17.1595458984375, "learning_rate": 7.671822766162389e-06, "loss": 0.7511, "step": 90580 }, { "epoch": 15.398606153323135, "grad_norm": 11.70518970489502, "learning_rate": 7.668989744461444e-06, "loss": 0.7358, "step": 90590 }, { "epoch": 15.400305966343701, "grad_norm": 23.241941452026367, "learning_rate": 7.666156722760496e-06, "loss": 0.7929, "step": 90600 }, { "epoch": 15.40200577936427, "grad_norm": 15.711374282836914, "learning_rate": 7.663323701059551e-06, "loss": 0.9771, "step": 90610 }, { "epoch": 15.403705592384838, "grad_norm": 16.23741340637207, "learning_rate": 7.660490679358604e-06, "loss": 0.7007, "step": 90620 }, { "epoch": 15.405405405405405, "grad_norm": 12.048133850097656, "learning_rate": 7.657657657657658e-06, "loss": 0.7261, "step": 90630 }, { "epoch": 15.407105218425974, "grad_norm": 15.551570892333984, "learning_rate": 7.654824635956712e-06, "loss": 0.8195, "step": 90640 }, { "epoch": 15.40880503144654, "grad_norm": 15.497713088989258, "learning_rate": 7.651991614255767e-06, "loss": 0.8172, "step": 90650 }, { "epoch": 15.410504844467109, "grad_norm": 14.916840553283691, "learning_rate": 7.649158592554819e-06, "loss": 0.8131, "step": 90660 }, { "epoch": 15.412204657487676, "grad_norm": 10.65144157409668, "learning_rate": 7.646325570853874e-06, "loss": 0.787, "step": 90670 }, { "epoch": 15.413904470508244, "grad_norm": 16.053611755371094, "learning_rate": 7.643492549152927e-06, "loss": 0.7735, "step": 90680 }, { "epoch": 15.415604283528811, "grad_norm": 14.425418853759766, "learning_rate": 7.640659527451979e-06, "loss": 0.9316, "step": 90690 }, { "epoch": 15.41730409654938, "grad_norm": 12.765830993652344, "learning_rate": 7.637826505751034e-06, "loss": 0.786, "step": 90700 }, { "epoch": 15.419003909569946, "grad_norm": 18.410991668701172, "learning_rate": 7.634993484050088e-06, "loss": 0.8773, "step": 90710 }, { "epoch": 15.420703722590515, "grad_norm": 12.861896514892578, "learning_rate": 7.632160462349141e-06, "loss": 0.6669, "step": 90720 }, { "epoch": 15.422403535611084, "grad_norm": 12.68874454498291, "learning_rate": 7.629327440648195e-06, "loss": 0.8053, "step": 90730 }, { "epoch": 15.42410334863165, "grad_norm": 17.926776885986328, "learning_rate": 7.626494418947249e-06, "loss": 0.6846, "step": 90740 }, { "epoch": 15.425803161652219, "grad_norm": 15.703386306762695, "learning_rate": 7.623661397246303e-06, "loss": 0.9334, "step": 90750 }, { "epoch": 15.427502974672786, "grad_norm": 14.529833793640137, "learning_rate": 7.620828375545357e-06, "loss": 0.7736, "step": 90760 }, { "epoch": 15.429202787693354, "grad_norm": 12.089536666870117, "learning_rate": 7.617995353844411e-06, "loss": 0.8501, "step": 90770 }, { "epoch": 15.430902600713921, "grad_norm": 15.255928993225098, "learning_rate": 7.615162332143465e-06, "loss": 0.7252, "step": 90780 }, { "epoch": 15.43260241373449, "grad_norm": 14.529139518737793, "learning_rate": 7.612329310442518e-06, "loss": 0.8516, "step": 90790 }, { "epoch": 15.434302226755056, "grad_norm": 16.488500595092773, "learning_rate": 7.609496288741573e-06, "loss": 0.8652, "step": 90800 }, { "epoch": 15.436002039775625, "grad_norm": 12.124156951904297, "learning_rate": 7.606663267040626e-06, "loss": 0.932, "step": 90810 }, { "epoch": 15.437701852796192, "grad_norm": 7.8656840324401855, "learning_rate": 7.60383024533968e-06, "loss": 0.8969, "step": 90820 }, { "epoch": 15.43940166581676, "grad_norm": 13.7754545211792, "learning_rate": 7.6009972236387335e-06, "loss": 0.7134, "step": 90830 }, { "epoch": 15.441101478837329, "grad_norm": 13.470656394958496, "learning_rate": 7.598164201937788e-06, "loss": 0.7278, "step": 90840 }, { "epoch": 15.442801291857895, "grad_norm": 16.319536209106445, "learning_rate": 7.5953311802368405e-06, "loss": 0.6983, "step": 90850 }, { "epoch": 15.444501104878464, "grad_norm": 14.393630981445312, "learning_rate": 7.592498158535894e-06, "loss": 0.6021, "step": 90860 }, { "epoch": 15.44620091789903, "grad_norm": 18.58411407470703, "learning_rate": 7.5896651368349484e-06, "loss": 0.7565, "step": 90870 }, { "epoch": 15.4479007309196, "grad_norm": 17.058958053588867, "learning_rate": 7.586832115134001e-06, "loss": 0.6811, "step": 90880 }, { "epoch": 15.449600543940166, "grad_norm": 12.84446907043457, "learning_rate": 7.583999093433056e-06, "loss": 0.8069, "step": 90890 }, { "epoch": 15.451300356960735, "grad_norm": 10.469186782836914, "learning_rate": 7.581166071732109e-06, "loss": 0.9658, "step": 90900 }, { "epoch": 15.453000169981301, "grad_norm": 16.58416175842285, "learning_rate": 7.578333050031163e-06, "loss": 0.847, "step": 90910 }, { "epoch": 15.45469998300187, "grad_norm": 16.311769485473633, "learning_rate": 7.575500028330217e-06, "loss": 0.9035, "step": 90920 }, { "epoch": 15.456399796022438, "grad_norm": 18.283721923828125, "learning_rate": 7.572667006629271e-06, "loss": 1.0282, "step": 90930 }, { "epoch": 15.458099609043005, "grad_norm": 13.162254333496094, "learning_rate": 7.569833984928324e-06, "loss": 0.8754, "step": 90940 }, { "epoch": 15.459799422063574, "grad_norm": 13.961657524108887, "learning_rate": 7.567000963227379e-06, "loss": 0.8308, "step": 90950 }, { "epoch": 15.46149923508414, "grad_norm": 15.44456958770752, "learning_rate": 7.564167941526432e-06, "loss": 0.7948, "step": 90960 }, { "epoch": 15.463199048104709, "grad_norm": 15.861493110656738, "learning_rate": 7.561334919825487e-06, "loss": 0.6876, "step": 90970 }, { "epoch": 15.464898861125276, "grad_norm": 12.016098022460938, "learning_rate": 7.55850189812454e-06, "loss": 0.9219, "step": 90980 }, { "epoch": 15.466598674145844, "grad_norm": 26.033145904541016, "learning_rate": 7.555668876423594e-06, "loss": 0.6756, "step": 90990 }, { "epoch": 15.468298487166411, "grad_norm": 14.052813529968262, "learning_rate": 7.552835854722648e-06, "loss": 0.8249, "step": 91000 }, { "epoch": 15.46999830018698, "grad_norm": 11.828422546386719, "learning_rate": 7.550002833021702e-06, "loss": 0.6662, "step": 91010 }, { "epoch": 15.471698113207546, "grad_norm": 13.220342636108398, "learning_rate": 7.547169811320755e-06, "loss": 0.9366, "step": 91020 }, { "epoch": 15.473397926228115, "grad_norm": 19.5731201171875, "learning_rate": 7.544336789619808e-06, "loss": 0.7397, "step": 91030 }, { "epoch": 15.475097739248683, "grad_norm": 14.072806358337402, "learning_rate": 7.5415037679188625e-06, "loss": 0.8101, "step": 91040 }, { "epoch": 15.47679755226925, "grad_norm": 13.810400009155273, "learning_rate": 7.538670746217915e-06, "loss": 0.8015, "step": 91050 }, { "epoch": 15.478497365289819, "grad_norm": 14.271106719970703, "learning_rate": 7.5358377245169704e-06, "loss": 0.8036, "step": 91060 }, { "epoch": 15.480197178310386, "grad_norm": 15.941022872924805, "learning_rate": 7.533004702816023e-06, "loss": 0.8028, "step": 91070 }, { "epoch": 15.481896991330954, "grad_norm": 15.384509086608887, "learning_rate": 7.5301716811150775e-06, "loss": 0.8113, "step": 91080 }, { "epoch": 15.48359680435152, "grad_norm": 23.9345645904541, "learning_rate": 7.527338659414131e-06, "loss": 0.7627, "step": 91090 }, { "epoch": 15.48529661737209, "grad_norm": 14.902624130249023, "learning_rate": 7.524505637713185e-06, "loss": 0.838, "step": 91100 }, { "epoch": 15.486996430392656, "grad_norm": 11.319453239440918, "learning_rate": 7.521672616012238e-06, "loss": 0.7734, "step": 91110 }, { "epoch": 15.488696243413225, "grad_norm": 14.314045906066895, "learning_rate": 7.518839594311293e-06, "loss": 0.702, "step": 91120 }, { "epoch": 15.490396056433791, "grad_norm": 11.721952438354492, "learning_rate": 7.516006572610346e-06, "loss": 0.8615, "step": 91130 }, { "epoch": 15.49209586945436, "grad_norm": 13.036624908447266, "learning_rate": 7.513173550909401e-06, "loss": 0.828, "step": 91140 }, { "epoch": 15.493795682474929, "grad_norm": 32.344573974609375, "learning_rate": 7.510340529208454e-06, "loss": 0.9238, "step": 91150 }, { "epoch": 15.495495495495495, "grad_norm": 17.57582664489746, "learning_rate": 7.507507507507508e-06, "loss": 0.6761, "step": 91160 }, { "epoch": 15.497195308516064, "grad_norm": 11.628421783447266, "learning_rate": 7.504674485806562e-06, "loss": 0.6863, "step": 91170 }, { "epoch": 15.49889512153663, "grad_norm": 12.573805809020996, "learning_rate": 7.501841464105616e-06, "loss": 0.6235, "step": 91180 }, { "epoch": 15.5005949345572, "grad_norm": 17.374340057373047, "learning_rate": 7.499008442404669e-06, "loss": 0.7291, "step": 91190 }, { "epoch": 15.502294747577766, "grad_norm": 16.175365447998047, "learning_rate": 7.496175420703723e-06, "loss": 0.7329, "step": 91200 }, { "epoch": 15.503994560598334, "grad_norm": 57.37847137451172, "learning_rate": 7.493342399002777e-06, "loss": 0.8471, "step": 91210 }, { "epoch": 15.505694373618901, "grad_norm": 13.108363151550293, "learning_rate": 7.49050937730183e-06, "loss": 0.9038, "step": 91220 }, { "epoch": 15.50739418663947, "grad_norm": 9.816061019897461, "learning_rate": 7.4876763556008845e-06, "loss": 0.7685, "step": 91230 }, { "epoch": 15.509093999660038, "grad_norm": 20.107213973999023, "learning_rate": 7.484843333899938e-06, "loss": 0.863, "step": 91240 }, { "epoch": 15.510793812680605, "grad_norm": 8.413163185119629, "learning_rate": 7.482010312198992e-06, "loss": 0.76, "step": 91250 }, { "epoch": 15.512493625701174, "grad_norm": 34.95592498779297, "learning_rate": 7.479177290498046e-06, "loss": 0.8256, "step": 91260 }, { "epoch": 15.51419343872174, "grad_norm": 13.42744255065918, "learning_rate": 7.4763442687970995e-06, "loss": 0.9801, "step": 91270 }, { "epoch": 15.515893251742309, "grad_norm": 28.508953094482422, "learning_rate": 7.473511247096153e-06, "loss": 0.8221, "step": 91280 }, { "epoch": 15.517593064762876, "grad_norm": 16.614116668701172, "learning_rate": 7.4706782253952065e-06, "loss": 0.8025, "step": 91290 }, { "epoch": 15.519292877783444, "grad_norm": 20.195728302001953, "learning_rate": 7.46784520369426e-06, "loss": 0.7818, "step": 91300 }, { "epoch": 15.520992690804011, "grad_norm": 53.522430419921875, "learning_rate": 7.4650121819933136e-06, "loss": 0.7634, "step": 91310 }, { "epoch": 15.52269250382458, "grad_norm": 14.624631881713867, "learning_rate": 7.462179160292368e-06, "loss": 0.9493, "step": 91320 }, { "epoch": 15.524392316845146, "grad_norm": 15.005112648010254, "learning_rate": 7.4593461385914214e-06, "loss": 0.8, "step": 91330 }, { "epoch": 15.526092129865715, "grad_norm": 13.623465538024902, "learning_rate": 7.456513116890476e-06, "loss": 0.7928, "step": 91340 }, { "epoch": 15.527791942886282, "grad_norm": 17.64896583557129, "learning_rate": 7.453680095189529e-06, "loss": 0.7721, "step": 91350 }, { "epoch": 15.52949175590685, "grad_norm": 13.15283203125, "learning_rate": 7.450847073488583e-06, "loss": 0.645, "step": 91360 }, { "epoch": 15.531191568927419, "grad_norm": 19.08284568786621, "learning_rate": 7.448014051787637e-06, "loss": 0.8651, "step": 91370 }, { "epoch": 15.532891381947985, "grad_norm": 15.59489917755127, "learning_rate": 7.445181030086691e-06, "loss": 0.8917, "step": 91380 }, { "epoch": 15.534591194968554, "grad_norm": 12.975179672241211, "learning_rate": 7.442348008385744e-06, "loss": 0.8441, "step": 91390 }, { "epoch": 15.53629100798912, "grad_norm": 13.256369590759277, "learning_rate": 7.439514986684799e-06, "loss": 0.6258, "step": 91400 }, { "epoch": 15.53799082100969, "grad_norm": 15.677138328552246, "learning_rate": 7.436681964983852e-06, "loss": 0.8443, "step": 91410 }, { "epoch": 15.539690634030256, "grad_norm": 13.667989730834961, "learning_rate": 7.433848943282906e-06, "loss": 0.7008, "step": 91420 }, { "epoch": 15.541390447050825, "grad_norm": 16.19575309753418, "learning_rate": 7.43101592158196e-06, "loss": 0.8577, "step": 91430 }, { "epoch": 15.543090260071391, "grad_norm": 10.654214859008789, "learning_rate": 7.4281828998810136e-06, "loss": 0.7503, "step": 91440 }, { "epoch": 15.54479007309196, "grad_norm": 13.028264045715332, "learning_rate": 7.425349878180067e-06, "loss": 0.9058, "step": 91450 }, { "epoch": 15.546489886112528, "grad_norm": 13.405503273010254, "learning_rate": 7.422516856479121e-06, "loss": 0.8683, "step": 91460 }, { "epoch": 15.548189699133095, "grad_norm": 10.328731536865234, "learning_rate": 7.419683834778174e-06, "loss": 0.7898, "step": 91470 }, { "epoch": 15.549889512153664, "grad_norm": 17.68368911743164, "learning_rate": 7.416850813077228e-06, "loss": 1.0269, "step": 91480 }, { "epoch": 15.55158932517423, "grad_norm": 12.896712303161621, "learning_rate": 7.414017791376282e-06, "loss": 0.5975, "step": 91490 }, { "epoch": 15.553289138194799, "grad_norm": 15.299263954162598, "learning_rate": 7.4111847696753356e-06, "loss": 0.6335, "step": 91500 }, { "epoch": 15.554988951215366, "grad_norm": 11.406927108764648, "learning_rate": 7.40835174797439e-06, "loss": 0.764, "step": 91510 }, { "epoch": 15.556688764235934, "grad_norm": 17.147397994995117, "learning_rate": 7.4055187262734434e-06, "loss": 0.8113, "step": 91520 }, { "epoch": 15.558388577256501, "grad_norm": 18.787370681762695, "learning_rate": 7.402685704572497e-06, "loss": 0.6354, "step": 91530 }, { "epoch": 15.56008839027707, "grad_norm": 16.814245223999023, "learning_rate": 7.399852682871551e-06, "loss": 0.934, "step": 91540 }, { "epoch": 15.561788203297636, "grad_norm": 14.964144706726074, "learning_rate": 7.397019661170605e-06, "loss": 0.673, "step": 91550 }, { "epoch": 15.563488016318205, "grad_norm": 14.177610397338867, "learning_rate": 7.394186639469658e-06, "loss": 0.8759, "step": 91560 }, { "epoch": 15.565187829338774, "grad_norm": 17.075763702392578, "learning_rate": 7.391353617768713e-06, "loss": 0.9143, "step": 91570 }, { "epoch": 15.56688764235934, "grad_norm": 11.584887504577637, "learning_rate": 7.388520596067766e-06, "loss": 0.8311, "step": 91580 }, { "epoch": 15.568587455379909, "grad_norm": 13.523076057434082, "learning_rate": 7.38568757436682e-06, "loss": 0.7418, "step": 91590 }, { "epoch": 15.570287268400476, "grad_norm": 19.32057762145996, "learning_rate": 7.382854552665874e-06, "loss": 0.876, "step": 91600 }, { "epoch": 15.571987081421044, "grad_norm": 14.388504028320312, "learning_rate": 7.380021530964928e-06, "loss": 0.8053, "step": 91610 }, { "epoch": 15.573686894441611, "grad_norm": 17.512557983398438, "learning_rate": 7.377188509263981e-06, "loss": 0.7187, "step": 91620 }, { "epoch": 15.57538670746218, "grad_norm": 14.381280899047852, "learning_rate": 7.374355487563035e-06, "loss": 0.8768, "step": 91630 }, { "epoch": 15.577086520482746, "grad_norm": 11.874649047851562, "learning_rate": 7.371522465862088e-06, "loss": 0.801, "step": 91640 }, { "epoch": 15.578786333503315, "grad_norm": 16.130483627319336, "learning_rate": 7.368689444161142e-06, "loss": 0.9711, "step": 91650 }, { "epoch": 15.580486146523882, "grad_norm": 15.296242713928223, "learning_rate": 7.365856422460196e-06, "loss": 0.6321, "step": 91660 }, { "epoch": 15.58218595954445, "grad_norm": 16.762102127075195, "learning_rate": 7.36302340075925e-06, "loss": 0.7388, "step": 91670 }, { "epoch": 15.583885772565019, "grad_norm": 17.71272850036621, "learning_rate": 7.360190379058304e-06, "loss": 0.8464, "step": 91680 }, { "epoch": 15.585585585585585, "grad_norm": 11.976479530334473, "learning_rate": 7.3573573573573575e-06, "loss": 0.9247, "step": 91690 }, { "epoch": 15.587285398606154, "grad_norm": 14.739922523498535, "learning_rate": 7.354524335656411e-06, "loss": 0.6966, "step": 91700 }, { "epoch": 15.58898521162672, "grad_norm": 12.261043548583984, "learning_rate": 7.3516913139554654e-06, "loss": 0.68, "step": 91710 }, { "epoch": 15.59068502464729, "grad_norm": 18.703527450561523, "learning_rate": 7.348858292254519e-06, "loss": 0.8558, "step": 91720 }, { "epoch": 15.592384837667856, "grad_norm": 10.908007621765137, "learning_rate": 7.3460252705535725e-06, "loss": 0.8811, "step": 91730 }, { "epoch": 15.594084650688425, "grad_norm": 16.394060134887695, "learning_rate": 7.343192248852627e-06, "loss": 0.7989, "step": 91740 }, { "epoch": 15.595784463708991, "grad_norm": 15.357726097106934, "learning_rate": 7.34035922715168e-06, "loss": 0.8809, "step": 91750 }, { "epoch": 15.59748427672956, "grad_norm": 16.17452049255371, "learning_rate": 7.337526205450734e-06, "loss": 0.7605, "step": 91760 }, { "epoch": 15.599184089750128, "grad_norm": 14.931264877319336, "learning_rate": 7.334693183749788e-06, "loss": 0.8193, "step": 91770 }, { "epoch": 15.600883902770695, "grad_norm": 13.020003318786621, "learning_rate": 7.331860162048842e-06, "loss": 0.7266, "step": 91780 }, { "epoch": 15.602583715791264, "grad_norm": 16.095651626586914, "learning_rate": 7.329027140347895e-06, "loss": 0.7771, "step": 91790 }, { "epoch": 15.60428352881183, "grad_norm": 18.72491455078125, "learning_rate": 7.32619411864695e-06, "loss": 0.9067, "step": 91800 }, { "epoch": 15.605983341832399, "grad_norm": 16.99665069580078, "learning_rate": 7.323361096946002e-06, "loss": 0.8033, "step": 91810 }, { "epoch": 15.607683154852966, "grad_norm": 20.78124237060547, "learning_rate": 7.320528075245056e-06, "loss": 0.9144, "step": 91820 }, { "epoch": 15.609382967873534, "grad_norm": 11.573631286621094, "learning_rate": 7.31769505354411e-06, "loss": 0.8005, "step": 91830 }, { "epoch": 15.611082780894101, "grad_norm": 12.13751220703125, "learning_rate": 7.314862031843164e-06, "loss": 0.6966, "step": 91840 }, { "epoch": 15.61278259391467, "grad_norm": 19.609806060791016, "learning_rate": 7.312029010142217e-06, "loss": 1.0897, "step": 91850 }, { "epoch": 15.614482406935236, "grad_norm": 14.164785385131836, "learning_rate": 7.309195988441272e-06, "loss": 0.9728, "step": 91860 }, { "epoch": 15.616182219955805, "grad_norm": 21.315074920654297, "learning_rate": 7.306362966740325e-06, "loss": 0.8846, "step": 91870 }, { "epoch": 15.617882032976372, "grad_norm": 12.478965759277344, "learning_rate": 7.3035299450393795e-06, "loss": 0.7692, "step": 91880 }, { "epoch": 15.61958184599694, "grad_norm": 11.173360824584961, "learning_rate": 7.300696923338433e-06, "loss": 0.7316, "step": 91890 }, { "epoch": 15.621281659017509, "grad_norm": 15.661722183227539, "learning_rate": 7.297863901637487e-06, "loss": 0.9903, "step": 91900 }, { "epoch": 15.622981472038076, "grad_norm": 13.551709175109863, "learning_rate": 7.295030879936541e-06, "loss": 0.813, "step": 91910 }, { "epoch": 15.624681285058644, "grad_norm": 13.340328216552734, "learning_rate": 7.2921978582355945e-06, "loss": 0.6453, "step": 91920 }, { "epoch": 15.62638109807921, "grad_norm": 14.000825881958008, "learning_rate": 7.289364836534648e-06, "loss": 0.8437, "step": 91930 }, { "epoch": 15.62808091109978, "grad_norm": 20.01459503173828, "learning_rate": 7.286531814833702e-06, "loss": 0.7846, "step": 91940 }, { "epoch": 15.629780724120346, "grad_norm": 12.276582717895508, "learning_rate": 7.283698793132756e-06, "loss": 0.8932, "step": 91950 }, { "epoch": 15.631480537140915, "grad_norm": 20.771034240722656, "learning_rate": 7.280865771431809e-06, "loss": 0.7104, "step": 91960 }, { "epoch": 15.633180350161481, "grad_norm": 14.365242958068848, "learning_rate": 7.278032749730864e-06, "loss": 0.722, "step": 91970 }, { "epoch": 15.63488016318205, "grad_norm": 12.43301773071289, "learning_rate": 7.2751997280299165e-06, "loss": 0.6681, "step": 91980 }, { "epoch": 15.636579976202619, "grad_norm": 17.63393211364746, "learning_rate": 7.27236670632897e-06, "loss": 0.7559, "step": 91990 }, { "epoch": 15.638279789223185, "grad_norm": 15.330915451049805, "learning_rate": 7.269533684628024e-06, "loss": 0.9438, "step": 92000 }, { "epoch": 15.639979602243754, "grad_norm": 15.835800170898438, "learning_rate": 7.266700662927078e-06, "loss": 0.8115, "step": 92010 }, { "epoch": 15.64167941526432, "grad_norm": 16.63861656188965, "learning_rate": 7.263867641226131e-06, "loss": 0.7086, "step": 92020 }, { "epoch": 15.64337922828489, "grad_norm": 18.231674194335938, "learning_rate": 7.261034619525186e-06, "loss": 0.7135, "step": 92030 }, { "epoch": 15.645079041305456, "grad_norm": 17.417957305908203, "learning_rate": 7.258201597824239e-06, "loss": 0.4501, "step": 92040 }, { "epoch": 15.646778854326024, "grad_norm": 13.511438369750977, "learning_rate": 7.255368576123294e-06, "loss": 1.1083, "step": 92050 }, { "epoch": 15.648478667346591, "grad_norm": 13.074175834655762, "learning_rate": 7.252535554422347e-06, "loss": 0.8177, "step": 92060 }, { "epoch": 15.65017848036716, "grad_norm": 8.62028980255127, "learning_rate": 7.249702532721401e-06, "loss": 0.8791, "step": 92070 }, { "epoch": 15.651878293387728, "grad_norm": 15.35122299194336, "learning_rate": 7.246869511020455e-06, "loss": 0.709, "step": 92080 }, { "epoch": 15.653578106408295, "grad_norm": 14.14490032196045, "learning_rate": 7.244036489319509e-06, "loss": 0.7674, "step": 92090 }, { "epoch": 15.655277919428864, "grad_norm": 11.488975524902344, "learning_rate": 7.241203467618562e-06, "loss": 0.7863, "step": 92100 }, { "epoch": 15.65697773244943, "grad_norm": 20.979318618774414, "learning_rate": 7.2383704459176165e-06, "loss": 0.6786, "step": 92110 }, { "epoch": 15.658677545469999, "grad_norm": 13.438459396362305, "learning_rate": 7.23553742421667e-06, "loss": 0.7883, "step": 92120 }, { "epoch": 15.660377358490566, "grad_norm": 19.346242904663086, "learning_rate": 7.2327044025157235e-06, "loss": 1.0702, "step": 92130 }, { "epoch": 15.662077171511134, "grad_norm": 14.14321517944336, "learning_rate": 7.229871380814778e-06, "loss": 1.0092, "step": 92140 }, { "epoch": 15.663776984531701, "grad_norm": 22.3133487701416, "learning_rate": 7.2270383591138306e-06, "loss": 0.7211, "step": 92150 }, { "epoch": 15.66547679755227, "grad_norm": 13.386056900024414, "learning_rate": 7.224205337412884e-06, "loss": 0.7463, "step": 92160 }, { "epoch": 15.667176610572836, "grad_norm": 12.778800010681152, "learning_rate": 7.2213723157119385e-06, "loss": 0.7713, "step": 92170 }, { "epoch": 15.668876423593405, "grad_norm": 12.538823127746582, "learning_rate": 7.218539294010992e-06, "loss": 0.8848, "step": 92180 }, { "epoch": 15.670576236613972, "grad_norm": 14.217129707336426, "learning_rate": 7.2157062723100455e-06, "loss": 0.7966, "step": 92190 }, { "epoch": 15.67227604963454, "grad_norm": 12.237483024597168, "learning_rate": 7.2128732506091e-06, "loss": 0.604, "step": 92200 }, { "epoch": 15.673975862655109, "grad_norm": 12.030130386352539, "learning_rate": 7.210040228908153e-06, "loss": 0.9747, "step": 92210 }, { "epoch": 15.675675675675675, "grad_norm": 15.837207794189453, "learning_rate": 7.207207207207207e-06, "loss": 0.8301, "step": 92220 }, { "epoch": 15.677375488696244, "grad_norm": 13.659130096435547, "learning_rate": 7.204374185506261e-06, "loss": 1.0732, "step": 92230 }, { "epoch": 15.67907530171681, "grad_norm": 16.147855758666992, "learning_rate": 7.201541163805315e-06, "loss": 0.6953, "step": 92240 }, { "epoch": 15.68077511473738, "grad_norm": 13.178147315979004, "learning_rate": 7.198708142104369e-06, "loss": 0.7881, "step": 92250 }, { "epoch": 15.682474927757946, "grad_norm": 16.228593826293945, "learning_rate": 7.195875120403423e-06, "loss": 0.8615, "step": 92260 }, { "epoch": 15.684174740778515, "grad_norm": 12.846068382263184, "learning_rate": 7.193042098702476e-06, "loss": 0.7621, "step": 92270 }, { "epoch": 15.685874553799081, "grad_norm": 11.990915298461914, "learning_rate": 7.190209077001531e-06, "loss": 0.8114, "step": 92280 }, { "epoch": 15.68757436681965, "grad_norm": 16.8099308013916, "learning_rate": 7.187376055300584e-06, "loss": 0.7295, "step": 92290 }, { "epoch": 15.689274179840218, "grad_norm": 18.963224411010742, "learning_rate": 7.184543033599638e-06, "loss": 0.7803, "step": 92300 }, { "epoch": 15.690973992860785, "grad_norm": 13.015341758728027, "learning_rate": 7.181710011898692e-06, "loss": 0.8633, "step": 92310 }, { "epoch": 15.692673805881354, "grad_norm": 11.335762977600098, "learning_rate": 7.178876990197745e-06, "loss": 0.7169, "step": 92320 }, { "epoch": 15.69437361890192, "grad_norm": 10.687763214111328, "learning_rate": 7.176043968496798e-06, "loss": 0.6854, "step": 92330 }, { "epoch": 15.696073431922489, "grad_norm": 30.305028915405273, "learning_rate": 7.1732109467958526e-06, "loss": 0.835, "step": 92340 }, { "epoch": 15.697773244943056, "grad_norm": 13.544591903686523, "learning_rate": 7.170377925094906e-06, "loss": 0.8798, "step": 92350 }, { "epoch": 15.699473057963624, "grad_norm": 16.298702239990234, "learning_rate": 7.16754490339396e-06, "loss": 0.8794, "step": 92360 }, { "epoch": 15.701172870984191, "grad_norm": 11.6947660446167, "learning_rate": 7.164711881693014e-06, "loss": 0.64, "step": 92370 }, { "epoch": 15.70287268400476, "grad_norm": 15.125605583190918, "learning_rate": 7.1618788599920675e-06, "loss": 0.8716, "step": 92380 }, { "epoch": 15.704572497025326, "grad_norm": 14.992840766906738, "learning_rate": 7.159045838291121e-06, "loss": 0.9454, "step": 92390 }, { "epoch": 15.706272310045895, "grad_norm": 16.866348266601562, "learning_rate": 7.156212816590175e-06, "loss": 0.8596, "step": 92400 }, { "epoch": 15.707972123066464, "grad_norm": 10.901652336120605, "learning_rate": 7.153379794889229e-06, "loss": 0.7423, "step": 92410 }, { "epoch": 15.70967193608703, "grad_norm": 14.414742469787598, "learning_rate": 7.150546773188283e-06, "loss": 0.6977, "step": 92420 }, { "epoch": 15.711371749107599, "grad_norm": 33.12068176269531, "learning_rate": 7.147713751487337e-06, "loss": 0.8993, "step": 92430 }, { "epoch": 15.713071562128166, "grad_norm": 14.393491744995117, "learning_rate": 7.14488072978639e-06, "loss": 0.7575, "step": 92440 }, { "epoch": 15.714771375148734, "grad_norm": 14.938018798828125, "learning_rate": 7.142047708085445e-06, "loss": 0.6846, "step": 92450 }, { "epoch": 15.7164711881693, "grad_norm": 13.733177185058594, "learning_rate": 7.139214686384498e-06, "loss": 0.838, "step": 92460 }, { "epoch": 15.71817100118987, "grad_norm": 12.63624382019043, "learning_rate": 7.136381664683552e-06, "loss": 0.8311, "step": 92470 }, { "epoch": 15.719870814210436, "grad_norm": 27.407609939575195, "learning_rate": 7.133548642982606e-06, "loss": 0.8397, "step": 92480 }, { "epoch": 15.721570627231005, "grad_norm": 12.153931617736816, "learning_rate": 7.13071562128166e-06, "loss": 0.8542, "step": 92490 }, { "epoch": 15.723270440251572, "grad_norm": 11.995092391967773, "learning_rate": 7.127882599580712e-06, "loss": 0.8777, "step": 92500 }, { "epoch": 15.72497025327214, "grad_norm": 169.76329040527344, "learning_rate": 7.125049577879767e-06, "loss": 0.7055, "step": 92510 }, { "epoch": 15.726670066292709, "grad_norm": 17.38300323486328, "learning_rate": 7.12221655617882e-06, "loss": 0.8275, "step": 92520 }, { "epoch": 15.728369879313275, "grad_norm": 13.205431938171387, "learning_rate": 7.119383534477874e-06, "loss": 0.73, "step": 92530 }, { "epoch": 15.730069692333844, "grad_norm": 16.54874038696289, "learning_rate": 7.116550512776928e-06, "loss": 0.9686, "step": 92540 }, { "epoch": 15.73176950535441, "grad_norm": 16.083066940307617, "learning_rate": 7.113717491075982e-06, "loss": 0.8582, "step": 92550 }, { "epoch": 15.73346931837498, "grad_norm": 13.94778060913086, "learning_rate": 7.110884469375035e-06, "loss": 0.8572, "step": 92560 }, { "epoch": 15.735169131395546, "grad_norm": 16.969091415405273, "learning_rate": 7.1080514476740895e-06, "loss": 0.7711, "step": 92570 }, { "epoch": 15.736868944416115, "grad_norm": 16.303939819335938, "learning_rate": 7.105218425973143e-06, "loss": 0.8153, "step": 92580 }, { "epoch": 15.738568757436681, "grad_norm": 21.016826629638672, "learning_rate": 7.1023854042721965e-06, "loss": 0.7657, "step": 92590 }, { "epoch": 15.74026857045725, "grad_norm": 18.390649795532227, "learning_rate": 7.099552382571251e-06, "loss": 0.9271, "step": 92600 }, { "epoch": 15.741968383477818, "grad_norm": 17.680280685424805, "learning_rate": 7.0967193608703044e-06, "loss": 0.6309, "step": 92610 }, { "epoch": 15.743668196498385, "grad_norm": 11.911101341247559, "learning_rate": 7.093886339169359e-06, "loss": 0.6953, "step": 92620 }, { "epoch": 15.745368009518954, "grad_norm": 10.053357124328613, "learning_rate": 7.091053317468412e-06, "loss": 0.7576, "step": 92630 }, { "epoch": 15.74706782253952, "grad_norm": 13.145296096801758, "learning_rate": 7.088220295767466e-06, "loss": 0.7379, "step": 92640 }, { "epoch": 15.748767635560089, "grad_norm": 12.517041206359863, "learning_rate": 7.08538727406652e-06, "loss": 0.8565, "step": 92650 }, { "epoch": 15.750467448580656, "grad_norm": 15.624812126159668, "learning_rate": 7.082554252365574e-06, "loss": 1.0744, "step": 92660 }, { "epoch": 15.752167261601224, "grad_norm": 11.485295295715332, "learning_rate": 7.079721230664626e-06, "loss": 0.9207, "step": 92670 }, { "epoch": 15.753867074621791, "grad_norm": 24.697328567504883, "learning_rate": 7.076888208963681e-06, "loss": 0.6387, "step": 92680 }, { "epoch": 15.75556688764236, "grad_norm": 11.447662353515625, "learning_rate": 7.074055187262734e-06, "loss": 0.6825, "step": 92690 }, { "epoch": 15.757266700662926, "grad_norm": 13.981681823730469, "learning_rate": 7.071222165561788e-06, "loss": 0.8926, "step": 92700 }, { "epoch": 15.758966513683495, "grad_norm": 24.281543731689453, "learning_rate": 7.068389143860842e-06, "loss": 0.7218, "step": 92710 }, { "epoch": 15.760666326704062, "grad_norm": 21.919618606567383, "learning_rate": 7.065556122159896e-06, "loss": 0.8648, "step": 92720 }, { "epoch": 15.76236613972463, "grad_norm": 15.133880615234375, "learning_rate": 7.062723100458949e-06, "loss": 1.0621, "step": 92730 }, { "epoch": 15.764065952745199, "grad_norm": 13.976706504821777, "learning_rate": 7.059890078758004e-06, "loss": 0.8633, "step": 92740 }, { "epoch": 15.765765765765765, "grad_norm": 9.210417747497559, "learning_rate": 7.057057057057057e-06, "loss": 0.5934, "step": 92750 }, { "epoch": 15.767465578786334, "grad_norm": 11.815301895141602, "learning_rate": 7.054224035356111e-06, "loss": 0.7518, "step": 92760 }, { "epoch": 15.7691653918069, "grad_norm": 11.120850563049316, "learning_rate": 7.051391013655165e-06, "loss": 0.8527, "step": 92770 }, { "epoch": 15.77086520482747, "grad_norm": 17.187091827392578, "learning_rate": 7.0485579919542185e-06, "loss": 0.7898, "step": 92780 }, { "epoch": 15.772565017848036, "grad_norm": 14.77146053314209, "learning_rate": 7.045724970253273e-06, "loss": 0.684, "step": 92790 }, { "epoch": 15.774264830868605, "grad_norm": 11.413466453552246, "learning_rate": 7.0428919485523264e-06, "loss": 0.7127, "step": 92800 }, { "epoch": 15.775964643889171, "grad_norm": 13.59022331237793, "learning_rate": 7.04005892685138e-06, "loss": 0.8631, "step": 92810 }, { "epoch": 15.77766445690974, "grad_norm": 13.174467086791992, "learning_rate": 7.037225905150434e-06, "loss": 0.8517, "step": 92820 }, { "epoch": 15.779364269930308, "grad_norm": 12.408873558044434, "learning_rate": 7.034392883449488e-06, "loss": 0.7704, "step": 92830 }, { "epoch": 15.781064082950875, "grad_norm": 13.234356880187988, "learning_rate": 7.0315598617485405e-06, "loss": 0.8855, "step": 92840 }, { "epoch": 15.782763895971444, "grad_norm": 14.958456993103027, "learning_rate": 7.028726840047595e-06, "loss": 0.8405, "step": 92850 }, { "epoch": 15.78446370899201, "grad_norm": 11.362282752990723, "learning_rate": 7.025893818346648e-06, "loss": 0.8067, "step": 92860 }, { "epoch": 15.786163522012579, "grad_norm": 17.768651962280273, "learning_rate": 7.023060796645702e-06, "loss": 0.8832, "step": 92870 }, { "epoch": 15.787863335033146, "grad_norm": 14.443549156188965, "learning_rate": 7.020227774944756e-06, "loss": 0.7034, "step": 92880 }, { "epoch": 15.789563148053714, "grad_norm": 12.455947875976562, "learning_rate": 7.01739475324381e-06, "loss": 0.7952, "step": 92890 }, { "epoch": 15.791262961074281, "grad_norm": 13.568368911743164, "learning_rate": 7.014561731542863e-06, "loss": 0.84, "step": 92900 }, { "epoch": 15.79296277409485, "grad_norm": 18.189462661743164, "learning_rate": 7.011728709841918e-06, "loss": 0.8599, "step": 92910 }, { "epoch": 15.794662587115416, "grad_norm": 20.593494415283203, "learning_rate": 7.008895688140971e-06, "loss": 0.7805, "step": 92920 }, { "epoch": 15.796362400135985, "grad_norm": 12.92370891571045, "learning_rate": 7.006062666440025e-06, "loss": 0.8358, "step": 92930 }, { "epoch": 15.798062213156554, "grad_norm": 25.64452362060547, "learning_rate": 7.003229644739079e-06, "loss": 0.7807, "step": 92940 }, { "epoch": 15.79976202617712, "grad_norm": 11.51123332977295, "learning_rate": 7.000396623038133e-06, "loss": 0.8742, "step": 92950 }, { "epoch": 15.801461839197689, "grad_norm": 19.564664840698242, "learning_rate": 6.997563601337187e-06, "loss": 0.7375, "step": 92960 }, { "epoch": 15.803161652218256, "grad_norm": 17.168977737426758, "learning_rate": 6.9947305796362405e-06, "loss": 0.6748, "step": 92970 }, { "epoch": 15.804861465238824, "grad_norm": 16.20827865600586, "learning_rate": 6.991897557935294e-06, "loss": 0.7507, "step": 92980 }, { "epoch": 15.806561278259391, "grad_norm": 12.976093292236328, "learning_rate": 6.989064536234348e-06, "loss": 0.7166, "step": 92990 }, { "epoch": 15.80826109127996, "grad_norm": 18.994142532348633, "learning_rate": 6.986231514533402e-06, "loss": 0.9123, "step": 93000 }, { "epoch": 15.809960904300526, "grad_norm": 11.712574005126953, "learning_rate": 6.9833984928324555e-06, "loss": 0.814, "step": 93010 }, { "epoch": 15.811660717321095, "grad_norm": 16.05300521850586, "learning_rate": 6.980565471131509e-06, "loss": 0.9145, "step": 93020 }, { "epoch": 15.813360530341662, "grad_norm": 17.40557289123535, "learning_rate": 6.9777324494305625e-06, "loss": 0.8445, "step": 93030 }, { "epoch": 15.81506034336223, "grad_norm": 12.57970905303955, "learning_rate": 6.974899427729616e-06, "loss": 0.6349, "step": 93040 }, { "epoch": 15.816760156382799, "grad_norm": 14.788106918334961, "learning_rate": 6.97206640602867e-06, "loss": 0.759, "step": 93050 }, { "epoch": 15.818459969403365, "grad_norm": 13.37831974029541, "learning_rate": 6.969233384327724e-06, "loss": 0.7035, "step": 93060 }, { "epoch": 15.820159782423934, "grad_norm": 12.189935684204102, "learning_rate": 6.9664003626267774e-06, "loss": 0.8479, "step": 93070 }, { "epoch": 15.8218595954445, "grad_norm": 10.237060546875, "learning_rate": 6.963567340925832e-06, "loss": 0.6909, "step": 93080 }, { "epoch": 15.82355940846507, "grad_norm": 12.960701942443848, "learning_rate": 6.960734319224885e-06, "loss": 0.8478, "step": 93090 }, { "epoch": 15.825259221485636, "grad_norm": 13.89614200592041, "learning_rate": 6.957901297523939e-06, "loss": 0.8706, "step": 93100 }, { "epoch": 15.826959034506205, "grad_norm": 12.91996955871582, "learning_rate": 6.955068275822993e-06, "loss": 0.8538, "step": 93110 }, { "epoch": 15.828658847526771, "grad_norm": 13.760066032409668, "learning_rate": 6.952235254122047e-06, "loss": 0.7781, "step": 93120 }, { "epoch": 15.83035866054734, "grad_norm": 11.16109848022461, "learning_rate": 6.9494022324211e-06, "loss": 1.0137, "step": 93130 }, { "epoch": 15.832058473567908, "grad_norm": 13.62110710144043, "learning_rate": 6.946569210720155e-06, "loss": 0.9908, "step": 93140 }, { "epoch": 15.833758286588475, "grad_norm": 11.689766883850098, "learning_rate": 6.943736189019208e-06, "loss": 0.7945, "step": 93150 }, { "epoch": 15.835458099609044, "grad_norm": 22.036836624145508, "learning_rate": 6.9409031673182625e-06, "loss": 0.9205, "step": 93160 }, { "epoch": 15.83715791262961, "grad_norm": 14.803422927856445, "learning_rate": 6.938070145617316e-06, "loss": 0.7972, "step": 93170 }, { "epoch": 15.838857725650179, "grad_norm": 13.300980567932129, "learning_rate": 6.9352371239163696e-06, "loss": 0.9331, "step": 93180 }, { "epoch": 15.840557538670746, "grad_norm": 16.40839958190918, "learning_rate": 6.932404102215423e-06, "loss": 0.6827, "step": 93190 }, { "epoch": 15.842257351691314, "grad_norm": 13.275299072265625, "learning_rate": 6.929571080514477e-06, "loss": 0.6747, "step": 93200 }, { "epoch": 15.843957164711881, "grad_norm": 22.903533935546875, "learning_rate": 6.92673805881353e-06, "loss": 0.7685, "step": 93210 }, { "epoch": 15.84565697773245, "grad_norm": 16.752845764160156, "learning_rate": 6.9239050371125845e-06, "loss": 0.9921, "step": 93220 }, { "epoch": 15.847356790753016, "grad_norm": 9.0663423538208, "learning_rate": 6.921072015411638e-06, "loss": 1.0125, "step": 93230 }, { "epoch": 15.849056603773585, "grad_norm": 16.306371688842773, "learning_rate": 6.9182389937106915e-06, "loss": 0.8743, "step": 93240 }, { "epoch": 15.850756416794152, "grad_norm": 15.040854454040527, "learning_rate": 6.915405972009746e-06, "loss": 0.7456, "step": 93250 }, { "epoch": 15.85245622981472, "grad_norm": 10.524969100952148, "learning_rate": 6.9125729503087994e-06, "loss": 0.8031, "step": 93260 }, { "epoch": 15.854156042835289, "grad_norm": 36.92729187011719, "learning_rate": 6.909739928607853e-06, "loss": 0.88, "step": 93270 }, { "epoch": 15.855855855855856, "grad_norm": 12.319005966186523, "learning_rate": 6.906906906906907e-06, "loss": 0.8691, "step": 93280 }, { "epoch": 15.857555668876424, "grad_norm": 14.854984283447266, "learning_rate": 6.904073885205961e-06, "loss": 0.875, "step": 93290 }, { "epoch": 15.85925548189699, "grad_norm": 12.771882057189941, "learning_rate": 6.901240863505014e-06, "loss": 0.8021, "step": 93300 }, { "epoch": 15.86095529491756, "grad_norm": 16.61958885192871, "learning_rate": 6.898407841804069e-06, "loss": 0.7666, "step": 93310 }, { "epoch": 15.862655107938126, "grad_norm": 12.63956069946289, "learning_rate": 6.895574820103122e-06, "loss": 0.8666, "step": 93320 }, { "epoch": 15.864354920958695, "grad_norm": 17.672584533691406, "learning_rate": 6.892741798402177e-06, "loss": 0.706, "step": 93330 }, { "epoch": 15.866054733979261, "grad_norm": 29.054637908935547, "learning_rate": 6.88990877670123e-06, "loss": 0.7283, "step": 93340 }, { "epoch": 15.86775454699983, "grad_norm": 13.764692306518555, "learning_rate": 6.887075755000284e-06, "loss": 0.795, "step": 93350 }, { "epoch": 15.869454360020399, "grad_norm": 11.343280792236328, "learning_rate": 6.884242733299337e-06, "loss": 0.8365, "step": 93360 }, { "epoch": 15.871154173040965, "grad_norm": 21.58892822265625, "learning_rate": 6.881409711598391e-06, "loss": 0.6699, "step": 93370 }, { "epoch": 15.872853986061534, "grad_norm": 15.29456615447998, "learning_rate": 6.878576689897444e-06, "loss": 0.6429, "step": 93380 }, { "epoch": 15.8745537990821, "grad_norm": 16.841230392456055, "learning_rate": 6.875743668196499e-06, "loss": 0.7065, "step": 93390 }, { "epoch": 15.87625361210267, "grad_norm": 13.14892578125, "learning_rate": 6.872910646495552e-06, "loss": 0.6877, "step": 93400 }, { "epoch": 15.877953425123236, "grad_norm": 16.402616500854492, "learning_rate": 6.870077624794606e-06, "loss": 0.7056, "step": 93410 }, { "epoch": 15.879653238143804, "grad_norm": 14.147699356079102, "learning_rate": 6.86724460309366e-06, "loss": 0.8865, "step": 93420 }, { "epoch": 15.881353051164371, "grad_norm": 12.389761924743652, "learning_rate": 6.8644115813927135e-06, "loss": 0.8149, "step": 93430 }, { "epoch": 15.88305286418494, "grad_norm": 16.56352996826172, "learning_rate": 6.861578559691767e-06, "loss": 0.8115, "step": 93440 }, { "epoch": 15.884752677205508, "grad_norm": 19.612642288208008, "learning_rate": 6.8587455379908214e-06, "loss": 0.7284, "step": 93450 }, { "epoch": 15.886452490226075, "grad_norm": 13.239107131958008, "learning_rate": 6.855912516289875e-06, "loss": 0.9493, "step": 93460 }, { "epoch": 15.888152303246644, "grad_norm": 16.1804141998291, "learning_rate": 6.8530794945889285e-06, "loss": 0.6468, "step": 93470 }, { "epoch": 15.88985211626721, "grad_norm": 38.16307067871094, "learning_rate": 6.850246472887983e-06, "loss": 0.6543, "step": 93480 }, { "epoch": 15.891551929287779, "grad_norm": 14.473855018615723, "learning_rate": 6.847413451187036e-06, "loss": 0.6255, "step": 93490 }, { "epoch": 15.893251742308346, "grad_norm": 14.158689498901367, "learning_rate": 6.84458042948609e-06, "loss": 0.7751, "step": 93500 }, { "epoch": 15.894951555328914, "grad_norm": 16.280811309814453, "learning_rate": 6.841747407785144e-06, "loss": 0.8613, "step": 93510 }, { "epoch": 15.896651368349481, "grad_norm": 13.516128540039062, "learning_rate": 6.838914386084198e-06, "loss": 0.8612, "step": 93520 }, { "epoch": 15.89835118137005, "grad_norm": 13.073161125183105, "learning_rate": 6.836081364383252e-06, "loss": 0.8442, "step": 93530 }, { "epoch": 15.900050994390616, "grad_norm": 17.462404251098633, "learning_rate": 6.833248342682305e-06, "loss": 0.7586, "step": 93540 }, { "epoch": 15.901750807411185, "grad_norm": 16.65427017211914, "learning_rate": 6.830415320981358e-06, "loss": 0.7094, "step": 93550 }, { "epoch": 15.903450620431752, "grad_norm": 12.945104598999023, "learning_rate": 6.827582299280413e-06, "loss": 0.669, "step": 93560 }, { "epoch": 15.90515043345232, "grad_norm": 19.585540771484375, "learning_rate": 6.824749277579466e-06, "loss": 0.8036, "step": 93570 }, { "epoch": 15.906850246472889, "grad_norm": 12.37489128112793, "learning_rate": 6.82191625587852e-06, "loss": 0.7888, "step": 93580 }, { "epoch": 15.908550059493455, "grad_norm": 13.479696273803711, "learning_rate": 6.819083234177574e-06, "loss": 0.8201, "step": 93590 }, { "epoch": 15.910249872514024, "grad_norm": 50.4504508972168, "learning_rate": 6.816250212476628e-06, "loss": 0.641, "step": 93600 }, { "epoch": 15.91194968553459, "grad_norm": 18.99456024169922, "learning_rate": 6.813417190775681e-06, "loss": 1.0367, "step": 93610 }, { "epoch": 15.91364949855516, "grad_norm": 17.838645935058594, "learning_rate": 6.8105841690747355e-06, "loss": 0.6493, "step": 93620 }, { "epoch": 15.915349311575726, "grad_norm": 12.89978313446045, "learning_rate": 6.807751147373789e-06, "loss": 0.924, "step": 93630 }, { "epoch": 15.917049124596295, "grad_norm": 16.709156036376953, "learning_rate": 6.804918125672843e-06, "loss": 0.9316, "step": 93640 }, { "epoch": 15.918748937616861, "grad_norm": 13.952961921691895, "learning_rate": 6.802085103971897e-06, "loss": 0.8092, "step": 93650 }, { "epoch": 15.92044875063743, "grad_norm": 11.371923446655273, "learning_rate": 6.7992520822709505e-06, "loss": 0.89, "step": 93660 }, { "epoch": 15.922148563657998, "grad_norm": 17.214202880859375, "learning_rate": 6.796419060570004e-06, "loss": 0.9033, "step": 93670 }, { "epoch": 15.923848376678565, "grad_norm": 12.923935890197754, "learning_rate": 6.793586038869058e-06, "loss": 0.7545, "step": 93680 }, { "epoch": 15.925548189699134, "grad_norm": 19.02928352355957, "learning_rate": 6.790753017168112e-06, "loss": 0.7458, "step": 93690 }, { "epoch": 15.9272480027197, "grad_norm": 18.84684181213379, "learning_rate": 6.787919995467166e-06, "loss": 0.8697, "step": 93700 }, { "epoch": 15.928947815740269, "grad_norm": 13.521613121032715, "learning_rate": 6.785086973766219e-06, "loss": 0.8219, "step": 93710 }, { "epoch": 15.930647628760836, "grad_norm": 10.264008522033691, "learning_rate": 6.7822539520652725e-06, "loss": 0.9008, "step": 93720 }, { "epoch": 15.932347441781404, "grad_norm": 19.49981117248535, "learning_rate": 6.779420930364327e-06, "loss": 0.9369, "step": 93730 }, { "epoch": 15.934047254801971, "grad_norm": 23.193180084228516, "learning_rate": 6.77658790866338e-06, "loss": 0.7552, "step": 93740 }, { "epoch": 15.93574706782254, "grad_norm": 13.423709869384766, "learning_rate": 6.773754886962434e-06, "loss": 0.794, "step": 93750 }, { "epoch": 15.937446880843106, "grad_norm": 14.654776573181152, "learning_rate": 6.770921865261488e-06, "loss": 0.7215, "step": 93760 }, { "epoch": 15.939146693863675, "grad_norm": 9.370463371276855, "learning_rate": 6.768088843560542e-06, "loss": 0.9581, "step": 93770 }, { "epoch": 15.940846506884244, "grad_norm": 13.590559959411621, "learning_rate": 6.765255821859595e-06, "loss": 0.8043, "step": 93780 }, { "epoch": 15.94254631990481, "grad_norm": 8.239019393920898, "learning_rate": 6.76242280015865e-06, "loss": 0.9784, "step": 93790 }, { "epoch": 15.944246132925379, "grad_norm": 12.708474159240723, "learning_rate": 6.759589778457703e-06, "loss": 0.68, "step": 93800 }, { "epoch": 15.945945945945946, "grad_norm": 19.032806396484375, "learning_rate": 6.756756756756757e-06, "loss": 0.9525, "step": 93810 }, { "epoch": 15.947645758966514, "grad_norm": 6.389180660247803, "learning_rate": 6.753923735055811e-06, "loss": 0.6088, "step": 93820 }, { "epoch": 15.949345571987081, "grad_norm": 13.928420066833496, "learning_rate": 6.751090713354865e-06, "loss": 0.8423, "step": 93830 }, { "epoch": 15.95104538500765, "grad_norm": 13.462114334106445, "learning_rate": 6.748257691653918e-06, "loss": 0.9244, "step": 93840 }, { "epoch": 15.952745198028216, "grad_norm": 17.764883041381836, "learning_rate": 6.7454246699529725e-06, "loss": 0.8883, "step": 93850 }, { "epoch": 15.954445011048785, "grad_norm": 11.286561965942383, "learning_rate": 6.742591648252026e-06, "loss": 0.8868, "step": 93860 }, { "epoch": 15.956144824069352, "grad_norm": 12.618694305419922, "learning_rate": 6.7397586265510795e-06, "loss": 0.9123, "step": 93870 }, { "epoch": 15.95784463708992, "grad_norm": 12.653572082519531, "learning_rate": 6.736925604850133e-06, "loss": 0.7729, "step": 93880 }, { "epoch": 15.959544450110489, "grad_norm": 9.77664566040039, "learning_rate": 6.7340925831491866e-06, "loss": 0.6811, "step": 93890 }, { "epoch": 15.961244263131055, "grad_norm": 13.403559684753418, "learning_rate": 6.731259561448241e-06, "loss": 0.752, "step": 93900 }, { "epoch": 15.962944076151624, "grad_norm": 10.322463989257812, "learning_rate": 6.7284265397472945e-06, "loss": 0.646, "step": 93910 }, { "epoch": 15.96464388917219, "grad_norm": 13.418858528137207, "learning_rate": 6.725593518046348e-06, "loss": 0.8268, "step": 93920 }, { "epoch": 15.96634370219276, "grad_norm": 13.14787483215332, "learning_rate": 6.722760496345402e-06, "loss": 0.752, "step": 93930 }, { "epoch": 15.968043515213326, "grad_norm": 13.642178535461426, "learning_rate": 6.719927474644456e-06, "loss": 0.7774, "step": 93940 }, { "epoch": 15.969743328233895, "grad_norm": 14.244461059570312, "learning_rate": 6.717094452943509e-06, "loss": 0.8151, "step": 93950 }, { "epoch": 15.971443141254461, "grad_norm": 16.78225326538086, "learning_rate": 6.714261431242564e-06, "loss": 0.724, "step": 93960 }, { "epoch": 15.97314295427503, "grad_norm": 9.473749160766602, "learning_rate": 6.711428409541617e-06, "loss": 0.886, "step": 93970 }, { "epoch": 15.974842767295598, "grad_norm": 10.807464599609375, "learning_rate": 6.708595387840671e-06, "loss": 0.9423, "step": 93980 }, { "epoch": 15.976542580316165, "grad_norm": 12.839872360229492, "learning_rate": 6.705762366139725e-06, "loss": 0.7414, "step": 93990 }, { "epoch": 15.978242393336734, "grad_norm": 20.229232788085938, "learning_rate": 6.702929344438779e-06, "loss": 0.7595, "step": 94000 }, { "epoch": 15.9799422063573, "grad_norm": 21.584550857543945, "learning_rate": 6.700096322737832e-06, "loss": 0.8247, "step": 94010 }, { "epoch": 15.981642019377869, "grad_norm": 12.382209777832031, "learning_rate": 6.697263301036887e-06, "loss": 0.5747, "step": 94020 }, { "epoch": 15.983341832398436, "grad_norm": 59.27082824707031, "learning_rate": 6.69443027933594e-06, "loss": 0.8471, "step": 94030 }, { "epoch": 15.985041645419004, "grad_norm": 13.546774864196777, "learning_rate": 6.691597257634994e-06, "loss": 0.8266, "step": 94040 }, { "epoch": 15.986741458439571, "grad_norm": 13.145606994628906, "learning_rate": 6.688764235934047e-06, "loss": 0.6941, "step": 94050 }, { "epoch": 15.98844127146014, "grad_norm": 15.201464653015137, "learning_rate": 6.685931214233101e-06, "loss": 0.7848, "step": 94060 }, { "epoch": 15.990141084480706, "grad_norm": 12.25406551361084, "learning_rate": 6.683098192532155e-06, "loss": 0.6658, "step": 94070 }, { "epoch": 15.991840897501275, "grad_norm": 15.720108985900879, "learning_rate": 6.6802651708312086e-06, "loss": 0.9082, "step": 94080 }, { "epoch": 15.993540710521842, "grad_norm": 12.904730796813965, "learning_rate": 6.677432149130262e-06, "loss": 0.7804, "step": 94090 }, { "epoch": 15.99524052354241, "grad_norm": 12.031758308410645, "learning_rate": 6.6745991274293164e-06, "loss": 0.6737, "step": 94100 }, { "epoch": 15.996940336562979, "grad_norm": 11.626665115356445, "learning_rate": 6.67176610572837e-06, "loss": 0.9022, "step": 94110 }, { "epoch": 15.998640149583546, "grad_norm": 11.662053108215332, "learning_rate": 6.6689330840274235e-06, "loss": 0.7244, "step": 94120 }, { "epoch": 16.0, "eval_cer": 1.0, "eval_loss": 2.4962618350982666, "eval_runtime": 1963.0868, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.24, "step": 94128 }, { "epoch": 16.000339962604112, "grad_norm": 11.850662231445312, "learning_rate": 6.666100062326478e-06, "loss": 0.8707, "step": 94130 }, { "epoch": 16.00203977562468, "grad_norm": 16.665687561035156, "learning_rate": 6.663267040625531e-06, "loss": 0.7732, "step": 94140 }, { "epoch": 16.00373958864525, "grad_norm": 13.453717231750488, "learning_rate": 6.660434018924585e-06, "loss": 0.7537, "step": 94150 }, { "epoch": 16.005439401665818, "grad_norm": 19.42255210876465, "learning_rate": 6.657600997223639e-06, "loss": 0.8582, "step": 94160 }, { "epoch": 16.007139214686383, "grad_norm": 20.41779136657715, "learning_rate": 6.654767975522693e-06, "loss": 0.7499, "step": 94170 }, { "epoch": 16.00883902770695, "grad_norm": 12.502609252929688, "learning_rate": 6.651934953821746e-06, "loss": 0.6401, "step": 94180 }, { "epoch": 16.01053884072752, "grad_norm": 10.289313316345215, "learning_rate": 6.649101932120801e-06, "loss": 0.8354, "step": 94190 }, { "epoch": 16.01223865374809, "grad_norm": 11.618278503417969, "learning_rate": 6.646268910419854e-06, "loss": 0.5761, "step": 94200 }, { "epoch": 16.013938466768657, "grad_norm": 16.047618865966797, "learning_rate": 6.643435888718908e-06, "loss": 0.7281, "step": 94210 }, { "epoch": 16.015638279789222, "grad_norm": 14.139970779418945, "learning_rate": 6.640602867017962e-06, "loss": 0.7347, "step": 94220 }, { "epoch": 16.01733809280979, "grad_norm": 13.779814720153809, "learning_rate": 6.637769845317015e-06, "loss": 0.7938, "step": 94230 }, { "epoch": 16.01903790583036, "grad_norm": 13.934256553649902, "learning_rate": 6.634936823616069e-06, "loss": 0.6418, "step": 94240 }, { "epoch": 16.020737718850928, "grad_norm": 24.795507431030273, "learning_rate": 6.632103801915123e-06, "loss": 0.7348, "step": 94250 }, { "epoch": 16.022437531871493, "grad_norm": 14.075949668884277, "learning_rate": 6.629270780214176e-06, "loss": 0.7439, "step": 94260 }, { "epoch": 16.02413734489206, "grad_norm": 10.239325523376465, "learning_rate": 6.6264377585132306e-06, "loss": 0.6983, "step": 94270 }, { "epoch": 16.02583715791263, "grad_norm": 12.405949592590332, "learning_rate": 6.623604736812284e-06, "loss": 0.6597, "step": 94280 }, { "epoch": 16.0275369709332, "grad_norm": 23.741411209106445, "learning_rate": 6.620771715111338e-06, "loss": 0.6838, "step": 94290 }, { "epoch": 16.029236783953767, "grad_norm": 18.914817810058594, "learning_rate": 6.617938693410392e-06, "loss": 0.6257, "step": 94300 }, { "epoch": 16.030936596974332, "grad_norm": 18.11052131652832, "learning_rate": 6.6151056717094455e-06, "loss": 0.8924, "step": 94310 }, { "epoch": 16.0326364099949, "grad_norm": 14.230270385742188, "learning_rate": 6.612272650008499e-06, "loss": 0.6454, "step": 94320 }, { "epoch": 16.03433622301547, "grad_norm": 14.053570747375488, "learning_rate": 6.609439628307553e-06, "loss": 0.7286, "step": 94330 }, { "epoch": 16.036036036036037, "grad_norm": 11.943059921264648, "learning_rate": 6.606606606606607e-06, "loss": 0.7548, "step": 94340 }, { "epoch": 16.037735849056602, "grad_norm": 20.419483184814453, "learning_rate": 6.60377358490566e-06, "loss": 0.8698, "step": 94350 }, { "epoch": 16.03943566207717, "grad_norm": 11.637051582336426, "learning_rate": 6.600940563204715e-06, "loss": 0.831, "step": 94360 }, { "epoch": 16.04113547509774, "grad_norm": 9.888873100280762, "learning_rate": 6.598107541503768e-06, "loss": 0.6782, "step": 94370 }, { "epoch": 16.042835288118308, "grad_norm": 16.284202575683594, "learning_rate": 6.595274519802822e-06, "loss": 0.8508, "step": 94380 }, { "epoch": 16.044535101138873, "grad_norm": 12.554417610168457, "learning_rate": 6.592441498101876e-06, "loss": 0.6542, "step": 94390 }, { "epoch": 16.04623491415944, "grad_norm": 13.49608039855957, "learning_rate": 6.589608476400929e-06, "loss": 0.7147, "step": 94400 }, { "epoch": 16.04793472718001, "grad_norm": 17.378437042236328, "learning_rate": 6.586775454699983e-06, "loss": 0.7868, "step": 94410 }, { "epoch": 16.04963454020058, "grad_norm": 21.021800994873047, "learning_rate": 6.583942432999037e-06, "loss": 0.7574, "step": 94420 }, { "epoch": 16.051334353221147, "grad_norm": 10.422021865844727, "learning_rate": 6.58110941129809e-06, "loss": 0.7655, "step": 94430 }, { "epoch": 16.053034166241712, "grad_norm": 11.184931755065918, "learning_rate": 6.578276389597145e-06, "loss": 0.8426, "step": 94440 }, { "epoch": 16.05473397926228, "grad_norm": 17.310041427612305, "learning_rate": 6.575443367896198e-06, "loss": 0.6832, "step": 94450 }, { "epoch": 16.05643379228285, "grad_norm": 15.07467269897461, "learning_rate": 6.572610346195252e-06, "loss": 0.6848, "step": 94460 }, { "epoch": 16.058133605303418, "grad_norm": 9.53735065460205, "learning_rate": 6.569777324494306e-06, "loss": 0.6779, "step": 94470 }, { "epoch": 16.059833418323983, "grad_norm": 13.955942153930664, "learning_rate": 6.56694430279336e-06, "loss": 0.7479, "step": 94480 }, { "epoch": 16.06153323134455, "grad_norm": 10.025227546691895, "learning_rate": 6.564111281092413e-06, "loss": 0.6659, "step": 94490 }, { "epoch": 16.06323304436512, "grad_norm": 11.795533180236816, "learning_rate": 6.5612782593914675e-06, "loss": 0.6863, "step": 94500 }, { "epoch": 16.06493285738569, "grad_norm": 13.282934188842773, "learning_rate": 6.558445237690521e-06, "loss": 0.8524, "step": 94510 }, { "epoch": 16.066632670406257, "grad_norm": 10.290315628051758, "learning_rate": 6.5556122159895745e-06, "loss": 0.8121, "step": 94520 }, { "epoch": 16.068332483426822, "grad_norm": 24.09855842590332, "learning_rate": 6.552779194288629e-06, "loss": 0.9097, "step": 94530 }, { "epoch": 16.07003229644739, "grad_norm": 11.464580535888672, "learning_rate": 6.549946172587682e-06, "loss": 0.8527, "step": 94540 }, { "epoch": 16.07173210946796, "grad_norm": 25.126249313354492, "learning_rate": 6.547113150886736e-06, "loss": 0.7105, "step": 94550 }, { "epoch": 16.073431922488528, "grad_norm": 13.480386734008789, "learning_rate": 6.54428012918579e-06, "loss": 0.7719, "step": 94560 }, { "epoch": 16.075131735509093, "grad_norm": 17.477371215820312, "learning_rate": 6.541447107484843e-06, "loss": 0.6669, "step": 94570 }, { "epoch": 16.07683154852966, "grad_norm": 14.352362632751465, "learning_rate": 6.5386140857838965e-06, "loss": 0.8097, "step": 94580 }, { "epoch": 16.07853136155023, "grad_norm": 13.612058639526367, "learning_rate": 6.535781064082951e-06, "loss": 0.7629, "step": 94590 }, { "epoch": 16.080231174570798, "grad_norm": 16.87390899658203, "learning_rate": 6.532948042382004e-06, "loss": 0.842, "step": 94600 }, { "epoch": 16.081930987591363, "grad_norm": 23.660295486450195, "learning_rate": 6.530115020681059e-06, "loss": 0.86, "step": 94610 }, { "epoch": 16.08363080061193, "grad_norm": 13.256831169128418, "learning_rate": 6.527281998980112e-06, "loss": 0.7477, "step": 94620 }, { "epoch": 16.0853306136325, "grad_norm": 11.214134216308594, "learning_rate": 6.524448977279166e-06, "loss": 0.7529, "step": 94630 }, { "epoch": 16.08703042665307, "grad_norm": 13.4163179397583, "learning_rate": 6.52161595557822e-06, "loss": 0.6575, "step": 94640 }, { "epoch": 16.088730239673637, "grad_norm": 12.690673828125, "learning_rate": 6.518782933877274e-06, "loss": 0.7548, "step": 94650 }, { "epoch": 16.090430052694202, "grad_norm": 8.501276016235352, "learning_rate": 6.515949912176327e-06, "loss": 0.6646, "step": 94660 }, { "epoch": 16.09212986571477, "grad_norm": 22.166332244873047, "learning_rate": 6.513116890475382e-06, "loss": 0.7934, "step": 94670 }, { "epoch": 16.09382967873534, "grad_norm": 19.050662994384766, "learning_rate": 6.510283868774435e-06, "loss": 0.8326, "step": 94680 }, { "epoch": 16.095529491755908, "grad_norm": 23.5483455657959, "learning_rate": 6.507450847073489e-06, "loss": 0.8587, "step": 94690 }, { "epoch": 16.097229304776473, "grad_norm": 15.325182914733887, "learning_rate": 6.504617825372543e-06, "loss": 0.8485, "step": 94700 }, { "epoch": 16.09892911779704, "grad_norm": 11.563066482543945, "learning_rate": 6.5017848036715965e-06, "loss": 0.7013, "step": 94710 }, { "epoch": 16.10062893081761, "grad_norm": 18.33222007751465, "learning_rate": 6.49895178197065e-06, "loss": 0.9786, "step": 94720 }, { "epoch": 16.10232874383818, "grad_norm": 15.894659042358398, "learning_rate": 6.496118760269704e-06, "loss": 0.727, "step": 94730 }, { "epoch": 16.104028556858747, "grad_norm": 14.696802139282227, "learning_rate": 6.493285738568758e-06, "loss": 0.7581, "step": 94740 }, { "epoch": 16.105728369879312, "grad_norm": 13.614619255065918, "learning_rate": 6.490452716867811e-06, "loss": 0.7719, "step": 94750 }, { "epoch": 16.10742818289988, "grad_norm": 11.859769821166992, "learning_rate": 6.487619695166865e-06, "loss": 0.5876, "step": 94760 }, { "epoch": 16.10912799592045, "grad_norm": 15.657140731811523, "learning_rate": 6.4847866734659185e-06, "loss": 0.6557, "step": 94770 }, { "epoch": 16.110827808941018, "grad_norm": 10.876596450805664, "learning_rate": 6.481953651764973e-06, "loss": 0.7531, "step": 94780 }, { "epoch": 16.112527621961583, "grad_norm": 15.288163185119629, "learning_rate": 6.479120630064026e-06, "loss": 0.7308, "step": 94790 }, { "epoch": 16.11422743498215, "grad_norm": 14.771580696105957, "learning_rate": 6.47628760836308e-06, "loss": 0.6383, "step": 94800 }, { "epoch": 16.11592724800272, "grad_norm": 17.21615982055664, "learning_rate": 6.473454586662134e-06, "loss": 0.8448, "step": 94810 }, { "epoch": 16.11762706102329, "grad_norm": 14.045947074890137, "learning_rate": 6.470621564961188e-06, "loss": 0.8682, "step": 94820 }, { "epoch": 16.119326874043857, "grad_norm": 19.68157958984375, "learning_rate": 6.467788543260241e-06, "loss": 0.713, "step": 94830 }, { "epoch": 16.121026687064422, "grad_norm": 13.065120697021484, "learning_rate": 6.464955521559296e-06, "loss": 0.7804, "step": 94840 }, { "epoch": 16.12272650008499, "grad_norm": 15.661027908325195, "learning_rate": 6.462122499858349e-06, "loss": 0.5572, "step": 94850 }, { "epoch": 16.12442631310556, "grad_norm": 18.5533390045166, "learning_rate": 6.459289478157403e-06, "loss": 0.8734, "step": 94860 }, { "epoch": 16.126126126126128, "grad_norm": 16.401634216308594, "learning_rate": 6.456456456456457e-06, "loss": 0.8842, "step": 94870 }, { "epoch": 16.127825939146692, "grad_norm": 15.434537887573242, "learning_rate": 6.453623434755511e-06, "loss": 0.7985, "step": 94880 }, { "epoch": 16.12952575216726, "grad_norm": 13.024036407470703, "learning_rate": 6.450790413054564e-06, "loss": 0.7459, "step": 94890 }, { "epoch": 16.13122556518783, "grad_norm": 10.082413673400879, "learning_rate": 6.4479573913536185e-06, "loss": 0.6513, "step": 94900 }, { "epoch": 16.132925378208398, "grad_norm": 10.531816482543945, "learning_rate": 6.445124369652672e-06, "loss": 0.7194, "step": 94910 }, { "epoch": 16.134625191228963, "grad_norm": 12.426054000854492, "learning_rate": 6.442291347951725e-06, "loss": 0.663, "step": 94920 }, { "epoch": 16.13632500424953, "grad_norm": 15.433904647827148, "learning_rate": 6.439458326250779e-06, "loss": 0.8022, "step": 94930 }, { "epoch": 16.1380248172701, "grad_norm": 13.578911781311035, "learning_rate": 6.436625304549833e-06, "loss": 0.7992, "step": 94940 }, { "epoch": 16.13972463029067, "grad_norm": 101.18867492675781, "learning_rate": 6.433792282848886e-06, "loss": 0.7178, "step": 94950 }, { "epoch": 16.141424443311237, "grad_norm": 12.234299659729004, "learning_rate": 6.4309592611479405e-06, "loss": 0.8316, "step": 94960 }, { "epoch": 16.143124256331802, "grad_norm": 15.678895950317383, "learning_rate": 6.428126239446994e-06, "loss": 0.7417, "step": 94970 }, { "epoch": 16.14482406935237, "grad_norm": 12.71475601196289, "learning_rate": 6.425293217746048e-06, "loss": 0.6989, "step": 94980 }, { "epoch": 16.14652388237294, "grad_norm": 68.50823211669922, "learning_rate": 6.422460196045102e-06, "loss": 0.5746, "step": 94990 }, { "epoch": 16.148223695393508, "grad_norm": 17.35826301574707, "learning_rate": 6.4196271743441554e-06, "loss": 0.8975, "step": 95000 }, { "epoch": 16.149923508414073, "grad_norm": 15.127362251281738, "learning_rate": 6.41679415264321e-06, "loss": 0.748, "step": 95010 }, { "epoch": 16.15162332143464, "grad_norm": 14.234935760498047, "learning_rate": 6.413961130942263e-06, "loss": 0.8421, "step": 95020 }, { "epoch": 16.15332313445521, "grad_norm": 12.449882507324219, "learning_rate": 6.411128109241317e-06, "loss": 0.8085, "step": 95030 }, { "epoch": 16.15502294747578, "grad_norm": 13.506613731384277, "learning_rate": 6.408295087540371e-06, "loss": 0.9071, "step": 95040 }, { "epoch": 16.156722760496347, "grad_norm": 11.83249282836914, "learning_rate": 6.405462065839425e-06, "loss": 0.6498, "step": 95050 }, { "epoch": 16.158422573516912, "grad_norm": 15.246715545654297, "learning_rate": 6.402629044138478e-06, "loss": 0.8474, "step": 95060 }, { "epoch": 16.16012238653748, "grad_norm": 12.58112907409668, "learning_rate": 6.399796022437533e-06, "loss": 0.678, "step": 95070 }, { "epoch": 16.16182219955805, "grad_norm": 12.225918769836426, "learning_rate": 6.396963000736586e-06, "loss": 0.8237, "step": 95080 }, { "epoch": 16.163522012578618, "grad_norm": 14.406818389892578, "learning_rate": 6.394129979035639e-06, "loss": 0.8201, "step": 95090 }, { "epoch": 16.165221825599183, "grad_norm": 15.252153396606445, "learning_rate": 6.391296957334693e-06, "loss": 0.6724, "step": 95100 }, { "epoch": 16.16692163861975, "grad_norm": 18.97928810119629, "learning_rate": 6.388463935633747e-06, "loss": 0.6527, "step": 95110 }, { "epoch": 16.16862145164032, "grad_norm": 12.602779388427734, "learning_rate": 6.3856309139328e-06, "loss": 0.7271, "step": 95120 }, { "epoch": 16.17032126466089, "grad_norm": 17.040895462036133, "learning_rate": 6.382797892231855e-06, "loss": 0.7608, "step": 95130 }, { "epoch": 16.172021077681457, "grad_norm": 14.884512901306152, "learning_rate": 6.379964870530908e-06, "loss": 0.6123, "step": 95140 }, { "epoch": 16.173720890702022, "grad_norm": 15.510823249816895, "learning_rate": 6.3771318488299625e-06, "loss": 0.7833, "step": 95150 }, { "epoch": 16.17542070372259, "grad_norm": 14.1714506149292, "learning_rate": 6.374298827129016e-06, "loss": 0.8744, "step": 95160 }, { "epoch": 16.17712051674316, "grad_norm": 15.35073471069336, "learning_rate": 6.3714658054280695e-06, "loss": 0.8394, "step": 95170 }, { "epoch": 16.178820329763727, "grad_norm": 14.601058959960938, "learning_rate": 6.368632783727124e-06, "loss": 0.6321, "step": 95180 }, { "epoch": 16.180520142784292, "grad_norm": 12.371885299682617, "learning_rate": 6.3657997620261774e-06, "loss": 0.9182, "step": 95190 }, { "epoch": 16.18221995580486, "grad_norm": 9.886587142944336, "learning_rate": 6.362966740325231e-06, "loss": 0.8138, "step": 95200 }, { "epoch": 16.18391976882543, "grad_norm": 11.795520782470703, "learning_rate": 6.360133718624285e-06, "loss": 0.6921, "step": 95210 }, { "epoch": 16.185619581845998, "grad_norm": 14.717759132385254, "learning_rate": 6.357300696923339e-06, "loss": 0.6609, "step": 95220 }, { "epoch": 16.187319394866563, "grad_norm": 11.83198356628418, "learning_rate": 6.354467675222392e-06, "loss": 0.8395, "step": 95230 }, { "epoch": 16.18901920788713, "grad_norm": 13.892160415649414, "learning_rate": 6.351634653521447e-06, "loss": 0.9156, "step": 95240 }, { "epoch": 16.1907190209077, "grad_norm": 15.427993774414062, "learning_rate": 6.3488016318205e-06, "loss": 0.8799, "step": 95250 }, { "epoch": 16.19241883392827, "grad_norm": 18.024093627929688, "learning_rate": 6.345968610119554e-06, "loss": 1.0386, "step": 95260 }, { "epoch": 16.194118646948837, "grad_norm": 9.884815216064453, "learning_rate": 6.343135588418607e-06, "loss": 0.7166, "step": 95270 }, { "epoch": 16.195818459969402, "grad_norm": 11.48335075378418, "learning_rate": 6.340302566717661e-06, "loss": 0.7589, "step": 95280 }, { "epoch": 16.19751827298997, "grad_norm": 13.07231330871582, "learning_rate": 6.337469545016714e-06, "loss": 0.4847, "step": 95290 }, { "epoch": 16.19921808601054, "grad_norm": 14.21225357055664, "learning_rate": 6.334636523315769e-06, "loss": 0.6272, "step": 95300 }, { "epoch": 16.200917899031108, "grad_norm": 16.74759292602539, "learning_rate": 6.331803501614822e-06, "loss": 0.6986, "step": 95310 }, { "epoch": 16.202617712051673, "grad_norm": 15.367176055908203, "learning_rate": 6.328970479913876e-06, "loss": 0.744, "step": 95320 }, { "epoch": 16.20431752507224, "grad_norm": 16.837947845458984, "learning_rate": 6.32613745821293e-06, "loss": 0.8969, "step": 95330 }, { "epoch": 16.20601733809281, "grad_norm": 11.167716026306152, "learning_rate": 6.323304436511984e-06, "loss": 0.7937, "step": 95340 }, { "epoch": 16.20771715111338, "grad_norm": 11.818330764770508, "learning_rate": 6.320471414811038e-06, "loss": 0.7667, "step": 95350 }, { "epoch": 16.209416964133947, "grad_norm": 16.142955780029297, "learning_rate": 6.3176383931100915e-06, "loss": 0.9515, "step": 95360 }, { "epoch": 16.211116777154512, "grad_norm": 11.904732704162598, "learning_rate": 6.314805371409145e-06, "loss": 0.7617, "step": 95370 }, { "epoch": 16.21281659017508, "grad_norm": 22.14413070678711, "learning_rate": 6.3119723497081994e-06, "loss": 0.6145, "step": 95380 }, { "epoch": 16.21451640319565, "grad_norm": 10.381065368652344, "learning_rate": 6.309139328007253e-06, "loss": 0.6855, "step": 95390 }, { "epoch": 16.216216216216218, "grad_norm": 14.651754379272461, "learning_rate": 6.3063063063063065e-06, "loss": 0.6714, "step": 95400 }, { "epoch": 16.217916029236783, "grad_norm": 10.15567684173584, "learning_rate": 6.303473284605361e-06, "loss": 0.6804, "step": 95410 }, { "epoch": 16.21961584225735, "grad_norm": 13.386844635009766, "learning_rate": 6.300640262904414e-06, "loss": 0.7587, "step": 95420 }, { "epoch": 16.22131565527792, "grad_norm": 11.43278980255127, "learning_rate": 6.297807241203468e-06, "loss": 0.8362, "step": 95430 }, { "epoch": 16.223015468298488, "grad_norm": 9.683015823364258, "learning_rate": 6.294974219502521e-06, "loss": 0.6993, "step": 95440 }, { "epoch": 16.224715281319053, "grad_norm": 15.5335054397583, "learning_rate": 6.292141197801575e-06, "loss": 0.5698, "step": 95450 }, { "epoch": 16.22641509433962, "grad_norm": 19.96981430053711, "learning_rate": 6.2893081761006284e-06, "loss": 0.6822, "step": 95460 }, { "epoch": 16.22811490736019, "grad_norm": 18.7454833984375, "learning_rate": 6.286475154399683e-06, "loss": 0.7056, "step": 95470 }, { "epoch": 16.22981472038076, "grad_norm": 16.507003784179688, "learning_rate": 6.283642132698736e-06, "loss": 0.7519, "step": 95480 }, { "epoch": 16.231514533401327, "grad_norm": 21.36182975769043, "learning_rate": 6.28080911099779e-06, "loss": 0.5226, "step": 95490 }, { "epoch": 16.233214346421892, "grad_norm": 18.20394515991211, "learning_rate": 6.277976089296844e-06, "loss": 0.877, "step": 95500 }, { "epoch": 16.23491415944246, "grad_norm": 19.089380264282227, "learning_rate": 6.275143067595898e-06, "loss": 0.751, "step": 95510 }, { "epoch": 16.23661397246303, "grad_norm": 17.454416275024414, "learning_rate": 6.272310045894952e-06, "loss": 0.8526, "step": 95520 }, { "epoch": 16.238313785483598, "grad_norm": 15.79848861694336, "learning_rate": 6.269477024194006e-06, "loss": 0.7467, "step": 95530 }, { "epoch": 16.240013598504163, "grad_norm": 14.161514282226562, "learning_rate": 6.266644002493059e-06, "loss": 0.7579, "step": 95540 }, { "epoch": 16.24171341152473, "grad_norm": 14.087259292602539, "learning_rate": 6.2638109807921135e-06, "loss": 0.7038, "step": 95550 }, { "epoch": 16.2434132245453, "grad_norm": 11.379752159118652, "learning_rate": 6.260977959091167e-06, "loss": 0.6286, "step": 95560 }, { "epoch": 16.24511303756587, "grad_norm": 28.06945037841797, "learning_rate": 6.258144937390221e-06, "loss": 0.8972, "step": 95570 }, { "epoch": 16.246812850586437, "grad_norm": 11.736759185791016, "learning_rate": 6.255311915689275e-06, "loss": 0.608, "step": 95580 }, { "epoch": 16.248512663607002, "grad_norm": 23.210859298706055, "learning_rate": 6.2524788939883285e-06, "loss": 0.5705, "step": 95590 }, { "epoch": 16.25021247662757, "grad_norm": 13.99059772491455, "learning_rate": 6.249645872287382e-06, "loss": 0.7867, "step": 95600 }, { "epoch": 16.25191228964814, "grad_norm": 14.804900169372559, "learning_rate": 6.2468128505864355e-06, "loss": 0.9766, "step": 95610 }, { "epoch": 16.253612102668708, "grad_norm": 11.487985610961914, "learning_rate": 6.243979828885489e-06, "loss": 0.8696, "step": 95620 }, { "epoch": 16.255311915689273, "grad_norm": 13.31179428100586, "learning_rate": 6.2411468071845426e-06, "loss": 0.8279, "step": 95630 }, { "epoch": 16.25701172870984, "grad_norm": 16.2862548828125, "learning_rate": 6.238313785483597e-06, "loss": 0.6227, "step": 95640 }, { "epoch": 16.25871154173041, "grad_norm": 13.959118843078613, "learning_rate": 6.2354807637826504e-06, "loss": 0.7088, "step": 95650 }, { "epoch": 16.26041135475098, "grad_norm": 16.068723678588867, "learning_rate": 6.232647742081704e-06, "loss": 0.8517, "step": 95660 }, { "epoch": 16.262111167771547, "grad_norm": 12.592799186706543, "learning_rate": 6.229814720380758e-06, "loss": 0.7881, "step": 95670 }, { "epoch": 16.263810980792112, "grad_norm": 15.919031143188477, "learning_rate": 6.226981698679812e-06, "loss": 0.6205, "step": 95680 }, { "epoch": 16.26551079381268, "grad_norm": 11.491318702697754, "learning_rate": 6.224148676978866e-06, "loss": 0.6896, "step": 95690 }, { "epoch": 16.26721060683325, "grad_norm": 7.633574962615967, "learning_rate": 6.22131565527792e-06, "loss": 0.5969, "step": 95700 }, { "epoch": 16.268910419853817, "grad_norm": 15.122759819030762, "learning_rate": 6.218482633576973e-06, "loss": 0.8152, "step": 95710 }, { "epoch": 16.270610232874382, "grad_norm": 11.57759952545166, "learning_rate": 6.215649611876028e-06, "loss": 0.7983, "step": 95720 }, { "epoch": 16.27231004589495, "grad_norm": 12.999868392944336, "learning_rate": 6.212816590175081e-06, "loss": 0.7463, "step": 95730 }, { "epoch": 16.27400985891552, "grad_norm": 12.5873384475708, "learning_rate": 6.209983568474135e-06, "loss": 0.822, "step": 95740 }, { "epoch": 16.275709671936088, "grad_norm": 11.079249382019043, "learning_rate": 6.207150546773189e-06, "loss": 0.9075, "step": 95750 }, { "epoch": 16.277409484956653, "grad_norm": 16.95192527770996, "learning_rate": 6.2043175250722426e-06, "loss": 0.6284, "step": 95760 }, { "epoch": 16.27910929797722, "grad_norm": 11.013534545898438, "learning_rate": 6.201484503371296e-06, "loss": 0.6853, "step": 95770 }, { "epoch": 16.28080911099779, "grad_norm": 12.408713340759277, "learning_rate": 6.19865148167035e-06, "loss": 0.6918, "step": 95780 }, { "epoch": 16.28250892401836, "grad_norm": 12.653868675231934, "learning_rate": 6.195818459969403e-06, "loss": 0.708, "step": 95790 }, { "epoch": 16.284208737038927, "grad_norm": 12.212306022644043, "learning_rate": 6.192985438268457e-06, "loss": 0.7312, "step": 95800 }, { "epoch": 16.285908550059492, "grad_norm": 12.639330863952637, "learning_rate": 6.190152416567511e-06, "loss": 0.7514, "step": 95810 }, { "epoch": 16.28760836308006, "grad_norm": 20.29656410217285, "learning_rate": 6.1873193948665646e-06, "loss": 0.6158, "step": 95820 }, { "epoch": 16.28930817610063, "grad_norm": 12.435831069946289, "learning_rate": 6.184486373165618e-06, "loss": 0.666, "step": 95830 }, { "epoch": 16.291007989121198, "grad_norm": 14.140435218811035, "learning_rate": 6.1816533514646724e-06, "loss": 0.7941, "step": 95840 }, { "epoch": 16.292707802141763, "grad_norm": 13.644177436828613, "learning_rate": 6.178820329763726e-06, "loss": 0.7781, "step": 95850 }, { "epoch": 16.29440761516233, "grad_norm": 11.940055847167969, "learning_rate": 6.1759873080627795e-06, "loss": 0.6797, "step": 95860 }, { "epoch": 16.2961074281829, "grad_norm": 10.312359809875488, "learning_rate": 6.173154286361834e-06, "loss": 0.718, "step": 95870 }, { "epoch": 16.29780724120347, "grad_norm": 12.20959186553955, "learning_rate": 6.170321264660887e-06, "loss": 0.7561, "step": 95880 }, { "epoch": 16.299507054224037, "grad_norm": 11.982789993286133, "learning_rate": 6.167488242959942e-06, "loss": 0.6845, "step": 95890 }, { "epoch": 16.301206867244602, "grad_norm": 15.870207786560059, "learning_rate": 6.164655221258995e-06, "loss": 0.7667, "step": 95900 }, { "epoch": 16.30290668026517, "grad_norm": 15.949148178100586, "learning_rate": 6.161822199558049e-06, "loss": 0.6936, "step": 95910 }, { "epoch": 16.30460649328574, "grad_norm": 13.063764572143555, "learning_rate": 6.158989177857103e-06, "loss": 0.7522, "step": 95920 }, { "epoch": 16.306306306306308, "grad_norm": 10.41268253326416, "learning_rate": 6.156156156156157e-06, "loss": 0.7791, "step": 95930 }, { "epoch": 16.308006119326873, "grad_norm": 15.831742286682129, "learning_rate": 6.15332313445521e-06, "loss": 0.9215, "step": 95940 }, { "epoch": 16.30970593234744, "grad_norm": 12.153138160705566, "learning_rate": 6.1504901127542646e-06, "loss": 0.5249, "step": 95950 }, { "epoch": 16.31140574536801, "grad_norm": 18.11806297302246, "learning_rate": 6.147657091053317e-06, "loss": 0.7511, "step": 95960 }, { "epoch": 16.31310555838858, "grad_norm": 17.927467346191406, "learning_rate": 6.144824069352371e-06, "loss": 0.7578, "step": 95970 }, { "epoch": 16.314805371409143, "grad_norm": 12.55445384979248, "learning_rate": 6.141991047651425e-06, "loss": 0.7047, "step": 95980 }, { "epoch": 16.31650518442971, "grad_norm": 10.331082344055176, "learning_rate": 6.139158025950479e-06, "loss": 0.7285, "step": 95990 }, { "epoch": 16.31820499745028, "grad_norm": 14.011054039001465, "learning_rate": 6.136325004249532e-06, "loss": 0.6451, "step": 96000 }, { "epoch": 16.31990481047085, "grad_norm": 15.042731285095215, "learning_rate": 6.1334919825485865e-06, "loss": 0.6673, "step": 96010 }, { "epoch": 16.321604623491417, "grad_norm": 14.604105949401855, "learning_rate": 6.13065896084764e-06, "loss": 0.6954, "step": 96020 }, { "epoch": 16.323304436511982, "grad_norm": 12.178070068359375, "learning_rate": 6.127825939146694e-06, "loss": 0.6523, "step": 96030 }, { "epoch": 16.32500424953255, "grad_norm": 13.49663257598877, "learning_rate": 6.124992917445748e-06, "loss": 0.7113, "step": 96040 }, { "epoch": 16.32670406255312, "grad_norm": 13.768289566040039, "learning_rate": 6.1221598957448015e-06, "loss": 0.7766, "step": 96050 }, { "epoch": 16.328403875573688, "grad_norm": 19.116458892822266, "learning_rate": 6.119326874043856e-06, "loss": 0.6917, "step": 96060 }, { "epoch": 16.330103688594253, "grad_norm": 14.06328296661377, "learning_rate": 6.116493852342909e-06, "loss": 0.7661, "step": 96070 }, { "epoch": 16.33180350161482, "grad_norm": 15.146754264831543, "learning_rate": 6.113660830641963e-06, "loss": 0.7596, "step": 96080 }, { "epoch": 16.33350331463539, "grad_norm": 15.79770278930664, "learning_rate": 6.110827808941017e-06, "loss": 0.6447, "step": 96090 }, { "epoch": 16.33520312765596, "grad_norm": 12.70321273803711, "learning_rate": 6.107994787240071e-06, "loss": 0.5922, "step": 96100 }, { "epoch": 16.336902940676527, "grad_norm": 11.317610740661621, "learning_rate": 6.105161765539124e-06, "loss": 0.6426, "step": 96110 }, { "epoch": 16.338602753697092, "grad_norm": 15.731001853942871, "learning_rate": 6.102328743838179e-06, "loss": 0.731, "step": 96120 }, { "epoch": 16.34030256671766, "grad_norm": 11.3192138671875, "learning_rate": 6.099495722137231e-06, "loss": 0.745, "step": 96130 }, { "epoch": 16.34200237973823, "grad_norm": 14.438511848449707, "learning_rate": 6.096662700436285e-06, "loss": 0.6439, "step": 96140 }, { "epoch": 16.343702192758798, "grad_norm": 13.832892417907715, "learning_rate": 6.093829678735339e-06, "loss": 0.6793, "step": 96150 }, { "epoch": 16.345402005779363, "grad_norm": 13.058018684387207, "learning_rate": 6.090996657034393e-06, "loss": 0.9107, "step": 96160 }, { "epoch": 16.34710181879993, "grad_norm": 11.765164375305176, "learning_rate": 6.088163635333446e-06, "loss": 0.5614, "step": 96170 }, { "epoch": 16.3488016318205, "grad_norm": 17.781816482543945, "learning_rate": 6.085330613632501e-06, "loss": 0.7829, "step": 96180 }, { "epoch": 16.35050144484107, "grad_norm": 15.861519813537598, "learning_rate": 6.082497591931554e-06, "loss": 0.8254, "step": 96190 }, { "epoch": 16.352201257861637, "grad_norm": 15.70860767364502, "learning_rate": 6.079664570230608e-06, "loss": 0.7215, "step": 96200 }, { "epoch": 16.353901070882202, "grad_norm": 12.00113582611084, "learning_rate": 6.076831548529662e-06, "loss": 0.6724, "step": 96210 }, { "epoch": 16.35560088390277, "grad_norm": 16.737043380737305, "learning_rate": 6.073998526828716e-06, "loss": 0.6505, "step": 96220 }, { "epoch": 16.35730069692334, "grad_norm": 13.796512603759766, "learning_rate": 6.071165505127769e-06, "loss": 0.7405, "step": 96230 }, { "epoch": 16.359000509943908, "grad_norm": 16.906600952148438, "learning_rate": 6.0683324834268235e-06, "loss": 0.8788, "step": 96240 }, { "epoch": 16.360700322964473, "grad_norm": 12.537353515625, "learning_rate": 6.065499461725877e-06, "loss": 0.7102, "step": 96250 }, { "epoch": 16.36240013598504, "grad_norm": 20.15594482421875, "learning_rate": 6.062666440024931e-06, "loss": 0.7075, "step": 96260 }, { "epoch": 16.36409994900561, "grad_norm": 14.897061347961426, "learning_rate": 6.059833418323985e-06, "loss": 0.6925, "step": 96270 }, { "epoch": 16.365799762026178, "grad_norm": 12.763294219970703, "learning_rate": 6.057000396623038e-06, "loss": 0.8596, "step": 96280 }, { "epoch": 16.367499575046743, "grad_norm": 14.1249418258667, "learning_rate": 6.054167374922093e-06, "loss": 0.7281, "step": 96290 }, { "epoch": 16.36919938806731, "grad_norm": 12.818243026733398, "learning_rate": 6.0513343532211455e-06, "loss": 0.7782, "step": 96300 }, { "epoch": 16.37089920108788, "grad_norm": 15.794089317321777, "learning_rate": 6.048501331520199e-06, "loss": 0.6796, "step": 96310 }, { "epoch": 16.37259901410845, "grad_norm": 15.25248908996582, "learning_rate": 6.045668309819253e-06, "loss": 0.7499, "step": 96320 }, { "epoch": 16.374298827129017, "grad_norm": 17.3869571685791, "learning_rate": 6.042835288118307e-06, "loss": 0.7294, "step": 96330 }, { "epoch": 16.375998640149582, "grad_norm": 13.180164337158203, "learning_rate": 6.04000226641736e-06, "loss": 0.7384, "step": 96340 }, { "epoch": 16.37769845317015, "grad_norm": 11.873671531677246, "learning_rate": 6.037169244716415e-06, "loss": 0.6486, "step": 96350 }, { "epoch": 16.37939826619072, "grad_norm": 14.101275444030762, "learning_rate": 6.034336223015468e-06, "loss": 0.5965, "step": 96360 }, { "epoch": 16.381098079211288, "grad_norm": 15.52951717376709, "learning_rate": 6.031503201314522e-06, "loss": 0.8014, "step": 96370 }, { "epoch": 16.382797892231853, "grad_norm": 24.37042236328125, "learning_rate": 6.028670179613576e-06, "loss": 0.6753, "step": 96380 }, { "epoch": 16.38449770525242, "grad_norm": 16.206642150878906, "learning_rate": 6.02583715791263e-06, "loss": 0.8163, "step": 96390 }, { "epoch": 16.38619751827299, "grad_norm": 12.201406478881836, "learning_rate": 6.023004136211683e-06, "loss": 0.6888, "step": 96400 }, { "epoch": 16.38789733129356, "grad_norm": 13.602884292602539, "learning_rate": 6.020171114510738e-06, "loss": 0.8256, "step": 96410 }, { "epoch": 16.389597144314127, "grad_norm": 14.32323169708252, "learning_rate": 6.017338092809791e-06, "loss": 0.6902, "step": 96420 }, { "epoch": 16.391296957334692, "grad_norm": 13.159873962402344, "learning_rate": 6.0145050711088455e-06, "loss": 0.7874, "step": 96430 }, { "epoch": 16.39299677035526, "grad_norm": 15.070839881896973, "learning_rate": 6.011672049407899e-06, "loss": 0.6757, "step": 96440 }, { "epoch": 16.39469658337583, "grad_norm": 20.9780216217041, "learning_rate": 6.0088390277069525e-06, "loss": 0.7667, "step": 96450 }, { "epoch": 16.396396396396398, "grad_norm": 14.059232711791992, "learning_rate": 6.006006006006007e-06, "loss": 0.7432, "step": 96460 }, { "epoch": 16.398096209416963, "grad_norm": 19.55699348449707, "learning_rate": 6.00317298430506e-06, "loss": 0.7478, "step": 96470 }, { "epoch": 16.39979602243753, "grad_norm": 14.27641487121582, "learning_rate": 6.000339962604113e-06, "loss": 0.7869, "step": 96480 }, { "epoch": 16.4014958354581, "grad_norm": 10.803327560424805, "learning_rate": 5.9975069409031675e-06, "loss": 0.7957, "step": 96490 }, { "epoch": 16.40319564847867, "grad_norm": 13.366015434265137, "learning_rate": 5.994673919202221e-06, "loss": 0.75, "step": 96500 }, { "epoch": 16.404895461499237, "grad_norm": 16.175689697265625, "learning_rate": 5.9918408975012745e-06, "loss": 0.7776, "step": 96510 }, { "epoch": 16.406595274519802, "grad_norm": 14.85659122467041, "learning_rate": 5.989007875800329e-06, "loss": 0.7046, "step": 96520 }, { "epoch": 16.40829508754037, "grad_norm": 13.99462604522705, "learning_rate": 5.986174854099382e-06, "loss": 0.644, "step": 96530 }, { "epoch": 16.40999490056094, "grad_norm": 15.138409614562988, "learning_rate": 5.983341832398436e-06, "loss": 0.6577, "step": 96540 }, { "epoch": 16.411694713581507, "grad_norm": 11.949607849121094, "learning_rate": 5.98050881069749e-06, "loss": 0.6225, "step": 96550 }, { "epoch": 16.413394526602072, "grad_norm": 12.146129608154297, "learning_rate": 5.977675788996544e-06, "loss": 0.8784, "step": 96560 }, { "epoch": 16.41509433962264, "grad_norm": 14.96983528137207, "learning_rate": 5.974842767295597e-06, "loss": 0.7423, "step": 96570 }, { "epoch": 16.41679415264321, "grad_norm": 12.064775466918945, "learning_rate": 5.972009745594652e-06, "loss": 0.8078, "step": 96580 }, { "epoch": 16.418493965663778, "grad_norm": 21.42249298095703, "learning_rate": 5.969176723893705e-06, "loss": 0.7313, "step": 96590 }, { "epoch": 16.420193778684343, "grad_norm": 11.18040943145752, "learning_rate": 5.966343702192759e-06, "loss": 0.767, "step": 96600 }, { "epoch": 16.42189359170491, "grad_norm": 15.455613136291504, "learning_rate": 5.963510680491813e-06, "loss": 0.7678, "step": 96610 }, { "epoch": 16.42359340472548, "grad_norm": 23.425376892089844, "learning_rate": 5.960677658790867e-06, "loss": 0.5339, "step": 96620 }, { "epoch": 16.42529321774605, "grad_norm": 29.53130340576172, "learning_rate": 5.957844637089921e-06, "loss": 0.7806, "step": 96630 }, { "epoch": 16.426993030766617, "grad_norm": 11.323678970336914, "learning_rate": 5.9550116153889745e-06, "loss": 0.6765, "step": 96640 }, { "epoch": 16.428692843787182, "grad_norm": 12.901641845703125, "learning_rate": 5.952178593688027e-06, "loss": 0.7427, "step": 96650 }, { "epoch": 16.43039265680775, "grad_norm": 15.901906967163086, "learning_rate": 5.9493455719870816e-06, "loss": 0.6867, "step": 96660 }, { "epoch": 16.43209246982832, "grad_norm": 13.526026725769043, "learning_rate": 5.946512550286135e-06, "loss": 0.7057, "step": 96670 }, { "epoch": 16.433792282848888, "grad_norm": 11.840184211730957, "learning_rate": 5.943679528585189e-06, "loss": 0.6607, "step": 96680 }, { "epoch": 16.435492095869453, "grad_norm": 13.607419967651367, "learning_rate": 5.940846506884243e-06, "loss": 0.8942, "step": 96690 }, { "epoch": 16.43719190889002, "grad_norm": 12.801511764526367, "learning_rate": 5.9380134851832965e-06, "loss": 0.6864, "step": 96700 }, { "epoch": 16.43889172191059, "grad_norm": 14.80361270904541, "learning_rate": 5.93518046348235e-06, "loss": 0.7084, "step": 96710 }, { "epoch": 16.44059153493116, "grad_norm": 18.904611587524414, "learning_rate": 5.932347441781404e-06, "loss": 0.696, "step": 96720 }, { "epoch": 16.442291347951727, "grad_norm": 16.367263793945312, "learning_rate": 5.929514420080458e-06, "loss": 0.7002, "step": 96730 }, { "epoch": 16.443991160972292, "grad_norm": 9.327521324157715, "learning_rate": 5.9266813983795114e-06, "loss": 0.7424, "step": 96740 }, { "epoch": 16.44569097399286, "grad_norm": 14.131990432739258, "learning_rate": 5.923848376678566e-06, "loss": 0.5981, "step": 96750 }, { "epoch": 16.44739078701343, "grad_norm": 12.918000221252441, "learning_rate": 5.921015354977619e-06, "loss": 0.6569, "step": 96760 }, { "epoch": 16.449090600033998, "grad_norm": 17.446678161621094, "learning_rate": 5.918182333276673e-06, "loss": 0.6614, "step": 96770 }, { "epoch": 16.450790413054563, "grad_norm": 21.906494140625, "learning_rate": 5.915349311575727e-06, "loss": 0.7832, "step": 96780 }, { "epoch": 16.45249022607513, "grad_norm": 19.01746940612793, "learning_rate": 5.912516289874781e-06, "loss": 0.8383, "step": 96790 }, { "epoch": 16.4541900390957, "grad_norm": 19.906883239746094, "learning_rate": 5.909683268173835e-06, "loss": 0.7586, "step": 96800 }, { "epoch": 16.455889852116268, "grad_norm": 14.458695411682129, "learning_rate": 5.906850246472889e-06, "loss": 0.6392, "step": 96810 }, { "epoch": 16.457589665136837, "grad_norm": 14.509683609008789, "learning_rate": 5.904017224771941e-06, "loss": 0.7593, "step": 96820 }, { "epoch": 16.4592894781574, "grad_norm": 15.863680839538574, "learning_rate": 5.901184203070996e-06, "loss": 0.6956, "step": 96830 }, { "epoch": 16.46098929117797, "grad_norm": 13.738016128540039, "learning_rate": 5.898351181370049e-06, "loss": 0.8799, "step": 96840 }, { "epoch": 16.46268910419854, "grad_norm": 11.185956001281738, "learning_rate": 5.895518159669103e-06, "loss": 0.8896, "step": 96850 }, { "epoch": 16.464388917219107, "grad_norm": 13.79110050201416, "learning_rate": 5.892685137968157e-06, "loss": 0.9373, "step": 96860 }, { "epoch": 16.466088730239672, "grad_norm": 12.627634048461914, "learning_rate": 5.889852116267211e-06, "loss": 0.6618, "step": 96870 }, { "epoch": 16.46778854326024, "grad_norm": 13.69186782836914, "learning_rate": 5.887019094566264e-06, "loss": 0.8007, "step": 96880 }, { "epoch": 16.46948835628081, "grad_norm": 10.373554229736328, "learning_rate": 5.8841860728653185e-06, "loss": 0.8654, "step": 96890 }, { "epoch": 16.471188169301378, "grad_norm": 14.399827003479004, "learning_rate": 5.881353051164372e-06, "loss": 0.6729, "step": 96900 }, { "epoch": 16.472887982321943, "grad_norm": 15.191516876220703, "learning_rate": 5.8785200294634255e-06, "loss": 0.6628, "step": 96910 }, { "epoch": 16.47458779534251, "grad_norm": 16.19341468811035, "learning_rate": 5.87568700776248e-06, "loss": 0.7545, "step": 96920 }, { "epoch": 16.47628760836308, "grad_norm": 15.793132781982422, "learning_rate": 5.8728539860615334e-06, "loss": 0.7458, "step": 96930 }, { "epoch": 16.47798742138365, "grad_norm": 12.179720878601074, "learning_rate": 5.870020964360587e-06, "loss": 0.6731, "step": 96940 }, { "epoch": 16.479687234404217, "grad_norm": 9.483220100402832, "learning_rate": 5.867187942659641e-06, "loss": 0.9777, "step": 96950 }, { "epoch": 16.481387047424782, "grad_norm": 11.73504638671875, "learning_rate": 5.864354920958695e-06, "loss": 0.7166, "step": 96960 }, { "epoch": 16.48308686044535, "grad_norm": 11.936856269836426, "learning_rate": 5.861521899257749e-06, "loss": 0.6902, "step": 96970 }, { "epoch": 16.48478667346592, "grad_norm": 11.531794548034668, "learning_rate": 5.858688877556803e-06, "loss": 0.778, "step": 96980 }, { "epoch": 16.486486486486488, "grad_norm": 8.91153335571289, "learning_rate": 5.855855855855855e-06, "loss": 0.7313, "step": 96990 }, { "epoch": 16.488186299507053, "grad_norm": 13.557943344116211, "learning_rate": 5.85302283415491e-06, "loss": 0.8322, "step": 97000 }, { "epoch": 16.48988611252762, "grad_norm": 13.004355430603027, "learning_rate": 5.850189812453963e-06, "loss": 0.8032, "step": 97010 }, { "epoch": 16.49158592554819, "grad_norm": 15.585341453552246, "learning_rate": 5.847356790753017e-06, "loss": 0.8088, "step": 97020 }, { "epoch": 16.49328573856876, "grad_norm": 12.59268856048584, "learning_rate": 5.844523769052071e-06, "loss": 0.7091, "step": 97030 }, { "epoch": 16.494985551589327, "grad_norm": 13.776468276977539, "learning_rate": 5.841690747351125e-06, "loss": 0.7615, "step": 97040 }, { "epoch": 16.496685364609892, "grad_norm": 11.76356315612793, "learning_rate": 5.838857725650178e-06, "loss": 0.889, "step": 97050 }, { "epoch": 16.49838517763046, "grad_norm": 8.314994812011719, "learning_rate": 5.836024703949233e-06, "loss": 0.7816, "step": 97060 }, { "epoch": 16.50008499065103, "grad_norm": 12.527036666870117, "learning_rate": 5.833191682248286e-06, "loss": 0.5868, "step": 97070 }, { "epoch": 16.501784803671598, "grad_norm": 14.21451187133789, "learning_rate": 5.83035866054734e-06, "loss": 0.9231, "step": 97080 }, { "epoch": 16.503484616692162, "grad_norm": 11.795348167419434, "learning_rate": 5.827525638846394e-06, "loss": 0.6125, "step": 97090 }, { "epoch": 16.50518442971273, "grad_norm": 10.928459167480469, "learning_rate": 5.8246926171454475e-06, "loss": 0.6866, "step": 97100 }, { "epoch": 16.5068842427333, "grad_norm": 17.89118194580078, "learning_rate": 5.821859595444501e-06, "loss": 0.8766, "step": 97110 }, { "epoch": 16.508584055753868, "grad_norm": 13.868803024291992, "learning_rate": 5.819026573743555e-06, "loss": 0.9166, "step": 97120 }, { "epoch": 16.510283868774433, "grad_norm": 18.14935874938965, "learning_rate": 5.816193552042609e-06, "loss": 0.5868, "step": 97130 }, { "epoch": 16.511983681795, "grad_norm": 12.10456371307373, "learning_rate": 5.8133605303416625e-06, "loss": 0.5725, "step": 97140 }, { "epoch": 16.51368349481557, "grad_norm": 19.517784118652344, "learning_rate": 5.810527508640717e-06, "loss": 0.9028, "step": 97150 }, { "epoch": 16.51538330783614, "grad_norm": 27.40373992919922, "learning_rate": 5.80769448693977e-06, "loss": 0.734, "step": 97160 }, { "epoch": 16.517083120856707, "grad_norm": 23.281051635742188, "learning_rate": 5.804861465238824e-06, "loss": 0.8375, "step": 97170 }, { "epoch": 16.518782933877272, "grad_norm": 21.11579132080078, "learning_rate": 5.802028443537877e-06, "loss": 0.8046, "step": 97180 }, { "epoch": 16.52048274689784, "grad_norm": 22.49371337890625, "learning_rate": 5.799195421836931e-06, "loss": 0.9331, "step": 97190 }, { "epoch": 16.52218255991841, "grad_norm": 16.277484893798828, "learning_rate": 5.796362400135985e-06, "loss": 0.763, "step": 97200 }, { "epoch": 16.523882372938978, "grad_norm": 19.19530487060547, "learning_rate": 5.793529378435039e-06, "loss": 0.7867, "step": 97210 }, { "epoch": 16.525582185959543, "grad_norm": 30.397689819335938, "learning_rate": 5.790696356734092e-06, "loss": 0.7982, "step": 97220 }, { "epoch": 16.52728199898011, "grad_norm": 12.24699878692627, "learning_rate": 5.787863335033147e-06, "loss": 0.8441, "step": 97230 }, { "epoch": 16.52898181200068, "grad_norm": 13.400550842285156, "learning_rate": 5.7850303133322e-06, "loss": 0.8104, "step": 97240 }, { "epoch": 16.53068162502125, "grad_norm": 11.070396423339844, "learning_rate": 5.782197291631254e-06, "loss": 0.692, "step": 97250 }, { "epoch": 16.532381438041817, "grad_norm": 13.678208351135254, "learning_rate": 5.779364269930308e-06, "loss": 0.8076, "step": 97260 }, { "epoch": 16.534081251062382, "grad_norm": 12.868268013000488, "learning_rate": 5.776531248229362e-06, "loss": 0.6792, "step": 97270 }, { "epoch": 16.53578106408295, "grad_norm": 12.023104667663574, "learning_rate": 5.773698226528415e-06, "loss": 0.8912, "step": 97280 }, { "epoch": 16.53748087710352, "grad_norm": 16.092666625976562, "learning_rate": 5.7708652048274695e-06, "loss": 0.7856, "step": 97290 }, { "epoch": 16.539180690124088, "grad_norm": 12.997502326965332, "learning_rate": 5.768032183126523e-06, "loss": 0.8511, "step": 97300 }, { "epoch": 16.540880503144653, "grad_norm": 11.313684463500977, "learning_rate": 5.7651991614255766e-06, "loss": 0.6162, "step": 97310 }, { "epoch": 16.54258031616522, "grad_norm": 10.710216522216797, "learning_rate": 5.762366139724631e-06, "loss": 0.6983, "step": 97320 }, { "epoch": 16.54428012918579, "grad_norm": 13.415820121765137, "learning_rate": 5.7595331180236845e-06, "loss": 0.8285, "step": 97330 }, { "epoch": 16.54597994220636, "grad_norm": 16.14811897277832, "learning_rate": 5.756700096322738e-06, "loss": 0.6638, "step": 97340 }, { "epoch": 16.547679755226923, "grad_norm": 13.335953712463379, "learning_rate": 5.7538670746217915e-06, "loss": 0.6714, "step": 97350 }, { "epoch": 16.549379568247492, "grad_norm": 12.792438507080078, "learning_rate": 5.751034052920845e-06, "loss": 0.7798, "step": 97360 }, { "epoch": 16.55107938126806, "grad_norm": 20.319637298583984, "learning_rate": 5.748201031219899e-06, "loss": 0.6936, "step": 97370 }, { "epoch": 16.55277919428863, "grad_norm": 11.344054222106934, "learning_rate": 5.745368009518953e-06, "loss": 0.8203, "step": 97380 }, { "epoch": 16.554479007309197, "grad_norm": 13.453146934509277, "learning_rate": 5.7425349878180064e-06, "loss": 0.6681, "step": 97390 }, { "epoch": 16.556178820329762, "grad_norm": 15.318568229675293, "learning_rate": 5.739701966117061e-06, "loss": 0.6687, "step": 97400 }, { "epoch": 16.55787863335033, "grad_norm": 12.135614395141602, "learning_rate": 5.736868944416114e-06, "loss": 0.671, "step": 97410 }, { "epoch": 16.5595784463709, "grad_norm": 12.857848167419434, "learning_rate": 5.734035922715168e-06, "loss": 0.8119, "step": 97420 }, { "epoch": 16.561278259391468, "grad_norm": 17.525453567504883, "learning_rate": 5.731202901014222e-06, "loss": 0.8857, "step": 97430 }, { "epoch": 16.562978072412033, "grad_norm": 14.447928428649902, "learning_rate": 5.728369879313276e-06, "loss": 0.6162, "step": 97440 }, { "epoch": 16.5646778854326, "grad_norm": 21.560497283935547, "learning_rate": 5.725536857612329e-06, "loss": 0.7018, "step": 97450 }, { "epoch": 16.56637769845317, "grad_norm": 16.343379974365234, "learning_rate": 5.722703835911384e-06, "loss": 0.7466, "step": 97460 }, { "epoch": 16.56807751147374, "grad_norm": 17.24127769470215, "learning_rate": 5.719870814210437e-06, "loss": 0.6967, "step": 97470 }, { "epoch": 16.569777324494307, "grad_norm": 15.673090934753418, "learning_rate": 5.717037792509491e-06, "loss": 0.6269, "step": 97480 }, { "epoch": 16.571477137514872, "grad_norm": 12.226001739501953, "learning_rate": 5.714204770808545e-06, "loss": 0.885, "step": 97490 }, { "epoch": 16.57317695053544, "grad_norm": 20.03717803955078, "learning_rate": 5.7113717491075986e-06, "loss": 0.6425, "step": 97500 }, { "epoch": 16.57487676355601, "grad_norm": 13.884374618530273, "learning_rate": 5.708538727406652e-06, "loss": 0.6895, "step": 97510 }, { "epoch": 16.576576576576578, "grad_norm": 11.789029121398926, "learning_rate": 5.705705705705706e-06, "loss": 0.8402, "step": 97520 }, { "epoch": 16.578276389597143, "grad_norm": 15.292840003967285, "learning_rate": 5.702872684004759e-06, "loss": 0.7366, "step": 97530 }, { "epoch": 16.57997620261771, "grad_norm": 31.409536361694336, "learning_rate": 5.7000396623038135e-06, "loss": 0.7243, "step": 97540 }, { "epoch": 16.58167601563828, "grad_norm": 14.007448196411133, "learning_rate": 5.697206640602867e-06, "loss": 0.7153, "step": 97550 }, { "epoch": 16.58337582865885, "grad_norm": 14.204797744750977, "learning_rate": 5.6943736189019205e-06, "loss": 0.738, "step": 97560 }, { "epoch": 16.585075641679417, "grad_norm": 17.51264190673828, "learning_rate": 5.691540597200975e-06, "loss": 0.8242, "step": 97570 }, { "epoch": 16.586775454699982, "grad_norm": 11.816786766052246, "learning_rate": 5.6887075755000284e-06, "loss": 0.7352, "step": 97580 }, { "epoch": 16.58847526772055, "grad_norm": 11.08383560180664, "learning_rate": 5.685874553799082e-06, "loss": 0.7513, "step": 97590 }, { "epoch": 16.59017508074112, "grad_norm": 11.615884780883789, "learning_rate": 5.683041532098136e-06, "loss": 0.687, "step": 97600 }, { "epoch": 16.591874893761688, "grad_norm": 12.591692924499512, "learning_rate": 5.68020851039719e-06, "loss": 0.6347, "step": 97610 }, { "epoch": 16.593574706782253, "grad_norm": 13.418892860412598, "learning_rate": 5.677375488696243e-06, "loss": 0.8171, "step": 97620 }, { "epoch": 16.59527451980282, "grad_norm": 11.38831901550293, "learning_rate": 5.674542466995298e-06, "loss": 0.725, "step": 97630 }, { "epoch": 16.59697433282339, "grad_norm": 48.2918815612793, "learning_rate": 5.671709445294351e-06, "loss": 0.7382, "step": 97640 }, { "epoch": 16.598674145843958, "grad_norm": 11.82051944732666, "learning_rate": 5.668876423593405e-06, "loss": 0.5409, "step": 97650 }, { "epoch": 16.600373958864523, "grad_norm": 14.267768859863281, "learning_rate": 5.666043401892459e-06, "loss": 0.8578, "step": 97660 }, { "epoch": 16.60207377188509, "grad_norm": 13.842114448547363, "learning_rate": 5.663210380191513e-06, "loss": 0.6283, "step": 97670 }, { "epoch": 16.60377358490566, "grad_norm": 9.662273406982422, "learning_rate": 5.660377358490566e-06, "loss": 0.4849, "step": 97680 }, { "epoch": 16.60547339792623, "grad_norm": 11.642067909240723, "learning_rate": 5.65754433678962e-06, "loss": 0.7719, "step": 97690 }, { "epoch": 16.607173210946797, "grad_norm": 19.3331298828125, "learning_rate": 5.654711315088673e-06, "loss": 0.7525, "step": 97700 }, { "epoch": 16.608873023967362, "grad_norm": 9.696639060974121, "learning_rate": 5.651878293387728e-06, "loss": 0.8029, "step": 97710 }, { "epoch": 16.61057283698793, "grad_norm": 82.2906494140625, "learning_rate": 5.649045271686781e-06, "loss": 0.6883, "step": 97720 }, { "epoch": 16.6122726500085, "grad_norm": 12.150812149047852, "learning_rate": 5.646212249985835e-06, "loss": 0.6426, "step": 97730 }, { "epoch": 16.613972463029068, "grad_norm": 14.263008117675781, "learning_rate": 5.643379228284889e-06, "loss": 0.7221, "step": 97740 }, { "epoch": 16.615672276049633, "grad_norm": 11.384204864501953, "learning_rate": 5.6405462065839425e-06, "loss": 0.7296, "step": 97750 }, { "epoch": 16.6173720890702, "grad_norm": 18.044328689575195, "learning_rate": 5.637713184882996e-06, "loss": 0.7855, "step": 97760 }, { "epoch": 16.61907190209077, "grad_norm": 10.94267749786377, "learning_rate": 5.6348801631820504e-06, "loss": 0.8867, "step": 97770 }, { "epoch": 16.62077171511134, "grad_norm": 14.346254348754883, "learning_rate": 5.632047141481104e-06, "loss": 0.8038, "step": 97780 }, { "epoch": 16.622471528131907, "grad_norm": 11.403383255004883, "learning_rate": 5.6292141197801575e-06, "loss": 0.7167, "step": 97790 }, { "epoch": 16.624171341152472, "grad_norm": 17.404279708862305, "learning_rate": 5.626381098079212e-06, "loss": 0.7401, "step": 97800 }, { "epoch": 16.62587115417304, "grad_norm": 11.676870346069336, "learning_rate": 5.623548076378265e-06, "loss": 0.6037, "step": 97810 }, { "epoch": 16.62757096719361, "grad_norm": 12.552372932434082, "learning_rate": 5.620715054677319e-06, "loss": 0.8582, "step": 97820 }, { "epoch": 16.629270780214178, "grad_norm": 12.517885208129883, "learning_rate": 5.617882032976373e-06, "loss": 0.7948, "step": 97830 }, { "epoch": 16.630970593234743, "grad_norm": 10.867655754089355, "learning_rate": 5.615049011275427e-06, "loss": 0.6371, "step": 97840 }, { "epoch": 16.63267040625531, "grad_norm": 16.174087524414062, "learning_rate": 5.61221598957448e-06, "loss": 0.7387, "step": 97850 }, { "epoch": 16.63437021927588, "grad_norm": 23.98827362060547, "learning_rate": 5.609382967873534e-06, "loss": 0.6985, "step": 97860 }, { "epoch": 16.63607003229645, "grad_norm": 18.691884994506836, "learning_rate": 5.606549946172587e-06, "loss": 0.7238, "step": 97870 }, { "epoch": 16.637769845317017, "grad_norm": 11.718374252319336, "learning_rate": 5.603716924471642e-06, "loss": 0.6842, "step": 97880 }, { "epoch": 16.639469658337582, "grad_norm": 18.555770874023438, "learning_rate": 5.600883902770695e-06, "loss": 0.7782, "step": 97890 }, { "epoch": 16.64116947135815, "grad_norm": 10.3773775100708, "learning_rate": 5.598050881069749e-06, "loss": 0.6432, "step": 97900 }, { "epoch": 16.64286928437872, "grad_norm": 12.070219039916992, "learning_rate": 5.595217859368803e-06, "loss": 0.8015, "step": 97910 }, { "epoch": 16.644569097399287, "grad_norm": 13.690192222595215, "learning_rate": 5.592384837667857e-06, "loss": 0.8688, "step": 97920 }, { "epoch": 16.646268910419852, "grad_norm": 14.189915657043457, "learning_rate": 5.58955181596691e-06, "loss": 0.7895, "step": 97930 }, { "epoch": 16.64796872344042, "grad_norm": 13.657259941101074, "learning_rate": 5.5867187942659645e-06, "loss": 0.8087, "step": 97940 }, { "epoch": 16.64966853646099, "grad_norm": 11.744668960571289, "learning_rate": 5.583885772565018e-06, "loss": 0.6374, "step": 97950 }, { "epoch": 16.651368349481558, "grad_norm": 11.355055809020996, "learning_rate": 5.581052750864072e-06, "loss": 0.7, "step": 97960 }, { "epoch": 16.653068162502123, "grad_norm": 14.043991088867188, "learning_rate": 5.578219729163126e-06, "loss": 0.8067, "step": 97970 }, { "epoch": 16.65476797552269, "grad_norm": 9.748190879821777, "learning_rate": 5.5753867074621795e-06, "loss": 0.7862, "step": 97980 }, { "epoch": 16.65646778854326, "grad_norm": 23.221290588378906, "learning_rate": 5.572553685761233e-06, "loss": 0.6312, "step": 97990 }, { "epoch": 16.65816760156383, "grad_norm": 17.973857879638672, "learning_rate": 5.569720664060287e-06, "loss": 0.6678, "step": 98000 }, { "epoch": 16.659867414584397, "grad_norm": 14.943496704101562, "learning_rate": 5.566887642359341e-06, "loss": 0.6972, "step": 98010 }, { "epoch": 16.661567227604962, "grad_norm": 15.637673377990723, "learning_rate": 5.564054620658394e-06, "loss": 0.7958, "step": 98020 }, { "epoch": 16.66326704062553, "grad_norm": 13.825990676879883, "learning_rate": 5.561221598957448e-06, "loss": 0.7082, "step": 98030 }, { "epoch": 16.6649668536461, "grad_norm": 13.362590789794922, "learning_rate": 5.5583885772565015e-06, "loss": 0.8837, "step": 98040 }, { "epoch": 16.666666666666668, "grad_norm": 9.956690788269043, "learning_rate": 5.555555555555555e-06, "loss": 0.84, "step": 98050 }, { "epoch": 16.668366479687233, "grad_norm": 13.987351417541504, "learning_rate": 5.552722533854609e-06, "loss": 0.795, "step": 98060 }, { "epoch": 16.6700662927078, "grad_norm": 14.514073371887207, "learning_rate": 5.549889512153663e-06, "loss": 0.7228, "step": 98070 }, { "epoch": 16.67176610572837, "grad_norm": 16.686206817626953, "learning_rate": 5.547056490452717e-06, "loss": 0.8048, "step": 98080 }, { "epoch": 16.67346591874894, "grad_norm": 16.4757080078125, "learning_rate": 5.544223468751771e-06, "loss": 0.7717, "step": 98090 }, { "epoch": 16.675165731769507, "grad_norm": 14.801216125488281, "learning_rate": 5.541390447050824e-06, "loss": 0.6375, "step": 98100 }, { "epoch": 16.676865544790072, "grad_norm": 17.502382278442383, "learning_rate": 5.538557425349879e-06, "loss": 0.7151, "step": 98110 }, { "epoch": 16.67856535781064, "grad_norm": 15.999221801757812, "learning_rate": 5.535724403648932e-06, "loss": 0.6747, "step": 98120 }, { "epoch": 16.68026517083121, "grad_norm": 14.42905044555664, "learning_rate": 5.532891381947986e-06, "loss": 0.7223, "step": 98130 }, { "epoch": 16.681964983851778, "grad_norm": 14.285968780517578, "learning_rate": 5.53005836024704e-06, "loss": 0.8574, "step": 98140 }, { "epoch": 16.683664796872343, "grad_norm": 27.072784423828125, "learning_rate": 5.527225338546094e-06, "loss": 0.8854, "step": 98150 }, { "epoch": 16.68536460989291, "grad_norm": 16.94573211669922, "learning_rate": 5.524392316845147e-06, "loss": 0.7827, "step": 98160 }, { "epoch": 16.68706442291348, "grad_norm": 12.919599533081055, "learning_rate": 5.5215592951442015e-06, "loss": 0.7278, "step": 98170 }, { "epoch": 16.68876423593405, "grad_norm": 16.771865844726562, "learning_rate": 5.518726273443255e-06, "loss": 0.7241, "step": 98180 }, { "epoch": 16.690464048954617, "grad_norm": 12.730335235595703, "learning_rate": 5.5158932517423085e-06, "loss": 0.8932, "step": 98190 }, { "epoch": 16.69216386197518, "grad_norm": 15.986985206604004, "learning_rate": 5.513060230041363e-06, "loss": 0.8521, "step": 98200 }, { "epoch": 16.69386367499575, "grad_norm": 13.187149047851562, "learning_rate": 5.5102272083404156e-06, "loss": 0.7536, "step": 98210 }, { "epoch": 16.69556348801632, "grad_norm": 13.123066902160645, "learning_rate": 5.507394186639469e-06, "loss": 0.7036, "step": 98220 }, { "epoch": 16.697263301036887, "grad_norm": 12.967872619628906, "learning_rate": 5.5045611649385234e-06, "loss": 0.6266, "step": 98230 }, { "epoch": 16.698963114057452, "grad_norm": 12.347799301147461, "learning_rate": 5.501728143237577e-06, "loss": 0.6919, "step": 98240 }, { "epoch": 16.70066292707802, "grad_norm": 14.411253929138184, "learning_rate": 5.498895121536631e-06, "loss": 0.857, "step": 98250 }, { "epoch": 16.70236274009859, "grad_norm": 15.269564628601074, "learning_rate": 5.496062099835685e-06, "loss": 0.9353, "step": 98260 }, { "epoch": 16.704062553119158, "grad_norm": 18.19996452331543, "learning_rate": 5.493229078134738e-06, "loss": 0.7514, "step": 98270 }, { "epoch": 16.705762366139723, "grad_norm": 15.600926399230957, "learning_rate": 5.490396056433793e-06, "loss": 0.8721, "step": 98280 }, { "epoch": 16.70746217916029, "grad_norm": 15.357949256896973, "learning_rate": 5.487563034732846e-06, "loss": 0.7062, "step": 98290 }, { "epoch": 16.70916199218086, "grad_norm": 12.16358470916748, "learning_rate": 5.4847300130319e-06, "loss": 0.882, "step": 98300 }, { "epoch": 16.71086180520143, "grad_norm": 16.65254783630371, "learning_rate": 5.481896991330954e-06, "loss": 0.7692, "step": 98310 }, { "epoch": 16.712561618221997, "grad_norm": 13.847084045410156, "learning_rate": 5.479063969630008e-06, "loss": 0.9054, "step": 98320 }, { "epoch": 16.714261431242562, "grad_norm": 8.734892845153809, "learning_rate": 5.476230947929061e-06, "loss": 0.6917, "step": 98330 }, { "epoch": 16.71596124426313, "grad_norm": 10.826726913452148, "learning_rate": 5.473397926228116e-06, "loss": 0.6842, "step": 98340 }, { "epoch": 16.7176610572837, "grad_norm": 16.694252014160156, "learning_rate": 5.470564904527169e-06, "loss": 0.7815, "step": 98350 }, { "epoch": 16.719360870304268, "grad_norm": 15.689366340637207, "learning_rate": 5.467731882826223e-06, "loss": 0.7838, "step": 98360 }, { "epoch": 16.721060683324833, "grad_norm": 18.726634979248047, "learning_rate": 5.464898861125277e-06, "loss": 0.6851, "step": 98370 }, { "epoch": 16.7227604963454, "grad_norm": 16.418733596801758, "learning_rate": 5.46206583942433e-06, "loss": 0.7708, "step": 98380 }, { "epoch": 16.72446030936597, "grad_norm": 12.547072410583496, "learning_rate": 5.459232817723383e-06, "loss": 1.0082, "step": 98390 }, { "epoch": 16.72616012238654, "grad_norm": 12.015093803405762, "learning_rate": 5.4563997960224376e-06, "loss": 0.7999, "step": 98400 }, { "epoch": 16.727859935407107, "grad_norm": 18.002704620361328, "learning_rate": 5.453566774321491e-06, "loss": 0.9265, "step": 98410 }, { "epoch": 16.729559748427672, "grad_norm": 14.352075576782227, "learning_rate": 5.450733752620545e-06, "loss": 0.6336, "step": 98420 }, { "epoch": 16.73125956144824, "grad_norm": 11.636346817016602, "learning_rate": 5.447900730919599e-06, "loss": 0.7236, "step": 98430 }, { "epoch": 16.73295937446881, "grad_norm": 13.11498737335205, "learning_rate": 5.4450677092186525e-06, "loss": 0.8347, "step": 98440 }, { "epoch": 16.734659187489378, "grad_norm": 12.680707931518555, "learning_rate": 5.442234687517707e-06, "loss": 0.6879, "step": 98450 }, { "epoch": 16.736359000509943, "grad_norm": 11.759921073913574, "learning_rate": 5.43940166581676e-06, "loss": 0.695, "step": 98460 }, { "epoch": 16.73805881353051, "grad_norm": 15.717877388000488, "learning_rate": 5.436568644115814e-06, "loss": 0.8411, "step": 98470 }, { "epoch": 16.73975862655108, "grad_norm": 34.39799118041992, "learning_rate": 5.433735622414868e-06, "loss": 0.7657, "step": 98480 }, { "epoch": 16.741458439571648, "grad_norm": 10.917055130004883, "learning_rate": 5.430902600713922e-06, "loss": 0.8171, "step": 98490 }, { "epoch": 16.743158252592217, "grad_norm": 14.143784523010254, "learning_rate": 5.428069579012975e-06, "loss": 0.7269, "step": 98500 }, { "epoch": 16.74485806561278, "grad_norm": 13.834939956665039, "learning_rate": 5.42523655731203e-06, "loss": 0.6652, "step": 98510 }, { "epoch": 16.74655787863335, "grad_norm": 9.981776237487793, "learning_rate": 5.422403535611083e-06, "loss": 0.7304, "step": 98520 }, { "epoch": 16.74825769165392, "grad_norm": 11.525485038757324, "learning_rate": 5.419570513910137e-06, "loss": 0.6834, "step": 98530 }, { "epoch": 16.749957504674487, "grad_norm": 15.851181983947754, "learning_rate": 5.416737492209191e-06, "loss": 0.7998, "step": 98540 }, { "epoch": 16.751657317695052, "grad_norm": 13.359821319580078, "learning_rate": 5.413904470508244e-06, "loss": 0.7501, "step": 98550 }, { "epoch": 16.75335713071562, "grad_norm": 21.25586700439453, "learning_rate": 5.411071448807297e-06, "loss": 0.7735, "step": 98560 }, { "epoch": 16.75505694373619, "grad_norm": 12.970420837402344, "learning_rate": 5.408238427106352e-06, "loss": 0.7565, "step": 98570 }, { "epoch": 16.756756756756758, "grad_norm": 13.729276657104492, "learning_rate": 5.405405405405405e-06, "loss": 0.9559, "step": 98580 }, { "epoch": 16.758456569777323, "grad_norm": 20.063570022583008, "learning_rate": 5.402572383704459e-06, "loss": 0.872, "step": 98590 }, { "epoch": 16.76015638279789, "grad_norm": 15.476034164428711, "learning_rate": 5.399739362003513e-06, "loss": 0.8193, "step": 98600 }, { "epoch": 16.76185619581846, "grad_norm": 13.904202461242676, "learning_rate": 5.396906340302567e-06, "loss": 0.9139, "step": 98610 }, { "epoch": 16.76355600883903, "grad_norm": 15.124445915222168, "learning_rate": 5.394073318601621e-06, "loss": 0.7607, "step": 98620 }, { "epoch": 16.765255821859597, "grad_norm": 15.605509757995605, "learning_rate": 5.3912402969006745e-06, "loss": 0.6736, "step": 98630 }, { "epoch": 16.766955634880162, "grad_norm": 14.105854034423828, "learning_rate": 5.388407275199728e-06, "loss": 0.9149, "step": 98640 }, { "epoch": 16.76865544790073, "grad_norm": 24.405126571655273, "learning_rate": 5.385574253498782e-06, "loss": 0.7861, "step": 98650 }, { "epoch": 16.7703552609213, "grad_norm": 13.705779075622559, "learning_rate": 5.382741231797836e-06, "loss": 0.7795, "step": 98660 }, { "epoch": 16.772055073941868, "grad_norm": 15.255646705627441, "learning_rate": 5.379908210096889e-06, "loss": 0.6954, "step": 98670 }, { "epoch": 16.773754886962433, "grad_norm": 67.193115234375, "learning_rate": 5.377075188395944e-06, "loss": 0.7049, "step": 98680 }, { "epoch": 16.775454699983, "grad_norm": 15.7434720993042, "learning_rate": 5.374242166694997e-06, "loss": 0.5096, "step": 98690 }, { "epoch": 16.77715451300357, "grad_norm": 15.62812614440918, "learning_rate": 5.371409144994051e-06, "loss": 0.6978, "step": 98700 }, { "epoch": 16.77885432602414, "grad_norm": 13.835428237915039, "learning_rate": 5.368576123293105e-06, "loss": 0.7832, "step": 98710 }, { "epoch": 16.780554139044703, "grad_norm": 12.787149429321289, "learning_rate": 5.365743101592158e-06, "loss": 0.8228, "step": 98720 }, { "epoch": 16.782253952065272, "grad_norm": 19.13554573059082, "learning_rate": 5.362910079891211e-06, "loss": 0.6085, "step": 98730 }, { "epoch": 16.78395376508584, "grad_norm": 14.651016235351562, "learning_rate": 5.360077058190266e-06, "loss": 0.9092, "step": 98740 }, { "epoch": 16.78565357810641, "grad_norm": 11.498518943786621, "learning_rate": 5.357244036489319e-06, "loss": 0.6592, "step": 98750 }, { "epoch": 16.787353391126977, "grad_norm": 12.84927749633789, "learning_rate": 5.354411014788373e-06, "loss": 0.8351, "step": 98760 }, { "epoch": 16.789053204147542, "grad_norm": 14.66650390625, "learning_rate": 5.351577993087427e-06, "loss": 0.7858, "step": 98770 }, { "epoch": 16.79075301716811, "grad_norm": 11.682960510253906, "learning_rate": 5.348744971386481e-06, "loss": 0.8552, "step": 98780 }, { "epoch": 16.79245283018868, "grad_norm": 13.335383415222168, "learning_rate": 5.345911949685535e-06, "loss": 0.6943, "step": 98790 }, { "epoch": 16.794152643209248, "grad_norm": 12.280166625976562, "learning_rate": 5.343078927984589e-06, "loss": 0.7391, "step": 98800 }, { "epoch": 16.795852456229813, "grad_norm": 15.689359664916992, "learning_rate": 5.340245906283642e-06, "loss": 0.7049, "step": 98810 }, { "epoch": 16.79755226925038, "grad_norm": 11.218047142028809, "learning_rate": 5.3374128845826965e-06, "loss": 0.679, "step": 98820 }, { "epoch": 16.79925208227095, "grad_norm": 11.025994300842285, "learning_rate": 5.33457986288175e-06, "loss": 0.7838, "step": 98830 }, { "epoch": 16.80095189529152, "grad_norm": 13.691574096679688, "learning_rate": 5.3317468411808035e-06, "loss": 0.7845, "step": 98840 }, { "epoch": 16.802651708312087, "grad_norm": 16.113224029541016, "learning_rate": 5.328913819479858e-06, "loss": 0.7996, "step": 98850 }, { "epoch": 16.804351521332652, "grad_norm": 13.607747077941895, "learning_rate": 5.326080797778911e-06, "loss": 0.7704, "step": 98860 }, { "epoch": 16.80605133435322, "grad_norm": 11.285735130310059, "learning_rate": 5.323247776077965e-06, "loss": 0.7736, "step": 98870 }, { "epoch": 16.80775114737379, "grad_norm": 8.552005767822266, "learning_rate": 5.320414754377019e-06, "loss": 0.7097, "step": 98880 }, { "epoch": 16.809450960394358, "grad_norm": 14.478362083435059, "learning_rate": 5.317581732676073e-06, "loss": 0.9145, "step": 98890 }, { "epoch": 16.811150773414923, "grad_norm": 15.266671180725098, "learning_rate": 5.3147487109751255e-06, "loss": 0.5698, "step": 98900 }, { "epoch": 16.81285058643549, "grad_norm": 13.469925880432129, "learning_rate": 5.31191568927418e-06, "loss": 0.5728, "step": 98910 }, { "epoch": 16.81455039945606, "grad_norm": 12.222912788391113, "learning_rate": 5.309082667573233e-06, "loss": 0.7268, "step": 98920 }, { "epoch": 16.81625021247663, "grad_norm": 11.636252403259277, "learning_rate": 5.306249645872287e-06, "loss": 0.6975, "step": 98930 }, { "epoch": 16.817950025497197, "grad_norm": 13.025357246398926, "learning_rate": 5.303416624171341e-06, "loss": 0.6761, "step": 98940 }, { "epoch": 16.819649838517762, "grad_norm": 14.586732864379883, "learning_rate": 5.300583602470395e-06, "loss": 0.721, "step": 98950 }, { "epoch": 16.82134965153833, "grad_norm": 16.50642204284668, "learning_rate": 5.297750580769448e-06, "loss": 0.7386, "step": 98960 }, { "epoch": 16.8230494645589, "grad_norm": 12.523856163024902, "learning_rate": 5.294917559068503e-06, "loss": 0.8999, "step": 98970 }, { "epoch": 16.824749277579468, "grad_norm": 15.712963104248047, "learning_rate": 5.292084537367556e-06, "loss": 0.6381, "step": 98980 }, { "epoch": 16.826449090600033, "grad_norm": 13.338252067565918, "learning_rate": 5.289251515666611e-06, "loss": 0.805, "step": 98990 }, { "epoch": 16.8281489036206, "grad_norm": 16.42098617553711, "learning_rate": 5.286418493965664e-06, "loss": 0.7253, "step": 99000 }, { "epoch": 16.82984871664117, "grad_norm": 12.998239517211914, "learning_rate": 5.283585472264718e-06, "loss": 0.8039, "step": 99010 }, { "epoch": 16.831548529661738, "grad_norm": 19.846397399902344, "learning_rate": 5.280752450563772e-06, "loss": 0.7192, "step": 99020 }, { "epoch": 16.833248342682303, "grad_norm": 10.828929901123047, "learning_rate": 5.2779194288628255e-06, "loss": 0.6682, "step": 99030 }, { "epoch": 16.83494815570287, "grad_norm": 10.53524112701416, "learning_rate": 5.275086407161879e-06, "loss": 0.7278, "step": 99040 }, { "epoch": 16.83664796872344, "grad_norm": 14.606832504272461, "learning_rate": 5.272253385460933e-06, "loss": 0.643, "step": 99050 }, { "epoch": 16.83834778174401, "grad_norm": 16.60617446899414, "learning_rate": 5.269420363759987e-06, "loss": 0.7112, "step": 99060 }, { "epoch": 16.840047594764577, "grad_norm": 12.814125061035156, "learning_rate": 5.26658734205904e-06, "loss": 0.8213, "step": 99070 }, { "epoch": 16.841747407785142, "grad_norm": 12.477099418640137, "learning_rate": 5.263754320358094e-06, "loss": 0.801, "step": 99080 }, { "epoch": 16.84344722080571, "grad_norm": 14.410804748535156, "learning_rate": 5.2609212986571475e-06, "loss": 0.681, "step": 99090 }, { "epoch": 16.84514703382628, "grad_norm": 16.3670711517334, "learning_rate": 5.258088276956201e-06, "loss": 0.6164, "step": 99100 }, { "epoch": 16.846846846846848, "grad_norm": 11.263980865478516, "learning_rate": 5.255255255255255e-06, "loss": 0.7279, "step": 99110 }, { "epoch": 16.848546659867413, "grad_norm": 17.207834243774414, "learning_rate": 5.252422233554309e-06, "loss": 0.6666, "step": 99120 }, { "epoch": 16.85024647288798, "grad_norm": 10.483783721923828, "learning_rate": 5.2495892118533624e-06, "loss": 0.7938, "step": 99130 }, { "epoch": 16.85194628590855, "grad_norm": 15.103015899658203, "learning_rate": 5.246756190152417e-06, "loss": 0.8169, "step": 99140 }, { "epoch": 16.85364609892912, "grad_norm": 12.426440238952637, "learning_rate": 5.24392316845147e-06, "loss": 0.8596, "step": 99150 }, { "epoch": 16.855345911949687, "grad_norm": 12.735315322875977, "learning_rate": 5.241090146750525e-06, "loss": 0.6672, "step": 99160 }, { "epoch": 16.857045724970252, "grad_norm": 11.652024269104004, "learning_rate": 5.238257125049578e-06, "loss": 0.5811, "step": 99170 }, { "epoch": 16.85874553799082, "grad_norm": 12.098210334777832, "learning_rate": 5.235424103348632e-06, "loss": 0.776, "step": 99180 }, { "epoch": 16.86044535101139, "grad_norm": 11.481472969055176, "learning_rate": 5.232591081647686e-06, "loss": 0.6242, "step": 99190 }, { "epoch": 16.862145164031958, "grad_norm": 9.169000625610352, "learning_rate": 5.22975805994674e-06, "loss": 0.6402, "step": 99200 }, { "epoch": 16.863844977052523, "grad_norm": 26.466320037841797, "learning_rate": 5.226925038245793e-06, "loss": 0.7831, "step": 99210 }, { "epoch": 16.86554479007309, "grad_norm": 12.202888488769531, "learning_rate": 5.2240920165448475e-06, "loss": 0.6613, "step": 99220 }, { "epoch": 16.86724460309366, "grad_norm": 11.145186424255371, "learning_rate": 5.221258994843901e-06, "loss": 0.626, "step": 99230 }, { "epoch": 16.86894441611423, "grad_norm": 16.234783172607422, "learning_rate": 5.218425973142954e-06, "loss": 0.5943, "step": 99240 }, { "epoch": 16.870644229134797, "grad_norm": 15.788786888122559, "learning_rate": 5.215592951442008e-06, "loss": 0.812, "step": 99250 }, { "epoch": 16.872344042155362, "grad_norm": 12.580160140991211, "learning_rate": 5.212759929741062e-06, "loss": 0.763, "step": 99260 }, { "epoch": 16.87404385517593, "grad_norm": 10.529889106750488, "learning_rate": 5.209926908040115e-06, "loss": 0.8474, "step": 99270 }, { "epoch": 16.8757436681965, "grad_norm": 12.33318042755127, "learning_rate": 5.2070938863391695e-06, "loss": 0.8217, "step": 99280 }, { "epoch": 16.877443481217068, "grad_norm": 9.503293991088867, "learning_rate": 5.204260864638223e-06, "loss": 0.6685, "step": 99290 }, { "epoch": 16.879143294237632, "grad_norm": 17.27822494506836, "learning_rate": 5.2014278429372765e-06, "loss": 0.6299, "step": 99300 }, { "epoch": 16.8808431072582, "grad_norm": 17.63262367248535, "learning_rate": 5.198594821236331e-06, "loss": 0.8145, "step": 99310 }, { "epoch": 16.88254292027877, "grad_norm": 16.760398864746094, "learning_rate": 5.1957617995353844e-06, "loss": 0.7147, "step": 99320 }, { "epoch": 16.884242733299338, "grad_norm": 8.495271682739258, "learning_rate": 5.192928777834438e-06, "loss": 0.6102, "step": 99330 }, { "epoch": 16.885942546319903, "grad_norm": 15.01771354675293, "learning_rate": 5.190095756133492e-06, "loss": 0.7245, "step": 99340 }, { "epoch": 16.88764235934047, "grad_norm": 11.950146675109863, "learning_rate": 5.187262734432546e-06, "loss": 0.775, "step": 99350 }, { "epoch": 16.88934217236104, "grad_norm": 13.496578216552734, "learning_rate": 5.1844297127316e-06, "loss": 0.963, "step": 99360 }, { "epoch": 16.89104198538161, "grad_norm": 11.99622631072998, "learning_rate": 5.181596691030654e-06, "loss": 0.852, "step": 99370 }, { "epoch": 16.892741798402177, "grad_norm": 11.619549751281738, "learning_rate": 5.178763669329707e-06, "loss": 0.7141, "step": 99380 }, { "epoch": 16.894441611422742, "grad_norm": 17.547000885009766, "learning_rate": 5.175930647628762e-06, "loss": 0.653, "step": 99390 }, { "epoch": 16.89614142444331, "grad_norm": 17.715599060058594, "learning_rate": 5.173097625927815e-06, "loss": 0.7732, "step": 99400 }, { "epoch": 16.89784123746388, "grad_norm": 12.62485122680664, "learning_rate": 5.170264604226869e-06, "loss": 0.8182, "step": 99410 }, { "epoch": 16.899541050484448, "grad_norm": 14.283792495727539, "learning_rate": 5.167431582525922e-06, "loss": 0.7459, "step": 99420 }, { "epoch": 16.901240863505013, "grad_norm": 14.137918472290039, "learning_rate": 5.164598560824976e-06, "loss": 0.7616, "step": 99430 }, { "epoch": 16.90294067652558, "grad_norm": 20.559877395629883, "learning_rate": 5.161765539124029e-06, "loss": 0.7039, "step": 99440 }, { "epoch": 16.90464048954615, "grad_norm": 12.748473167419434, "learning_rate": 5.158932517423084e-06, "loss": 0.6829, "step": 99450 }, { "epoch": 16.90634030256672, "grad_norm": 11.083413124084473, "learning_rate": 5.156099495722137e-06, "loss": 0.6017, "step": 99460 }, { "epoch": 16.908040115587287, "grad_norm": 32.4045295715332, "learning_rate": 5.153266474021191e-06, "loss": 0.8069, "step": 99470 }, { "epoch": 16.909739928607852, "grad_norm": 17.24517059326172, "learning_rate": 5.150433452320245e-06, "loss": 0.6304, "step": 99480 }, { "epoch": 16.91143974162842, "grad_norm": 12.625272750854492, "learning_rate": 5.1476004306192985e-06, "loss": 0.7584, "step": 99490 }, { "epoch": 16.91313955464899, "grad_norm": 12.430006980895996, "learning_rate": 5.144767408918352e-06, "loss": 0.8719, "step": 99500 }, { "epoch": 16.914839367669558, "grad_norm": 15.699577331542969, "learning_rate": 5.1419343872174064e-06, "loss": 0.672, "step": 99510 }, { "epoch": 16.916539180690123, "grad_norm": 16.410829544067383, "learning_rate": 5.13910136551646e-06, "loss": 0.6876, "step": 99520 }, { "epoch": 16.91823899371069, "grad_norm": 15.779706954956055, "learning_rate": 5.136268343815514e-06, "loss": 0.8109, "step": 99530 }, { "epoch": 16.91993880673126, "grad_norm": 9.938495635986328, "learning_rate": 5.133435322114568e-06, "loss": 0.843, "step": 99540 }, { "epoch": 16.92163861975183, "grad_norm": 14.398199081420898, "learning_rate": 5.130602300413621e-06, "loss": 0.7482, "step": 99550 }, { "epoch": 16.923338432772397, "grad_norm": 11.99730110168457, "learning_rate": 5.127769278712676e-06, "loss": 0.6845, "step": 99560 }, { "epoch": 16.925038245792962, "grad_norm": 13.801393508911133, "learning_rate": 5.124936257011729e-06, "loss": 0.6718, "step": 99570 }, { "epoch": 16.92673805881353, "grad_norm": 14.448709487915039, "learning_rate": 5.122103235310783e-06, "loss": 0.7111, "step": 99580 }, { "epoch": 16.9284378718341, "grad_norm": 22.9360294342041, "learning_rate": 5.119270213609836e-06, "loss": 0.7087, "step": 99590 }, { "epoch": 16.930137684854667, "grad_norm": 11.861822128295898, "learning_rate": 5.11643719190889e-06, "loss": 0.8082, "step": 99600 }, { "epoch": 16.931837497875232, "grad_norm": 12.737116813659668, "learning_rate": 5.113604170207943e-06, "loss": 0.765, "step": 99610 }, { "epoch": 16.9335373108958, "grad_norm": 11.38776969909668, "learning_rate": 5.110771148506998e-06, "loss": 0.7937, "step": 99620 }, { "epoch": 16.93523712391637, "grad_norm": 13.465673446655273, "learning_rate": 5.107938126806051e-06, "loss": 0.7221, "step": 99630 }, { "epoch": 16.936936936936938, "grad_norm": 13.453686714172363, "learning_rate": 5.105105105105105e-06, "loss": 0.7559, "step": 99640 }, { "epoch": 16.938636749957503, "grad_norm": 13.779156684875488, "learning_rate": 5.102272083404159e-06, "loss": 0.8054, "step": 99650 }, { "epoch": 16.94033656297807, "grad_norm": 20.75614356994629, "learning_rate": 5.099439061703213e-06, "loss": 0.6119, "step": 99660 }, { "epoch": 16.94203637599864, "grad_norm": 26.304759979248047, "learning_rate": 5.096606040002266e-06, "loss": 0.9055, "step": 99670 }, { "epoch": 16.94373618901921, "grad_norm": 15.522879600524902, "learning_rate": 5.0937730183013205e-06, "loss": 0.6673, "step": 99680 }, { "epoch": 16.945436002039777, "grad_norm": 14.638384819030762, "learning_rate": 5.090939996600374e-06, "loss": 0.7118, "step": 99690 }, { "epoch": 16.947135815060342, "grad_norm": 10.5922212600708, "learning_rate": 5.088106974899428e-06, "loss": 0.7275, "step": 99700 }, { "epoch": 16.94883562808091, "grad_norm": 25.204851150512695, "learning_rate": 5.085273953198482e-06, "loss": 0.595, "step": 99710 }, { "epoch": 16.95053544110148, "grad_norm": 18.294469833374023, "learning_rate": 5.0824409314975355e-06, "loss": 0.7299, "step": 99720 }, { "epoch": 16.952235254122048, "grad_norm": 12.37708854675293, "learning_rate": 5.07960790979659e-06, "loss": 0.786, "step": 99730 }, { "epoch": 16.953935067142613, "grad_norm": 11.332855224609375, "learning_rate": 5.076774888095643e-06, "loss": 0.8173, "step": 99740 }, { "epoch": 16.95563488016318, "grad_norm": 11.274993896484375, "learning_rate": 5.073941866394697e-06, "loss": 0.7226, "step": 99750 }, { "epoch": 16.95733469318375, "grad_norm": 14.032957077026367, "learning_rate": 5.07110884469375e-06, "loss": 0.6742, "step": 99760 }, { "epoch": 16.95903450620432, "grad_norm": 13.642412185668945, "learning_rate": 5.068275822992804e-06, "loss": 0.7593, "step": 99770 }, { "epoch": 16.960734319224887, "grad_norm": 14.582815170288086, "learning_rate": 5.0654428012918574e-06, "loss": 0.7063, "step": 99780 }, { "epoch": 16.962434132245452, "grad_norm": 13.867071151733398, "learning_rate": 5.062609779590912e-06, "loss": 0.7066, "step": 99790 }, { "epoch": 16.96413394526602, "grad_norm": 12.520042419433594, "learning_rate": 5.059776757889965e-06, "loss": 0.743, "step": 99800 }, { "epoch": 16.96583375828659, "grad_norm": 14.652582168579102, "learning_rate": 5.056943736189019e-06, "loss": 0.696, "step": 99810 }, { "epoch": 16.967533571307158, "grad_norm": 16.390920639038086, "learning_rate": 5.054110714488073e-06, "loss": 0.8216, "step": 99820 }, { "epoch": 16.969233384327723, "grad_norm": 14.512652397155762, "learning_rate": 5.051277692787127e-06, "loss": 0.5221, "step": 99830 }, { "epoch": 16.97093319734829, "grad_norm": 42.07429885864258, "learning_rate": 5.04844467108618e-06, "loss": 0.6762, "step": 99840 }, { "epoch": 16.97263301036886, "grad_norm": 11.606154441833496, "learning_rate": 5.045611649385235e-06, "loss": 0.8789, "step": 99850 }, { "epoch": 16.974332823389428, "grad_norm": 19.069650650024414, "learning_rate": 5.042778627684288e-06, "loss": 0.8336, "step": 99860 }, { "epoch": 16.976032636409997, "grad_norm": 20.80473518371582, "learning_rate": 5.039945605983342e-06, "loss": 0.8797, "step": 99870 }, { "epoch": 16.97773244943056, "grad_norm": 13.621410369873047, "learning_rate": 5.037112584282396e-06, "loss": 0.7864, "step": 99880 }, { "epoch": 16.97943226245113, "grad_norm": 12.594548225402832, "learning_rate": 5.0342795625814496e-06, "loss": 0.7877, "step": 99890 }, { "epoch": 16.9811320754717, "grad_norm": 20.1540470123291, "learning_rate": 5.031446540880504e-06, "loss": 0.68, "step": 99900 }, { "epoch": 16.982831888492267, "grad_norm": 21.134403228759766, "learning_rate": 5.0286135191795575e-06, "loss": 0.8116, "step": 99910 }, { "epoch": 16.984531701512832, "grad_norm": 16.97315216064453, "learning_rate": 5.025780497478611e-06, "loss": 0.7464, "step": 99920 }, { "epoch": 16.9862315145334, "grad_norm": 16.16950225830078, "learning_rate": 5.022947475777665e-06, "loss": 0.6945, "step": 99930 }, { "epoch": 16.98793132755397, "grad_norm": 12.278165817260742, "learning_rate": 5.020114454076718e-06, "loss": 0.7982, "step": 99940 }, { "epoch": 16.989631140574538, "grad_norm": 13.507087707519531, "learning_rate": 5.0172814323757716e-06, "loss": 0.7435, "step": 99950 }, { "epoch": 16.991330953595103, "grad_norm": 66.00179290771484, "learning_rate": 5.014448410674826e-06, "loss": 0.7665, "step": 99960 }, { "epoch": 16.99303076661567, "grad_norm": 13.104642868041992, "learning_rate": 5.0116153889738794e-06, "loss": 0.7046, "step": 99970 }, { "epoch": 16.99473057963624, "grad_norm": 13.90253734588623, "learning_rate": 5.008782367272933e-06, "loss": 0.8208, "step": 99980 }, { "epoch": 16.99643039265681, "grad_norm": 13.516600608825684, "learning_rate": 5.005949345571987e-06, "loss": 0.9059, "step": 99990 }, { "epoch": 16.998130205677377, "grad_norm": 49.884681701660156, "learning_rate": 5.003116323871041e-06, "loss": 0.7786, "step": 100000 }, { "epoch": 16.999830018697942, "grad_norm": 13.84009838104248, "learning_rate": 5.000283302170094e-06, "loss": 0.6422, "step": 100010 }, { "epoch": 17.0, "eval_cer": 1.0, "eval_loss": 2.538471221923828, "eval_runtime": 2006.9335, "eval_samples_per_second": 0.235, "eval_steps_per_second": 0.235, "step": 100011 }, { "epoch": 17.00152983171851, "grad_norm": 77.8494644165039, "learning_rate": 4.997450280469149e-06, "loss": 0.5564, "step": 100020 }, { "epoch": 17.00322964473908, "grad_norm": 10.166449546813965, "learning_rate": 4.994617258768202e-06, "loss": 0.6537, "step": 100030 }, { "epoch": 17.004929457759648, "grad_norm": 10.305347442626953, "learning_rate": 4.991784237067256e-06, "loss": 0.5766, "step": 100040 }, { "epoch": 17.006629270780213, "grad_norm": 20.169336318969727, "learning_rate": 4.98895121536631e-06, "loss": 0.7262, "step": 100050 }, { "epoch": 17.00832908380078, "grad_norm": 13.377398490905762, "learning_rate": 4.986118193665364e-06, "loss": 0.55, "step": 100060 }, { "epoch": 17.01002889682135, "grad_norm": 13.6314115524292, "learning_rate": 4.983285171964418e-06, "loss": 0.6953, "step": 100070 }, { "epoch": 17.01172870984192, "grad_norm": 15.071025848388672, "learning_rate": 4.9804521502634716e-06, "loss": 0.5304, "step": 100080 }, { "epoch": 17.013428522862487, "grad_norm": 14.439608573913574, "learning_rate": 4.977619128562525e-06, "loss": 0.7478, "step": 100090 }, { "epoch": 17.015128335883052, "grad_norm": 30.462234497070312, "learning_rate": 4.9747861068615795e-06, "loss": 0.8026, "step": 100100 }, { "epoch": 17.01682814890362, "grad_norm": 14.349674224853516, "learning_rate": 4.971953085160632e-06, "loss": 0.6987, "step": 100110 }, { "epoch": 17.01852796192419, "grad_norm": 13.353408813476562, "learning_rate": 4.969120063459686e-06, "loss": 0.7145, "step": 100120 }, { "epoch": 17.020227774944757, "grad_norm": 12.873597145080566, "learning_rate": 4.96628704175874e-06, "loss": 0.7718, "step": 100130 }, { "epoch": 17.021927587965322, "grad_norm": 15.27098560333252, "learning_rate": 4.9634540200577936e-06, "loss": 0.4035, "step": 100140 }, { "epoch": 17.02362740098589, "grad_norm": 12.30180549621582, "learning_rate": 4.960620998356847e-06, "loss": 0.6598, "step": 100150 }, { "epoch": 17.02532721400646, "grad_norm": 14.251274108886719, "learning_rate": 4.9577879766559014e-06, "loss": 0.8106, "step": 100160 }, { "epoch": 17.027027027027028, "grad_norm": 25.586936950683594, "learning_rate": 4.954954954954955e-06, "loss": 0.7439, "step": 100170 }, { "epoch": 17.028726840047593, "grad_norm": 16.36854362487793, "learning_rate": 4.9521219332540085e-06, "loss": 0.6501, "step": 100180 }, { "epoch": 17.03042665306816, "grad_norm": 16.17326545715332, "learning_rate": 4.949288911553063e-06, "loss": 0.6521, "step": 100190 }, { "epoch": 17.03212646608873, "grad_norm": 15.124178886413574, "learning_rate": 4.946455889852116e-06, "loss": 0.631, "step": 100200 }, { "epoch": 17.0338262791093, "grad_norm": 12.766541481018066, "learning_rate": 4.94362286815117e-06, "loss": 0.6512, "step": 100210 }, { "epoch": 17.035526092129867, "grad_norm": 9.590370178222656, "learning_rate": 4.940789846450224e-06, "loss": 0.6522, "step": 100220 }, { "epoch": 17.037225905150432, "grad_norm": 13.373054504394531, "learning_rate": 4.937956824749278e-06, "loss": 0.7539, "step": 100230 }, { "epoch": 17.038925718171, "grad_norm": 14.627347946166992, "learning_rate": 4.935123803048331e-06, "loss": 0.7783, "step": 100240 }, { "epoch": 17.04062553119157, "grad_norm": 12.904533386230469, "learning_rate": 4.932290781347386e-06, "loss": 0.6968, "step": 100250 }, { "epoch": 17.042325344212138, "grad_norm": 11.234346389770508, "learning_rate": 4.929457759646439e-06, "loss": 0.7467, "step": 100260 }, { "epoch": 17.044025157232703, "grad_norm": 10.903895378112793, "learning_rate": 4.9266247379454936e-06, "loss": 0.5379, "step": 100270 }, { "epoch": 17.04572497025327, "grad_norm": 14.36633014678955, "learning_rate": 4.923791716244546e-06, "loss": 0.6533, "step": 100280 }, { "epoch": 17.04742478327384, "grad_norm": 10.976088523864746, "learning_rate": 4.9209586945436e-06, "loss": 0.6816, "step": 100290 }, { "epoch": 17.04912459629441, "grad_norm": 14.247342109680176, "learning_rate": 4.918125672842654e-06, "loss": 0.7332, "step": 100300 }, { "epoch": 17.050824409314977, "grad_norm": 18.366369247436523, "learning_rate": 4.915292651141708e-06, "loss": 0.8503, "step": 100310 }, { "epoch": 17.052524222335542, "grad_norm": 10.045363426208496, "learning_rate": 4.912459629440761e-06, "loss": 0.596, "step": 100320 }, { "epoch": 17.05422403535611, "grad_norm": 11.425585746765137, "learning_rate": 4.9096266077398155e-06, "loss": 0.7246, "step": 100330 }, { "epoch": 17.05592384837668, "grad_norm": 17.744224548339844, "learning_rate": 4.906793586038869e-06, "loss": 0.8154, "step": 100340 }, { "epoch": 17.057623661397248, "grad_norm": 22.24958038330078, "learning_rate": 4.903960564337923e-06, "loss": 0.6816, "step": 100350 }, { "epoch": 17.059323474417813, "grad_norm": 12.405472755432129, "learning_rate": 4.901127542636977e-06, "loss": 0.805, "step": 100360 }, { "epoch": 17.06102328743838, "grad_norm": 11.450078010559082, "learning_rate": 4.8982945209360305e-06, "loss": 0.5922, "step": 100370 }, { "epoch": 17.06272310045895, "grad_norm": 15.649055480957031, "learning_rate": 4.895461499235084e-06, "loss": 0.7356, "step": 100380 }, { "epoch": 17.06442291347952, "grad_norm": 16.755441665649414, "learning_rate": 4.892628477534138e-06, "loss": 0.806, "step": 100390 }, { "epoch": 17.066122726500083, "grad_norm": 12.628274917602539, "learning_rate": 4.889795455833192e-06, "loss": 0.5849, "step": 100400 }, { "epoch": 17.06782253952065, "grad_norm": 11.812752723693848, "learning_rate": 4.886962434132245e-06, "loss": 0.6539, "step": 100410 }, { "epoch": 17.06952235254122, "grad_norm": 13.081724166870117, "learning_rate": 4.8841294124313e-06, "loss": 0.7132, "step": 100420 }, { "epoch": 17.07122216556179, "grad_norm": 25.808574676513672, "learning_rate": 4.881296390730353e-06, "loss": 0.6977, "step": 100430 }, { "epoch": 17.072921978582357, "grad_norm": 15.18544864654541, "learning_rate": 4.878463369029408e-06, "loss": 0.64, "step": 100440 }, { "epoch": 17.074621791602922, "grad_norm": 23.559768676757812, "learning_rate": 4.87563034732846e-06, "loss": 0.6087, "step": 100450 }, { "epoch": 17.07632160462349, "grad_norm": 12.724692344665527, "learning_rate": 4.872797325627514e-06, "loss": 0.8192, "step": 100460 }, { "epoch": 17.07802141764406, "grad_norm": 13.64199161529541, "learning_rate": 4.869964303926568e-06, "loss": 0.6712, "step": 100470 }, { "epoch": 17.079721230664628, "grad_norm": 9.031867980957031, "learning_rate": 4.867131282225622e-06, "loss": 0.8563, "step": 100480 }, { "epoch": 17.081421043685193, "grad_norm": 10.942612648010254, "learning_rate": 4.864298260524675e-06, "loss": 0.6406, "step": 100490 }, { "epoch": 17.08312085670576, "grad_norm": 13.374410629272461, "learning_rate": 4.86146523882373e-06, "loss": 0.6701, "step": 100500 }, { "epoch": 17.08482066972633, "grad_norm": 14.483009338378906, "learning_rate": 4.858632217122783e-06, "loss": 0.7315, "step": 100510 }, { "epoch": 17.0865204827469, "grad_norm": 12.704641342163086, "learning_rate": 4.855799195421837e-06, "loss": 0.8156, "step": 100520 }, { "epoch": 17.088220295767467, "grad_norm": 24.545944213867188, "learning_rate": 4.852966173720891e-06, "loss": 0.6878, "step": 100530 }, { "epoch": 17.089920108788032, "grad_norm": 21.253385543823242, "learning_rate": 4.850133152019945e-06, "loss": 0.8256, "step": 100540 }, { "epoch": 17.0916199218086, "grad_norm": 12.605691909790039, "learning_rate": 4.847300130318998e-06, "loss": 0.8149, "step": 100550 }, { "epoch": 17.09331973482917, "grad_norm": 14.042071342468262, "learning_rate": 4.8444671086180525e-06, "loss": 0.7114, "step": 100560 }, { "epoch": 17.095019547849738, "grad_norm": 16.034912109375, "learning_rate": 4.841634086917106e-06, "loss": 0.7295, "step": 100570 }, { "epoch": 17.096719360870303, "grad_norm": 11.6995210647583, "learning_rate": 4.8388010652161595e-06, "loss": 0.6633, "step": 100580 }, { "epoch": 17.09841917389087, "grad_norm": 14.474237442016602, "learning_rate": 4.835968043515214e-06, "loss": 0.7405, "step": 100590 }, { "epoch": 17.10011898691144, "grad_norm": 10.902093887329102, "learning_rate": 4.833135021814267e-06, "loss": 0.6511, "step": 100600 }, { "epoch": 17.10181879993201, "grad_norm": 13.837170600891113, "learning_rate": 4.830302000113321e-06, "loss": 0.7176, "step": 100610 }, { "epoch": 17.103518612952577, "grad_norm": 15.603742599487305, "learning_rate": 4.827468978412375e-06, "loss": 0.6556, "step": 100620 }, { "epoch": 17.105218425973142, "grad_norm": 12.339377403259277, "learning_rate": 4.824635956711428e-06, "loss": 0.8456, "step": 100630 }, { "epoch": 17.10691823899371, "grad_norm": 9.877686500549316, "learning_rate": 4.821802935010482e-06, "loss": 0.7076, "step": 100640 }, { "epoch": 17.10861805201428, "grad_norm": 14.30955696105957, "learning_rate": 4.818969913309536e-06, "loss": 0.6743, "step": 100650 }, { "epoch": 17.110317865034848, "grad_norm": 15.45864486694336, "learning_rate": 4.816136891608589e-06, "loss": 0.6831, "step": 100660 }, { "epoch": 17.112017678055413, "grad_norm": 11.783454895019531, "learning_rate": 4.813303869907644e-06, "loss": 0.6059, "step": 100670 }, { "epoch": 17.11371749107598, "grad_norm": 15.858689308166504, "learning_rate": 4.810470848206697e-06, "loss": 0.7055, "step": 100680 }, { "epoch": 17.11541730409655, "grad_norm": 11.844365119934082, "learning_rate": 4.807637826505751e-06, "loss": 0.6687, "step": 100690 }, { "epoch": 17.117117117117118, "grad_norm": 16.438261032104492, "learning_rate": 4.804804804804805e-06, "loss": 0.6561, "step": 100700 }, { "epoch": 17.118816930137683, "grad_norm": 10.174988746643066, "learning_rate": 4.801971783103859e-06, "loss": 0.7124, "step": 100710 }, { "epoch": 17.12051674315825, "grad_norm": 15.301926612854004, "learning_rate": 4.799138761402912e-06, "loss": 0.5972, "step": 100720 }, { "epoch": 17.12221655617882, "grad_norm": 14.364119529724121, "learning_rate": 4.796305739701967e-06, "loss": 0.7, "step": 100730 }, { "epoch": 17.12391636919939, "grad_norm": 12.731087684631348, "learning_rate": 4.79347271800102e-06, "loss": 0.6856, "step": 100740 }, { "epoch": 17.125616182219957, "grad_norm": 14.375651359558105, "learning_rate": 4.790639696300074e-06, "loss": 0.7951, "step": 100750 }, { "epoch": 17.127315995240522, "grad_norm": 14.079804420471191, "learning_rate": 4.787806674599128e-06, "loss": 0.8498, "step": 100760 }, { "epoch": 17.12901580826109, "grad_norm": 10.797660827636719, "learning_rate": 4.7849736528981815e-06, "loss": 0.6785, "step": 100770 }, { "epoch": 17.13071562128166, "grad_norm": 19.382455825805664, "learning_rate": 4.782140631197235e-06, "loss": 0.6748, "step": 100780 }, { "epoch": 17.132415434302228, "grad_norm": 14.685644149780273, "learning_rate": 4.779307609496289e-06, "loss": 0.756, "step": 100790 }, { "epoch": 17.134115247322793, "grad_norm": 18.704580307006836, "learning_rate": 4.776474587795342e-06, "loss": 0.6147, "step": 100800 }, { "epoch": 17.13581506034336, "grad_norm": 19.748106002807617, "learning_rate": 4.7736415660943965e-06, "loss": 0.7065, "step": 100810 }, { "epoch": 17.13751487336393, "grad_norm": 13.370463371276855, "learning_rate": 4.77080854439345e-06, "loss": 0.5947, "step": 100820 }, { "epoch": 17.1392146863845, "grad_norm": 14.259337425231934, "learning_rate": 4.7679755226925035e-06, "loss": 0.7901, "step": 100830 }, { "epoch": 17.140914499405067, "grad_norm": 14.160858154296875, "learning_rate": 4.765142500991558e-06, "loss": 0.7757, "step": 100840 }, { "epoch": 17.142614312425632, "grad_norm": 21.278480529785156, "learning_rate": 4.762309479290611e-06, "loss": 0.6196, "step": 100850 }, { "epoch": 17.1443141254462, "grad_norm": 10.422828674316406, "learning_rate": 4.759476457589665e-06, "loss": 0.7148, "step": 100860 }, { "epoch": 17.14601393846677, "grad_norm": 15.353155136108398, "learning_rate": 4.756643435888719e-06, "loss": 0.595, "step": 100870 }, { "epoch": 17.147713751487338, "grad_norm": 11.888163566589355, "learning_rate": 4.753810414187773e-06, "loss": 0.6306, "step": 100880 }, { "epoch": 17.149413564507903, "grad_norm": 206.481201171875, "learning_rate": 4.750977392486826e-06, "loss": 0.8132, "step": 100890 }, { "epoch": 17.15111337752847, "grad_norm": 15.638605117797852, "learning_rate": 4.748144370785881e-06, "loss": 0.6543, "step": 100900 }, { "epoch": 17.15281319054904, "grad_norm": 220.7332305908203, "learning_rate": 4.745311349084934e-06, "loss": 0.8448, "step": 100910 }, { "epoch": 17.15451300356961, "grad_norm": 12.635088920593262, "learning_rate": 4.742478327383988e-06, "loss": 0.5439, "step": 100920 }, { "epoch": 17.156212816590177, "grad_norm": 9.80717945098877, "learning_rate": 4.739645305683042e-06, "loss": 0.787, "step": 100930 }, { "epoch": 17.157912629610742, "grad_norm": 13.457131385803223, "learning_rate": 4.736812283982096e-06, "loss": 0.7616, "step": 100940 }, { "epoch": 17.15961244263131, "grad_norm": 10.901514053344727, "learning_rate": 4.733979262281149e-06, "loss": 0.5416, "step": 100950 }, { "epoch": 17.16131225565188, "grad_norm": 10.43395709991455, "learning_rate": 4.7311462405802035e-06, "loss": 0.7149, "step": 100960 }, { "epoch": 17.163012068672447, "grad_norm": 15.598054885864258, "learning_rate": 4.728313218879256e-06, "loss": 0.7847, "step": 100970 }, { "epoch": 17.164711881693012, "grad_norm": 11.352518081665039, "learning_rate": 4.7254801971783106e-06, "loss": 0.6286, "step": 100980 }, { "epoch": 17.16641169471358, "grad_norm": 21.06853675842285, "learning_rate": 4.722647175477364e-06, "loss": 0.7685, "step": 100990 }, { "epoch": 17.16811150773415, "grad_norm": 16.61741065979004, "learning_rate": 4.719814153776418e-06, "loss": 0.7297, "step": 101000 }, { "epoch": 17.169811320754718, "grad_norm": 23.675840377807617, "learning_rate": 4.716981132075472e-06, "loss": 0.6702, "step": 101010 }, { "epoch": 17.171511133775283, "grad_norm": 14.878889083862305, "learning_rate": 4.7141481103745255e-06, "loss": 0.8552, "step": 101020 }, { "epoch": 17.17321094679585, "grad_norm": 18.224807739257812, "learning_rate": 4.711315088673579e-06, "loss": 0.64, "step": 101030 }, { "epoch": 17.17491075981642, "grad_norm": 10.510433197021484, "learning_rate": 4.708482066972633e-06, "loss": 0.5998, "step": 101040 }, { "epoch": 17.17661057283699, "grad_norm": 10.080787658691406, "learning_rate": 4.705649045271687e-06, "loss": 0.5801, "step": 101050 }, { "epoch": 17.178310385857557, "grad_norm": 14.921381950378418, "learning_rate": 4.7028160235707404e-06, "loss": 0.6118, "step": 101060 }, { "epoch": 17.180010198878122, "grad_norm": 12.380661010742188, "learning_rate": 4.699983001869795e-06, "loss": 0.7611, "step": 101070 }, { "epoch": 17.18171001189869, "grad_norm": 14.211060523986816, "learning_rate": 4.697149980168848e-06, "loss": 0.8394, "step": 101080 }, { "epoch": 17.18340982491926, "grad_norm": 17.674949645996094, "learning_rate": 4.694316958467902e-06, "loss": 0.5191, "step": 101090 }, { "epoch": 17.185109637939828, "grad_norm": 17.323678970336914, "learning_rate": 4.691483936766956e-06, "loss": 0.7596, "step": 101100 }, { "epoch": 17.186809450960393, "grad_norm": 10.340095520019531, "learning_rate": 4.68865091506601e-06, "loss": 0.7051, "step": 101110 }, { "epoch": 17.18850926398096, "grad_norm": 10.684935569763184, "learning_rate": 4.685817893365063e-06, "loss": 0.7207, "step": 101120 }, { "epoch": 17.19020907700153, "grad_norm": 14.406481742858887, "learning_rate": 4.682984871664118e-06, "loss": 0.658, "step": 101130 }, { "epoch": 17.1919088900221, "grad_norm": 12.530491828918457, "learning_rate": 4.680151849963171e-06, "loss": 0.8374, "step": 101140 }, { "epoch": 17.193608703042667, "grad_norm": 13.920970916748047, "learning_rate": 4.677318828262224e-06, "loss": 0.6309, "step": 101150 }, { "epoch": 17.195308516063232, "grad_norm": 13.78067398071289, "learning_rate": 4.674485806561278e-06, "loss": 0.8397, "step": 101160 }, { "epoch": 17.1970083290838, "grad_norm": 8.79401683807373, "learning_rate": 4.671652784860332e-06, "loss": 0.6787, "step": 101170 }, { "epoch": 17.19870814210437, "grad_norm": 14.370908737182617, "learning_rate": 4.668819763159386e-06, "loss": 0.5872, "step": 101180 }, { "epoch": 17.200407955124938, "grad_norm": 9.929638862609863, "learning_rate": 4.66598674145844e-06, "loss": 0.9156, "step": 101190 }, { "epoch": 17.202107768145503, "grad_norm": 17.151290893554688, "learning_rate": 4.663153719757493e-06, "loss": 0.6907, "step": 101200 }, { "epoch": 17.20380758116607, "grad_norm": 17.253093719482422, "learning_rate": 4.6603206980565475e-06, "loss": 0.7155, "step": 101210 }, { "epoch": 17.20550739418664, "grad_norm": 17.434261322021484, "learning_rate": 4.657487676355601e-06, "loss": 0.7511, "step": 101220 }, { "epoch": 17.207207207207208, "grad_norm": 16.411251068115234, "learning_rate": 4.6546546546546545e-06, "loss": 0.706, "step": 101230 }, { "epoch": 17.208907020227773, "grad_norm": 22.578872680664062, "learning_rate": 4.651821632953709e-06, "loss": 0.6845, "step": 101240 }, { "epoch": 17.21060683324834, "grad_norm": 14.903700828552246, "learning_rate": 4.6489886112527624e-06, "loss": 0.6991, "step": 101250 }, { "epoch": 17.21230664626891, "grad_norm": 11.634597778320312, "learning_rate": 4.646155589551816e-06, "loss": 0.5902, "step": 101260 }, { "epoch": 17.21400645928948, "grad_norm": 10.415367126464844, "learning_rate": 4.64332256785087e-06, "loss": 0.5535, "step": 101270 }, { "epoch": 17.215706272310047, "grad_norm": 33.449249267578125, "learning_rate": 4.640489546149924e-06, "loss": 0.6093, "step": 101280 }, { "epoch": 17.217406085330612, "grad_norm": 13.460144996643066, "learning_rate": 4.637656524448977e-06, "loss": 0.6647, "step": 101290 }, { "epoch": 17.21910589835118, "grad_norm": 11.77779483795166, "learning_rate": 4.634823502748032e-06, "loss": 0.899, "step": 101300 }, { "epoch": 17.22080571137175, "grad_norm": 14.104934692382812, "learning_rate": 4.631990481047085e-06, "loss": 0.9136, "step": 101310 }, { "epoch": 17.222505524392318, "grad_norm": 16.608158111572266, "learning_rate": 4.629157459346138e-06, "loss": 0.7044, "step": 101320 }, { "epoch": 17.224205337412883, "grad_norm": 13.201854705810547, "learning_rate": 4.626324437645192e-06, "loss": 0.7267, "step": 101330 }, { "epoch": 17.22590515043345, "grad_norm": 14.485098838806152, "learning_rate": 4.623491415944246e-06, "loss": 0.8503, "step": 101340 }, { "epoch": 17.22760496345402, "grad_norm": 15.697257995605469, "learning_rate": 4.6206583942433e-06, "loss": 0.761, "step": 101350 }, { "epoch": 17.22930477647459, "grad_norm": 12.135674476623535, "learning_rate": 4.617825372542354e-06, "loss": 0.5769, "step": 101360 }, { "epoch": 17.231004589495157, "grad_norm": 15.978527069091797, "learning_rate": 4.614992350841407e-06, "loss": 0.6834, "step": 101370 }, { "epoch": 17.232704402515722, "grad_norm": 10.85843276977539, "learning_rate": 4.612159329140462e-06, "loss": 0.7034, "step": 101380 }, { "epoch": 17.23440421553629, "grad_norm": 9.663069725036621, "learning_rate": 4.609326307439515e-06, "loss": 0.5365, "step": 101390 }, { "epoch": 17.23610402855686, "grad_norm": 10.042035102844238, "learning_rate": 4.606493285738569e-06, "loss": 0.7957, "step": 101400 }, { "epoch": 17.237803841577428, "grad_norm": 15.610759735107422, "learning_rate": 4.603660264037623e-06, "loss": 0.6579, "step": 101410 }, { "epoch": 17.239503654597993, "grad_norm": 18.023874282836914, "learning_rate": 4.6008272423366765e-06, "loss": 0.6839, "step": 101420 }, { "epoch": 17.24120346761856, "grad_norm": 12.586798667907715, "learning_rate": 4.59799422063573e-06, "loss": 0.8566, "step": 101430 }, { "epoch": 17.24290328063913, "grad_norm": 20.480548858642578, "learning_rate": 4.595161198934784e-06, "loss": 0.7625, "step": 101440 }, { "epoch": 17.2446030936597, "grad_norm": 9.271130561828613, "learning_rate": 4.592328177233838e-06, "loss": 0.5946, "step": 101450 }, { "epoch": 17.246302906680267, "grad_norm": 11.851679801940918, "learning_rate": 4.5894951555328915e-06, "loss": 0.6068, "step": 101460 }, { "epoch": 17.248002719700832, "grad_norm": 9.910414695739746, "learning_rate": 4.586662133831946e-06, "loss": 0.6503, "step": 101470 }, { "epoch": 17.2497025327214, "grad_norm": 10.129877090454102, "learning_rate": 4.583829112130999e-06, "loss": 0.8055, "step": 101480 }, { "epoch": 17.25140234574197, "grad_norm": 20.173227310180664, "learning_rate": 4.580996090430052e-06, "loss": 0.7198, "step": 101490 }, { "epoch": 17.253102158762537, "grad_norm": 11.052687644958496, "learning_rate": 4.578163068729106e-06, "loss": 0.5586, "step": 101500 }, { "epoch": 17.254801971783102, "grad_norm": 18.37420654296875, "learning_rate": 4.57533004702816e-06, "loss": 0.8559, "step": 101510 }, { "epoch": 17.25650178480367, "grad_norm": 11.799363136291504, "learning_rate": 4.5724970253272134e-06, "loss": 0.7726, "step": 101520 }, { "epoch": 17.25820159782424, "grad_norm": 15.564471244812012, "learning_rate": 4.569664003626268e-06, "loss": 0.6726, "step": 101530 }, { "epoch": 17.259901410844808, "grad_norm": 15.55721378326416, "learning_rate": 4.566830981925321e-06, "loss": 0.6823, "step": 101540 }, { "epoch": 17.261601223865373, "grad_norm": 24.16900062561035, "learning_rate": 4.563997960224376e-06, "loss": 0.7339, "step": 101550 }, { "epoch": 17.26330103688594, "grad_norm": 14.480071067810059, "learning_rate": 4.561164938523429e-06, "loss": 0.6706, "step": 101560 }, { "epoch": 17.26500084990651, "grad_norm": 12.174972534179688, "learning_rate": 4.558331916822483e-06, "loss": 0.6222, "step": 101570 }, { "epoch": 17.26670066292708, "grad_norm": 10.141741752624512, "learning_rate": 4.555498895121537e-06, "loss": 0.6295, "step": 101580 }, { "epoch": 17.268400475947647, "grad_norm": 11.26165771484375, "learning_rate": 4.552665873420591e-06, "loss": 0.6921, "step": 101590 }, { "epoch": 17.270100288968212, "grad_norm": 16.038326263427734, "learning_rate": 4.549832851719644e-06, "loss": 0.8886, "step": 101600 }, { "epoch": 17.27180010198878, "grad_norm": 16.759342193603516, "learning_rate": 4.5469998300186985e-06, "loss": 0.7774, "step": 101610 }, { "epoch": 17.27349991500935, "grad_norm": 16.807153701782227, "learning_rate": 4.544166808317752e-06, "loss": 0.857, "step": 101620 }, { "epoch": 17.275199728029918, "grad_norm": 13.114189147949219, "learning_rate": 4.5413337866168056e-06, "loss": 0.642, "step": 101630 }, { "epoch": 17.276899541050483, "grad_norm": 24.49906349182129, "learning_rate": 4.53850076491586e-06, "loss": 0.5532, "step": 101640 }, { "epoch": 17.27859935407105, "grad_norm": 17.012104034423828, "learning_rate": 4.5356677432149135e-06, "loss": 0.6269, "step": 101650 }, { "epoch": 17.28029916709162, "grad_norm": 15.147608757019043, "learning_rate": 4.532834721513967e-06, "loss": 0.6291, "step": 101660 }, { "epoch": 17.28199898011219, "grad_norm": 37.98456954956055, "learning_rate": 4.5300016998130205e-06, "loss": 0.6135, "step": 101670 }, { "epoch": 17.283698793132757, "grad_norm": 12.654294967651367, "learning_rate": 4.527168678112074e-06, "loss": 0.8626, "step": 101680 }, { "epoch": 17.285398606153322, "grad_norm": 12.585589408874512, "learning_rate": 4.5243356564111275e-06, "loss": 0.8355, "step": 101690 }, { "epoch": 17.28709841917389, "grad_norm": 12.13803768157959, "learning_rate": 4.521502634710182e-06, "loss": 0.7538, "step": 101700 }, { "epoch": 17.28879823219446, "grad_norm": 20.764074325561523, "learning_rate": 4.5186696130092354e-06, "loss": 0.7764, "step": 101710 }, { "epoch": 17.290498045215028, "grad_norm": 15.073623657226562, "learning_rate": 4.51583659130829e-06, "loss": 0.7532, "step": 101720 }, { "epoch": 17.292197858235593, "grad_norm": 18.11009407043457, "learning_rate": 4.513003569607343e-06, "loss": 0.6923, "step": 101730 }, { "epoch": 17.29389767125616, "grad_norm": 19.234296798706055, "learning_rate": 4.510170547906397e-06, "loss": 0.5732, "step": 101740 }, { "epoch": 17.29559748427673, "grad_norm": 16.703142166137695, "learning_rate": 4.507337526205451e-06, "loss": 0.6644, "step": 101750 }, { "epoch": 17.2972972972973, "grad_norm": 13.852286338806152, "learning_rate": 4.504504504504505e-06, "loss": 0.657, "step": 101760 }, { "epoch": 17.298997110317863, "grad_norm": 15.064737319946289, "learning_rate": 4.501671482803558e-06, "loss": 0.697, "step": 101770 }, { "epoch": 17.300696923338432, "grad_norm": 15.319920539855957, "learning_rate": 4.498838461102613e-06, "loss": 0.6994, "step": 101780 }, { "epoch": 17.302396736359, "grad_norm": 11.492655754089355, "learning_rate": 4.496005439401666e-06, "loss": 0.6059, "step": 101790 }, { "epoch": 17.30409654937957, "grad_norm": 15.982558250427246, "learning_rate": 4.49317241770072e-06, "loss": 0.6185, "step": 101800 }, { "epoch": 17.305796362400137, "grad_norm": 14.150919914245605, "learning_rate": 4.490339395999774e-06, "loss": 0.6906, "step": 101810 }, { "epoch": 17.307496175420702, "grad_norm": 10.970236778259277, "learning_rate": 4.4875063742988276e-06, "loss": 0.5764, "step": 101820 }, { "epoch": 17.30919598844127, "grad_norm": 18.079362869262695, "learning_rate": 4.484673352597881e-06, "loss": 0.5797, "step": 101830 }, { "epoch": 17.31089580146184, "grad_norm": 18.256275177001953, "learning_rate": 4.481840330896935e-06, "loss": 0.782, "step": 101840 }, { "epoch": 17.312595614482408, "grad_norm": 17.1889705657959, "learning_rate": 4.479007309195988e-06, "loss": 0.6301, "step": 101850 }, { "epoch": 17.314295427502973, "grad_norm": 12.694974899291992, "learning_rate": 4.476174287495042e-06, "loss": 0.596, "step": 101860 }, { "epoch": 17.31599524052354, "grad_norm": 13.504274368286133, "learning_rate": 4.473341265794096e-06, "loss": 0.6492, "step": 101870 }, { "epoch": 17.31769505354411, "grad_norm": 14.869491577148438, "learning_rate": 4.4705082440931495e-06, "loss": 0.8588, "step": 101880 }, { "epoch": 17.31939486656468, "grad_norm": 12.259411811828613, "learning_rate": 4.467675222392204e-06, "loss": 0.6824, "step": 101890 }, { "epoch": 17.321094679585247, "grad_norm": 102.33544158935547, "learning_rate": 4.4648422006912574e-06, "loss": 0.7552, "step": 101900 }, { "epoch": 17.322794492605812, "grad_norm": 13.532718658447266, "learning_rate": 4.462009178990311e-06, "loss": 0.6038, "step": 101910 }, { "epoch": 17.32449430562638, "grad_norm": 14.639606475830078, "learning_rate": 4.459176157289365e-06, "loss": 0.6381, "step": 101920 }, { "epoch": 17.32619411864695, "grad_norm": 11.27409553527832, "learning_rate": 4.456343135588419e-06, "loss": 0.7128, "step": 101930 }, { "epoch": 17.327893931667518, "grad_norm": 15.087154388427734, "learning_rate": 4.453510113887472e-06, "loss": 0.7388, "step": 101940 }, { "epoch": 17.329593744688083, "grad_norm": 17.235118865966797, "learning_rate": 4.450677092186527e-06, "loss": 0.7839, "step": 101950 }, { "epoch": 17.33129355770865, "grad_norm": 16.497159957885742, "learning_rate": 4.44784407048558e-06, "loss": 0.7542, "step": 101960 }, { "epoch": 17.33299337072922, "grad_norm": 14.310306549072266, "learning_rate": 4.445011048784634e-06, "loss": 0.5782, "step": 101970 }, { "epoch": 17.33469318374979, "grad_norm": 14.15465259552002, "learning_rate": 4.442178027083688e-06, "loss": 0.6784, "step": 101980 }, { "epoch": 17.336392996770357, "grad_norm": 12.995322227478027, "learning_rate": 4.439345005382742e-06, "loss": 0.6624, "step": 101990 }, { "epoch": 17.338092809790922, "grad_norm": 13.319464683532715, "learning_rate": 4.436511983681795e-06, "loss": 0.7481, "step": 102000 }, { "epoch": 17.33979262281149, "grad_norm": 15.112850189208984, "learning_rate": 4.433678961980849e-06, "loss": 0.7868, "step": 102010 }, { "epoch": 17.34149243583206, "grad_norm": 13.718429565429688, "learning_rate": 4.430845940279902e-06, "loss": 0.6835, "step": 102020 }, { "epoch": 17.343192248852628, "grad_norm": 11.929017066955566, "learning_rate": 4.428012918578956e-06, "loss": 0.6856, "step": 102030 }, { "epoch": 17.344892061873193, "grad_norm": 15.493741989135742, "learning_rate": 4.42517989687801e-06, "loss": 0.7555, "step": 102040 }, { "epoch": 17.34659187489376, "grad_norm": 9.389705657958984, "learning_rate": 4.422346875177064e-06, "loss": 0.616, "step": 102050 }, { "epoch": 17.34829168791433, "grad_norm": 14.404352188110352, "learning_rate": 4.419513853476117e-06, "loss": 1.043, "step": 102060 }, { "epoch": 17.349991500934898, "grad_norm": 15.070425033569336, "learning_rate": 4.4166808317751715e-06, "loss": 0.6456, "step": 102070 }, { "epoch": 17.351691313955463, "grad_norm": 19.738788604736328, "learning_rate": 4.413847810074225e-06, "loss": 0.6702, "step": 102080 }, { "epoch": 17.35339112697603, "grad_norm": 15.841665267944336, "learning_rate": 4.4110147883732794e-06, "loss": 0.6715, "step": 102090 }, { "epoch": 17.3550909399966, "grad_norm": 12.727987289428711, "learning_rate": 4.408181766672333e-06, "loss": 0.7248, "step": 102100 }, { "epoch": 17.35679075301717, "grad_norm": 11.918527603149414, "learning_rate": 4.4053487449713865e-06, "loss": 0.6119, "step": 102110 }, { "epoch": 17.358490566037737, "grad_norm": 14.659696578979492, "learning_rate": 4.402515723270441e-06, "loss": 0.7, "step": 102120 }, { "epoch": 17.360190379058302, "grad_norm": 64.1727066040039, "learning_rate": 4.399682701569494e-06, "loss": 0.6558, "step": 102130 }, { "epoch": 17.36189019207887, "grad_norm": 12.536033630371094, "learning_rate": 4.396849679868548e-06, "loss": 0.6154, "step": 102140 }, { "epoch": 17.36359000509944, "grad_norm": 17.50798797607422, "learning_rate": 4.394016658167602e-06, "loss": 0.6885, "step": 102150 }, { "epoch": 17.365289818120008, "grad_norm": 11.857239723205566, "learning_rate": 4.391183636466656e-06, "loss": 0.7066, "step": 102160 }, { "epoch": 17.366989631140573, "grad_norm": 16.126678466796875, "learning_rate": 4.388350614765709e-06, "loss": 0.6446, "step": 102170 }, { "epoch": 17.36868944416114, "grad_norm": 8.931285858154297, "learning_rate": 4.385517593064763e-06, "loss": 0.6918, "step": 102180 }, { "epoch": 17.37038925718171, "grad_norm": 15.383011817932129, "learning_rate": 4.382684571363816e-06, "loss": 0.6707, "step": 102190 }, { "epoch": 17.37208907020228, "grad_norm": 17.20233154296875, "learning_rate": 4.37985154966287e-06, "loss": 0.791, "step": 102200 }, { "epoch": 17.373788883222847, "grad_norm": 14.217427253723145, "learning_rate": 4.377018527961924e-06, "loss": 0.6017, "step": 102210 }, { "epoch": 17.375488696243412, "grad_norm": 12.808609962463379, "learning_rate": 4.374185506260978e-06, "loss": 0.6802, "step": 102220 }, { "epoch": 17.37718850926398, "grad_norm": 8.957261085510254, "learning_rate": 4.371352484560031e-06, "loss": 0.7587, "step": 102230 }, { "epoch": 17.37888832228455, "grad_norm": 54.20676040649414, "learning_rate": 4.368519462859086e-06, "loss": 0.7847, "step": 102240 }, { "epoch": 17.380588135305118, "grad_norm": 14.283029556274414, "learning_rate": 4.365686441158139e-06, "loss": 0.641, "step": 102250 }, { "epoch": 17.382287948325683, "grad_norm": 15.914754867553711, "learning_rate": 4.3628534194571935e-06, "loss": 0.75, "step": 102260 }, { "epoch": 17.38398776134625, "grad_norm": 11.17397403717041, "learning_rate": 4.360020397756247e-06, "loss": 0.7445, "step": 102270 }, { "epoch": 17.38568757436682, "grad_norm": 11.682843208312988, "learning_rate": 4.357187376055301e-06, "loss": 0.6187, "step": 102280 }, { "epoch": 17.38738738738739, "grad_norm": 14.872224807739258, "learning_rate": 4.354354354354355e-06, "loss": 0.5656, "step": 102290 }, { "epoch": 17.389087200407957, "grad_norm": 14.437960624694824, "learning_rate": 4.3515213326534085e-06, "loss": 0.718, "step": 102300 }, { "epoch": 17.390787013428522, "grad_norm": 20.297344207763672, "learning_rate": 4.348688310952462e-06, "loss": 0.7581, "step": 102310 }, { "epoch": 17.39248682644909, "grad_norm": 15.704550743103027, "learning_rate": 4.345855289251516e-06, "loss": 0.6508, "step": 102320 }, { "epoch": 17.39418663946966, "grad_norm": 12.02389907836914, "learning_rate": 4.34302226755057e-06, "loss": 0.6703, "step": 102330 }, { "epoch": 17.395886452490227, "grad_norm": 12.730331420898438, "learning_rate": 4.340189245849623e-06, "loss": 0.9469, "step": 102340 }, { "epoch": 17.397586265510792, "grad_norm": 11.42950439453125, "learning_rate": 4.337356224148678e-06, "loss": 0.7682, "step": 102350 }, { "epoch": 17.39928607853136, "grad_norm": 15.1285982131958, "learning_rate": 4.3345232024477305e-06, "loss": 0.826, "step": 102360 }, { "epoch": 17.40098589155193, "grad_norm": 12.519782066345215, "learning_rate": 4.331690180746784e-06, "loss": 0.7332, "step": 102370 }, { "epoch": 17.402685704572498, "grad_norm": 13.679542541503906, "learning_rate": 4.328857159045838e-06, "loss": 0.6441, "step": 102380 }, { "epoch": 17.404385517593063, "grad_norm": 24.03104591369629, "learning_rate": 4.326024137344892e-06, "loss": 0.7338, "step": 102390 }, { "epoch": 17.40608533061363, "grad_norm": 16.220407485961914, "learning_rate": 4.323191115643945e-06, "loss": 0.7173, "step": 102400 }, { "epoch": 17.4077851436342, "grad_norm": 17.308238983154297, "learning_rate": 4.320358093943e-06, "loss": 0.7, "step": 102410 }, { "epoch": 17.40948495665477, "grad_norm": 18.152305603027344, "learning_rate": 4.317525072242053e-06, "loss": 0.6886, "step": 102420 }, { "epoch": 17.411184769675337, "grad_norm": 12.945956230163574, "learning_rate": 4.314692050541107e-06, "loss": 0.6559, "step": 102430 }, { "epoch": 17.412884582695902, "grad_norm": 17.716175079345703, "learning_rate": 4.311859028840161e-06, "loss": 0.9558, "step": 102440 }, { "epoch": 17.41458439571647, "grad_norm": 17.23040771484375, "learning_rate": 4.309026007139215e-06, "loss": 0.6551, "step": 102450 }, { "epoch": 17.41628420873704, "grad_norm": 9.412980079650879, "learning_rate": 4.306192985438269e-06, "loss": 0.5396, "step": 102460 }, { "epoch": 17.417984021757608, "grad_norm": 12.134913444519043, "learning_rate": 4.303359963737323e-06, "loss": 0.7443, "step": 102470 }, { "epoch": 17.419683834778173, "grad_norm": 21.09309959411621, "learning_rate": 4.300526942036376e-06, "loss": 0.9006, "step": 102480 }, { "epoch": 17.42138364779874, "grad_norm": 17.309358596801758, "learning_rate": 4.2976939203354305e-06, "loss": 0.8071, "step": 102490 }, { "epoch": 17.42308346081931, "grad_norm": 12.525099754333496, "learning_rate": 4.294860898634484e-06, "loss": 0.6176, "step": 102500 }, { "epoch": 17.42478327383988, "grad_norm": 15.177449226379395, "learning_rate": 4.2920278769335375e-06, "loss": 0.738, "step": 102510 }, { "epoch": 17.426483086860447, "grad_norm": 15.858476638793945, "learning_rate": 4.289194855232592e-06, "loss": 0.5799, "step": 102520 }, { "epoch": 17.428182899881012, "grad_norm": 12.375031471252441, "learning_rate": 4.2863618335316446e-06, "loss": 0.6065, "step": 102530 }, { "epoch": 17.42988271290158, "grad_norm": 9.666937828063965, "learning_rate": 4.283528811830698e-06, "loss": 0.8457, "step": 102540 }, { "epoch": 17.43158252592215, "grad_norm": 12.519702911376953, "learning_rate": 4.2806957901297524e-06, "loss": 0.6367, "step": 102550 }, { "epoch": 17.433282338942718, "grad_norm": 17.68781089782715, "learning_rate": 4.277862768428806e-06, "loss": 0.8015, "step": 102560 }, { "epoch": 17.434982151963283, "grad_norm": 13.992474555969238, "learning_rate": 4.2750297467278595e-06, "loss": 0.7957, "step": 102570 }, { "epoch": 17.43668196498385, "grad_norm": 13.67824935913086, "learning_rate": 4.272196725026914e-06, "loss": 0.6815, "step": 102580 }, { "epoch": 17.43838177800442, "grad_norm": 14.811814308166504, "learning_rate": 4.269363703325967e-06, "loss": 0.4673, "step": 102590 }, { "epoch": 17.44008159102499, "grad_norm": 12.921351432800293, "learning_rate": 4.266530681625021e-06, "loss": 0.7054, "step": 102600 }, { "epoch": 17.441781404045557, "grad_norm": 49.0130729675293, "learning_rate": 4.263697659924075e-06, "loss": 0.5941, "step": 102610 }, { "epoch": 17.44348121706612, "grad_norm": 15.631861686706543, "learning_rate": 4.260864638223129e-06, "loss": 0.6642, "step": 102620 }, { "epoch": 17.44518103008669, "grad_norm": 13.716772079467773, "learning_rate": 4.258031616522183e-06, "loss": 0.7392, "step": 102630 }, { "epoch": 17.44688084310726, "grad_norm": 16.29705810546875, "learning_rate": 4.255198594821237e-06, "loss": 0.6401, "step": 102640 }, { "epoch": 17.448580656127827, "grad_norm": 11.691187858581543, "learning_rate": 4.25236557312029e-06, "loss": 0.7184, "step": 102650 }, { "epoch": 17.450280469148392, "grad_norm": 12.229028701782227, "learning_rate": 4.249532551419345e-06, "loss": 0.7478, "step": 102660 }, { "epoch": 17.45198028216896, "grad_norm": 15.187227249145508, "learning_rate": 4.246699529718398e-06, "loss": 0.9611, "step": 102670 }, { "epoch": 17.45368009518953, "grad_norm": 10.10777759552002, "learning_rate": 4.243866508017452e-06, "loss": 0.6597, "step": 102680 }, { "epoch": 17.455379908210098, "grad_norm": 18.346288681030273, "learning_rate": 4.241033486316506e-06, "loss": 0.7893, "step": 102690 }, { "epoch": 17.457079721230663, "grad_norm": 128.91299438476562, "learning_rate": 4.238200464615559e-06, "loss": 0.7633, "step": 102700 }, { "epoch": 17.45877953425123, "grad_norm": 13.744795799255371, "learning_rate": 4.235367442914612e-06, "loss": 0.6111, "step": 102710 }, { "epoch": 17.4604793472718, "grad_norm": 16.18750762939453, "learning_rate": 4.2325344212136666e-06, "loss": 0.7267, "step": 102720 }, { "epoch": 17.46217916029237, "grad_norm": 16.016355514526367, "learning_rate": 4.22970139951272e-06, "loss": 0.7746, "step": 102730 }, { "epoch": 17.463878973312937, "grad_norm": 27.87750244140625, "learning_rate": 4.226868377811774e-06, "loss": 0.5804, "step": 102740 }, { "epoch": 17.465578786333502, "grad_norm": 15.493958473205566, "learning_rate": 4.224035356110828e-06, "loss": 0.9313, "step": 102750 }, { "epoch": 17.46727859935407, "grad_norm": 12.715394973754883, "learning_rate": 4.2212023344098815e-06, "loss": 0.7636, "step": 102760 }, { "epoch": 17.46897841237464, "grad_norm": 15.410639762878418, "learning_rate": 4.218369312708935e-06, "loss": 0.6381, "step": 102770 }, { "epoch": 17.470678225395208, "grad_norm": 9.516653060913086, "learning_rate": 4.215536291007989e-06, "loss": 0.7898, "step": 102780 }, { "epoch": 17.472378038415773, "grad_norm": 13.02995491027832, "learning_rate": 4.212703269307043e-06, "loss": 0.823, "step": 102790 }, { "epoch": 17.47407785143634, "grad_norm": 13.431717872619629, "learning_rate": 4.209870247606096e-06, "loss": 0.7454, "step": 102800 }, { "epoch": 17.47577766445691, "grad_norm": 14.3264799118042, "learning_rate": 4.207037225905151e-06, "loss": 0.7633, "step": 102810 }, { "epoch": 17.47747747747748, "grad_norm": 11.599747657775879, "learning_rate": 4.204204204204204e-06, "loss": 0.7375, "step": 102820 }, { "epoch": 17.479177290498047, "grad_norm": 16.23674201965332, "learning_rate": 4.201371182503259e-06, "loss": 0.7167, "step": 102830 }, { "epoch": 17.480877103518612, "grad_norm": 10.739264488220215, "learning_rate": 4.198538160802312e-06, "loss": 0.6481, "step": 102840 }, { "epoch": 17.48257691653918, "grad_norm": 17.534116744995117, "learning_rate": 4.195705139101366e-06, "loss": 0.6752, "step": 102850 }, { "epoch": 17.48427672955975, "grad_norm": 16.54071617126465, "learning_rate": 4.19287211740042e-06, "loss": 0.7406, "step": 102860 }, { "epoch": 17.485976542580318, "grad_norm": 96.51319885253906, "learning_rate": 4.190039095699474e-06, "loss": 0.7167, "step": 102870 }, { "epoch": 17.487676355600883, "grad_norm": 12.09533405303955, "learning_rate": 4.187206073998526e-06, "loss": 0.8102, "step": 102880 }, { "epoch": 17.48937616862145, "grad_norm": 11.101975440979004, "learning_rate": 4.184373052297581e-06, "loss": 0.6365, "step": 102890 }, { "epoch": 17.49107598164202, "grad_norm": 12.322977066040039, "learning_rate": 4.181540030596634e-06, "loss": 0.7259, "step": 102900 }, { "epoch": 17.492775794662588, "grad_norm": 16.99363136291504, "learning_rate": 4.178707008895688e-06, "loss": 0.5533, "step": 102910 }, { "epoch": 17.494475607683153, "grad_norm": 10.562629699707031, "learning_rate": 4.175873987194742e-06, "loss": 0.7577, "step": 102920 }, { "epoch": 17.49617542070372, "grad_norm": 13.235654830932617, "learning_rate": 4.173040965493796e-06, "loss": 0.6248, "step": 102930 }, { "epoch": 17.49787523372429, "grad_norm": 11.145697593688965, "learning_rate": 4.170207943792849e-06, "loss": 0.6265, "step": 102940 }, { "epoch": 17.49957504674486, "grad_norm": 14.5441255569458, "learning_rate": 4.1673749220919035e-06, "loss": 0.659, "step": 102950 }, { "epoch": 17.501274859765427, "grad_norm": 13.539626121520996, "learning_rate": 4.164541900390957e-06, "loss": 0.6694, "step": 102960 }, { "epoch": 17.502974672785992, "grad_norm": 13.414114952087402, "learning_rate": 4.1617088786900105e-06, "loss": 0.7634, "step": 102970 }, { "epoch": 17.50467448580656, "grad_norm": 13.286773681640625, "learning_rate": 4.158875856989065e-06, "loss": 0.8518, "step": 102980 }, { "epoch": 17.50637429882713, "grad_norm": 14.233899116516113, "learning_rate": 4.156042835288118e-06, "loss": 0.7639, "step": 102990 }, { "epoch": 17.508074111847698, "grad_norm": 13.843079566955566, "learning_rate": 4.153209813587173e-06, "loss": 0.6774, "step": 103000 }, { "epoch": 17.509773924868263, "grad_norm": 14.189385414123535, "learning_rate": 4.150376791886226e-06, "loss": 0.8188, "step": 103010 }, { "epoch": 17.51147373788883, "grad_norm": 14.415648460388184, "learning_rate": 4.14754377018528e-06, "loss": 0.7181, "step": 103020 }, { "epoch": 17.5131735509094, "grad_norm": 11.213665962219238, "learning_rate": 4.144710748484334e-06, "loss": 0.7031, "step": 103030 }, { "epoch": 17.51487336392997, "grad_norm": 13.277970314025879, "learning_rate": 4.141877726783388e-06, "loss": 0.6663, "step": 103040 }, { "epoch": 17.516573176950537, "grad_norm": 12.43310260772705, "learning_rate": 4.13904470508244e-06, "loss": 0.6551, "step": 103050 }, { "epoch": 17.518272989971102, "grad_norm": 17.969985961914062, "learning_rate": 4.136211683381495e-06, "loss": 0.7064, "step": 103060 }, { "epoch": 17.51997280299167, "grad_norm": 13.587836265563965, "learning_rate": 4.133378661680548e-06, "loss": 0.5431, "step": 103070 }, { "epoch": 17.52167261601224, "grad_norm": 19.48087501525879, "learning_rate": 4.130545639979602e-06, "loss": 0.7117, "step": 103080 }, { "epoch": 17.523372429032808, "grad_norm": 12.043667793273926, "learning_rate": 4.127712618278656e-06, "loss": 0.726, "step": 103090 }, { "epoch": 17.525072242053373, "grad_norm": 10.55085563659668, "learning_rate": 4.12487959657771e-06, "loss": 0.7427, "step": 103100 }, { "epoch": 17.52677205507394, "grad_norm": 15.00171184539795, "learning_rate": 4.122046574876763e-06, "loss": 0.6814, "step": 103110 }, { "epoch": 17.52847186809451, "grad_norm": 13.160447120666504, "learning_rate": 4.119213553175818e-06, "loss": 0.6968, "step": 103120 }, { "epoch": 17.53017168111508, "grad_norm": 13.385109901428223, "learning_rate": 4.116380531474871e-06, "loss": 0.7661, "step": 103130 }, { "epoch": 17.531871494135643, "grad_norm": 9.718530654907227, "learning_rate": 4.113547509773925e-06, "loss": 0.689, "step": 103140 }, { "epoch": 17.533571307156212, "grad_norm": 12.914352416992188, "learning_rate": 4.110714488072979e-06, "loss": 0.8397, "step": 103150 }, { "epoch": 17.53527112017678, "grad_norm": 9.281534194946289, "learning_rate": 4.1078814663720325e-06, "loss": 0.79, "step": 103160 }, { "epoch": 17.53697093319735, "grad_norm": 20.655244827270508, "learning_rate": 4.105048444671087e-06, "loss": 0.6084, "step": 103170 }, { "epoch": 17.538670746217917, "grad_norm": 11.416106224060059, "learning_rate": 4.10221542297014e-06, "loss": 0.7118, "step": 103180 }, { "epoch": 17.540370559238482, "grad_norm": 11.91920280456543, "learning_rate": 4.099382401269194e-06, "loss": 0.6534, "step": 103190 }, { "epoch": 17.54207037225905, "grad_norm": 15.200307846069336, "learning_rate": 4.096549379568248e-06, "loss": 0.5754, "step": 103200 }, { "epoch": 17.54377018527962, "grad_norm": 11.48892879486084, "learning_rate": 4.093716357867302e-06, "loss": 0.7281, "step": 103210 }, { "epoch": 17.545469998300188, "grad_norm": 149.5513916015625, "learning_rate": 4.0908833361663545e-06, "loss": 0.6743, "step": 103220 }, { "epoch": 17.547169811320753, "grad_norm": 11.575943946838379, "learning_rate": 4.088050314465409e-06, "loss": 0.641, "step": 103230 }, { "epoch": 17.54886962434132, "grad_norm": 12.186796188354492, "learning_rate": 4.085217292764462e-06, "loss": 0.742, "step": 103240 }, { "epoch": 17.55056943736189, "grad_norm": 12.335294723510742, "learning_rate": 4.082384271063516e-06, "loss": 0.6587, "step": 103250 }, { "epoch": 17.55226925038246, "grad_norm": 15.145692825317383, "learning_rate": 4.07955124936257e-06, "loss": 0.6453, "step": 103260 }, { "epoch": 17.553969063403027, "grad_norm": 11.530280113220215, "learning_rate": 4.076718227661624e-06, "loss": 0.6151, "step": 103270 }, { "epoch": 17.555668876423592, "grad_norm": 17.70909881591797, "learning_rate": 4.073885205960677e-06, "loss": 0.6929, "step": 103280 }, { "epoch": 17.55736868944416, "grad_norm": 12.992460250854492, "learning_rate": 4.071052184259732e-06, "loss": 0.9335, "step": 103290 }, { "epoch": 17.55906850246473, "grad_norm": 11.037459373474121, "learning_rate": 4.068219162558785e-06, "loss": 0.7445, "step": 103300 }, { "epoch": 17.560768315485298, "grad_norm": 15.4296293258667, "learning_rate": 4.065386140857839e-06, "loss": 0.6434, "step": 103310 }, { "epoch": 17.562468128505863, "grad_norm": 11.351753234863281, "learning_rate": 4.062553119156893e-06, "loss": 0.5869, "step": 103320 }, { "epoch": 17.56416794152643, "grad_norm": 13.226190567016602, "learning_rate": 4.059720097455947e-06, "loss": 0.597, "step": 103330 }, { "epoch": 17.565867754547, "grad_norm": 11.992058753967285, "learning_rate": 4.056887075755e-06, "loss": 0.7475, "step": 103340 }, { "epoch": 17.56756756756757, "grad_norm": 14.5780611038208, "learning_rate": 4.0540540540540545e-06, "loss": 0.8, "step": 103350 }, { "epoch": 17.569267380588137, "grad_norm": 13.67213249206543, "learning_rate": 4.051221032353108e-06, "loss": 0.8201, "step": 103360 }, { "epoch": 17.570967193608702, "grad_norm": 10.623649597167969, "learning_rate": 4.048388010652162e-06, "loss": 0.6336, "step": 103370 }, { "epoch": 17.57266700662927, "grad_norm": 11.891805648803711, "learning_rate": 4.045554988951216e-06, "loss": 0.5942, "step": 103380 }, { "epoch": 17.57436681964984, "grad_norm": 13.870966911315918, "learning_rate": 4.042721967250269e-06, "loss": 0.7093, "step": 103390 }, { "epoch": 17.576066632670408, "grad_norm": 13.220288276672363, "learning_rate": 4.039888945549323e-06, "loss": 0.7885, "step": 103400 }, { "epoch": 17.577766445690973, "grad_norm": 21.2207088470459, "learning_rate": 4.0370559238483765e-06, "loss": 0.6203, "step": 103410 }, { "epoch": 17.57946625871154, "grad_norm": 11.363943099975586, "learning_rate": 4.03422290214743e-06, "loss": 0.5214, "step": 103420 }, { "epoch": 17.58116607173211, "grad_norm": 11.817524909973145, "learning_rate": 4.031389880446484e-06, "loss": 0.5919, "step": 103430 }, { "epoch": 17.582865884752678, "grad_norm": 10.172895431518555, "learning_rate": 4.028556858745538e-06, "loss": 0.6758, "step": 103440 }, { "epoch": 17.584565697773243, "grad_norm": 10.524170875549316, "learning_rate": 4.0257238370445914e-06, "loss": 0.7831, "step": 103450 }, { "epoch": 17.58626551079381, "grad_norm": 18.678190231323242, "learning_rate": 4.022890815343646e-06, "loss": 0.6942, "step": 103460 }, { "epoch": 17.58796532381438, "grad_norm": 9.344780921936035, "learning_rate": 4.020057793642699e-06, "loss": 0.6246, "step": 103470 }, { "epoch": 17.58966513683495, "grad_norm": 9.603100776672363, "learning_rate": 4.017224771941753e-06, "loss": 0.6249, "step": 103480 }, { "epoch": 17.591364949855517, "grad_norm": 12.663158416748047, "learning_rate": 4.014391750240807e-06, "loss": 0.7936, "step": 103490 }, { "epoch": 17.593064762876082, "grad_norm": 15.944435119628906, "learning_rate": 4.011558728539861e-06, "loss": 0.4422, "step": 103500 }, { "epoch": 17.59476457589665, "grad_norm": 15.547619819641113, "learning_rate": 4.008725706838914e-06, "loss": 0.7063, "step": 103510 }, { "epoch": 17.59646438891722, "grad_norm": 15.405937194824219, "learning_rate": 4.005892685137969e-06, "loss": 0.6276, "step": 103520 }, { "epoch": 17.598164201937788, "grad_norm": 11.246795654296875, "learning_rate": 4.003059663437022e-06, "loss": 0.8372, "step": 103530 }, { "epoch": 17.599864014958353, "grad_norm": 29.431459426879883, "learning_rate": 4.0002266417360765e-06, "loss": 0.9189, "step": 103540 }, { "epoch": 17.60156382797892, "grad_norm": 13.294694900512695, "learning_rate": 3.99739362003513e-06, "loss": 0.698, "step": 103550 }, { "epoch": 17.60326364099949, "grad_norm": 15.136523246765137, "learning_rate": 3.9945605983341836e-06, "loss": 0.76, "step": 103560 }, { "epoch": 17.60496345402006, "grad_norm": 13.88595962524414, "learning_rate": 3.991727576633237e-06, "loss": 0.6522, "step": 103570 }, { "epoch": 17.606663267040627, "grad_norm": 12.619670867919922, "learning_rate": 3.988894554932291e-06, "loss": 0.8804, "step": 103580 }, { "epoch": 17.608363080061192, "grad_norm": 13.759866714477539, "learning_rate": 3.986061533231344e-06, "loss": 0.9798, "step": 103590 }, { "epoch": 17.61006289308176, "grad_norm": 12.978583335876465, "learning_rate": 3.9832285115303985e-06, "loss": 0.7729, "step": 103600 }, { "epoch": 17.61176270610233, "grad_norm": 13.41532039642334, "learning_rate": 3.980395489829452e-06, "loss": 0.7062, "step": 103610 }, { "epoch": 17.613462519122898, "grad_norm": 12.360429763793945, "learning_rate": 3.9775624681285055e-06, "loss": 0.7614, "step": 103620 }, { "epoch": 17.615162332143463, "grad_norm": 16.471216201782227, "learning_rate": 3.97472944642756e-06, "loss": 0.58, "step": 103630 }, { "epoch": 17.61686214516403, "grad_norm": 16.348501205444336, "learning_rate": 3.9718964247266134e-06, "loss": 0.6624, "step": 103640 }, { "epoch": 17.6185619581846, "grad_norm": 16.760787963867188, "learning_rate": 3.969063403025667e-06, "loss": 0.5761, "step": 103650 }, { "epoch": 17.62026177120517, "grad_norm": 12.818652153015137, "learning_rate": 3.966230381324721e-06, "loss": 0.5829, "step": 103660 }, { "epoch": 17.621961584225737, "grad_norm": 17.231082916259766, "learning_rate": 3.963397359623775e-06, "loss": 0.7515, "step": 103670 }, { "epoch": 17.623661397246302, "grad_norm": 16.235353469848633, "learning_rate": 3.960564337922828e-06, "loss": 0.6675, "step": 103680 }, { "epoch": 17.62536121026687, "grad_norm": 9.962067604064941, "learning_rate": 3.957731316221883e-06, "loss": 0.7241, "step": 103690 }, { "epoch": 17.62706102328744, "grad_norm": 8.346356391906738, "learning_rate": 3.954898294520936e-06, "loss": 0.5129, "step": 103700 }, { "epoch": 17.628760836308007, "grad_norm": 17.1232967376709, "learning_rate": 3.95206527281999e-06, "loss": 0.7167, "step": 103710 }, { "epoch": 17.630460649328572, "grad_norm": 21.154544830322266, "learning_rate": 3.949232251119044e-06, "loss": 0.6628, "step": 103720 }, { "epoch": 17.63216046234914, "grad_norm": 14.553926467895508, "learning_rate": 3.946399229418098e-06, "loss": 0.66, "step": 103730 }, { "epoch": 17.63386027536971, "grad_norm": 11.361396789550781, "learning_rate": 3.943566207717151e-06, "loss": 0.8349, "step": 103740 }, { "epoch": 17.635560088390278, "grad_norm": 15.112435340881348, "learning_rate": 3.940733186016205e-06, "loss": 0.7618, "step": 103750 }, { "epoch": 17.637259901410843, "grad_norm": 21.032262802124023, "learning_rate": 3.937900164315258e-06, "loss": 0.8399, "step": 103760 }, { "epoch": 17.63895971443141, "grad_norm": 16.731794357299805, "learning_rate": 3.935067142614313e-06, "loss": 0.8059, "step": 103770 }, { "epoch": 17.64065952745198, "grad_norm": 16.68196678161621, "learning_rate": 3.932234120913366e-06, "loss": 0.831, "step": 103780 }, { "epoch": 17.64235934047255, "grad_norm": 13.55766773223877, "learning_rate": 3.92940109921242e-06, "loss": 0.8187, "step": 103790 }, { "epoch": 17.644059153493117, "grad_norm": 16.131183624267578, "learning_rate": 3.926568077511474e-06, "loss": 0.8005, "step": 103800 }, { "epoch": 17.645758966513682, "grad_norm": 11.257421493530273, "learning_rate": 3.9237350558105275e-06, "loss": 0.6793, "step": 103810 }, { "epoch": 17.64745877953425, "grad_norm": 17.607053756713867, "learning_rate": 3.920902034109581e-06, "loss": 0.6398, "step": 103820 }, { "epoch": 17.64915859255482, "grad_norm": 15.578592300415039, "learning_rate": 3.9180690124086354e-06, "loss": 0.8427, "step": 103830 }, { "epoch": 17.650858405575388, "grad_norm": 12.52917766571045, "learning_rate": 3.915235990707689e-06, "loss": 0.615, "step": 103840 }, { "epoch": 17.652558218595953, "grad_norm": 16.337465286254883, "learning_rate": 3.9124029690067425e-06, "loss": 0.6103, "step": 103850 }, { "epoch": 17.65425803161652, "grad_norm": 14.152292251586914, "learning_rate": 3.909569947305797e-06, "loss": 0.7013, "step": 103860 }, { "epoch": 17.65595784463709, "grad_norm": 29.694150924682617, "learning_rate": 3.90673692560485e-06, "loss": 0.7289, "step": 103870 }, { "epoch": 17.65765765765766, "grad_norm": 15.328985214233398, "learning_rate": 3.903903903903904e-06, "loss": 0.7146, "step": 103880 }, { "epoch": 17.659357470678227, "grad_norm": 14.686665534973145, "learning_rate": 3.901070882202958e-06, "loss": 0.6894, "step": 103890 }, { "epoch": 17.661057283698792, "grad_norm": 10.100244522094727, "learning_rate": 3.898237860502012e-06, "loss": 0.6703, "step": 103900 }, { "epoch": 17.66275709671936, "grad_norm": 12.45425796508789, "learning_rate": 3.895404838801065e-06, "loss": 0.5981, "step": 103910 }, { "epoch": 17.66445690973993, "grad_norm": 11.266380310058594, "learning_rate": 3.892571817100119e-06, "loss": 0.6759, "step": 103920 }, { "epoch": 17.666156722760498, "grad_norm": 20.897136688232422, "learning_rate": 3.889738795399172e-06, "loss": 0.8186, "step": 103930 }, { "epoch": 17.667856535781063, "grad_norm": 11.05966567993164, "learning_rate": 3.886905773698227e-06, "loss": 0.7633, "step": 103940 }, { "epoch": 17.66955634880163, "grad_norm": 12.633343696594238, "learning_rate": 3.88407275199728e-06, "loss": 0.8879, "step": 103950 }, { "epoch": 17.6712561618222, "grad_norm": 12.343586921691895, "learning_rate": 3.881239730296334e-06, "loss": 0.6246, "step": 103960 }, { "epoch": 17.67295597484277, "grad_norm": 16.978626251220703, "learning_rate": 3.878406708595388e-06, "loss": 0.6589, "step": 103970 }, { "epoch": 17.674655787863337, "grad_norm": 11.974552154541016, "learning_rate": 3.875573686894442e-06, "loss": 0.598, "step": 103980 }, { "epoch": 17.676355600883902, "grad_norm": 11.916306495666504, "learning_rate": 3.872740665193495e-06, "loss": 0.6979, "step": 103990 }, { "epoch": 17.67805541390447, "grad_norm": 21.593774795532227, "learning_rate": 3.8699076434925495e-06, "loss": 0.7141, "step": 104000 }, { "epoch": 17.67975522692504, "grad_norm": 12.056642532348633, "learning_rate": 3.867074621791603e-06, "loss": 0.6489, "step": 104010 }, { "epoch": 17.681455039945607, "grad_norm": 20.028030395507812, "learning_rate": 3.864241600090657e-06, "loss": 0.6244, "step": 104020 }, { "epoch": 17.683154852966172, "grad_norm": 20.408449172973633, "learning_rate": 3.861408578389711e-06, "loss": 0.5591, "step": 104030 }, { "epoch": 17.68485466598674, "grad_norm": 11.14585018157959, "learning_rate": 3.8585755566887645e-06, "loss": 0.7077, "step": 104040 }, { "epoch": 17.68655447900731, "grad_norm": 14.49847412109375, "learning_rate": 3.855742534987818e-06, "loss": 0.655, "step": 104050 }, { "epoch": 17.688254292027878, "grad_norm": 18.82436752319336, "learning_rate": 3.852909513286872e-06, "loss": 0.6065, "step": 104060 }, { "epoch": 17.689954105048443, "grad_norm": 15.221710205078125, "learning_rate": 3.850076491585926e-06, "loss": 0.817, "step": 104070 }, { "epoch": 17.69165391806901, "grad_norm": 15.357320785522461, "learning_rate": 3.847243469884979e-06, "loss": 0.6107, "step": 104080 }, { "epoch": 17.69335373108958, "grad_norm": 13.527127265930176, "learning_rate": 3.844410448184033e-06, "loss": 0.5686, "step": 104090 }, { "epoch": 17.69505354411015, "grad_norm": 13.535289764404297, "learning_rate": 3.8415774264830864e-06, "loss": 0.8084, "step": 104100 }, { "epoch": 17.696753357130717, "grad_norm": 11.819549560546875, "learning_rate": 3.838744404782141e-06, "loss": 0.5728, "step": 104110 }, { "epoch": 17.698453170151282, "grad_norm": 8.252289772033691, "learning_rate": 3.835911383081194e-06, "loss": 0.7342, "step": 104120 }, { "epoch": 17.70015298317185, "grad_norm": 16.347219467163086, "learning_rate": 3.833078361380248e-06, "loss": 0.8094, "step": 104130 }, { "epoch": 17.70185279619242, "grad_norm": 11.038966178894043, "learning_rate": 3.830245339679302e-06, "loss": 0.6389, "step": 104140 }, { "epoch": 17.703552609212988, "grad_norm": 12.563838005065918, "learning_rate": 3.827412317978356e-06, "loss": 0.5612, "step": 104150 }, { "epoch": 17.705252422233553, "grad_norm": 12.234809875488281, "learning_rate": 3.824579296277409e-06, "loss": 0.8157, "step": 104160 }, { "epoch": 17.70695223525412, "grad_norm": 13.56314468383789, "learning_rate": 3.821746274576464e-06, "loss": 0.5829, "step": 104170 }, { "epoch": 17.70865204827469, "grad_norm": 17.412057876586914, "learning_rate": 3.818913252875517e-06, "loss": 0.613, "step": 104180 }, { "epoch": 17.71035186129526, "grad_norm": 12.337813377380371, "learning_rate": 3.816080231174571e-06, "loss": 0.6132, "step": 104190 }, { "epoch": 17.712051674315827, "grad_norm": 14.580780029296875, "learning_rate": 3.8132472094736246e-06, "loss": 0.7471, "step": 104200 }, { "epoch": 17.713751487336392, "grad_norm": 10.944339752197266, "learning_rate": 3.8104141877726786e-06, "loss": 0.5685, "step": 104210 }, { "epoch": 17.71545130035696, "grad_norm": 11.11061954498291, "learning_rate": 3.8075811660717325e-06, "loss": 0.6604, "step": 104220 }, { "epoch": 17.71715111337753, "grad_norm": 16.778305053710938, "learning_rate": 3.8047481443707865e-06, "loss": 0.8119, "step": 104230 }, { "epoch": 17.718850926398098, "grad_norm": 15.064587593078613, "learning_rate": 3.80191512266984e-06, "loss": 0.6757, "step": 104240 }, { "epoch": 17.720550739418663, "grad_norm": 14.762970924377441, "learning_rate": 3.799082100968894e-06, "loss": 0.7093, "step": 104250 }, { "epoch": 17.72225055243923, "grad_norm": 13.645492553710938, "learning_rate": 3.796249079267947e-06, "loss": 0.7487, "step": 104260 }, { "epoch": 17.7239503654598, "grad_norm": 20.745853424072266, "learning_rate": 3.7934160575670006e-06, "loss": 0.6846, "step": 104270 }, { "epoch": 17.725650178480368, "grad_norm": 12.786123275756836, "learning_rate": 3.7905830358660545e-06, "loss": 0.6691, "step": 104280 }, { "epoch": 17.727349991500937, "grad_norm": 10.046359062194824, "learning_rate": 3.7877500141651084e-06, "loss": 0.6606, "step": 104290 }, { "epoch": 17.7290498045215, "grad_norm": 15.504886627197266, "learning_rate": 3.784916992464162e-06, "loss": 0.652, "step": 104300 }, { "epoch": 17.73074961754207, "grad_norm": 12.34064769744873, "learning_rate": 3.782083970763216e-06, "loss": 0.8591, "step": 104310 }, { "epoch": 17.73244943056264, "grad_norm": 13.33407211303711, "learning_rate": 3.77925094906227e-06, "loss": 0.7972, "step": 104320 }, { "epoch": 17.734149243583207, "grad_norm": 10.169025421142578, "learning_rate": 3.776417927361324e-06, "loss": 0.8072, "step": 104330 }, { "epoch": 17.735849056603772, "grad_norm": 12.680291175842285, "learning_rate": 3.7735849056603773e-06, "loss": 0.7535, "step": 104340 }, { "epoch": 17.73754886962434, "grad_norm": 14.979872703552246, "learning_rate": 3.7707518839594313e-06, "loss": 0.813, "step": 104350 }, { "epoch": 17.73924868264491, "grad_norm": 14.689408302307129, "learning_rate": 3.7679188622584852e-06, "loss": 0.6626, "step": 104360 }, { "epoch": 17.740948495665478, "grad_norm": 12.768845558166504, "learning_rate": 3.7650858405575387e-06, "loss": 0.6964, "step": 104370 }, { "epoch": 17.742648308686043, "grad_norm": 14.83143424987793, "learning_rate": 3.7622528188565927e-06, "loss": 0.7585, "step": 104380 }, { "epoch": 17.74434812170661, "grad_norm": 16.749500274658203, "learning_rate": 3.7594197971556466e-06, "loss": 0.6715, "step": 104390 }, { "epoch": 17.74604793472718, "grad_norm": 10.611236572265625, "learning_rate": 3.7565867754547006e-06, "loss": 0.5698, "step": 104400 }, { "epoch": 17.74774774774775, "grad_norm": 12.666668891906738, "learning_rate": 3.753753753753754e-06, "loss": 0.6779, "step": 104410 }, { "epoch": 17.749447560768317, "grad_norm": 25.74435806274414, "learning_rate": 3.750920732052808e-06, "loss": 0.6808, "step": 104420 }, { "epoch": 17.751147373788882, "grad_norm": 14.345909118652344, "learning_rate": 3.7480877103518616e-06, "loss": 0.6944, "step": 104430 }, { "epoch": 17.75284718680945, "grad_norm": 13.912409782409668, "learning_rate": 3.745254688650915e-06, "loss": 0.8076, "step": 104440 }, { "epoch": 17.75454699983002, "grad_norm": 11.353962898254395, "learning_rate": 3.742421666949969e-06, "loss": 0.6454, "step": 104450 }, { "epoch": 17.756246812850588, "grad_norm": 11.463956832885742, "learning_rate": 3.739588645249023e-06, "loss": 0.7642, "step": 104460 }, { "epoch": 17.757946625871153, "grad_norm": 14.926384925842285, "learning_rate": 3.7367556235480765e-06, "loss": 0.8175, "step": 104470 }, { "epoch": 17.75964643889172, "grad_norm": 16.75812530517578, "learning_rate": 3.73392260184713e-06, "loss": 0.6519, "step": 104480 }, { "epoch": 17.76134625191229, "grad_norm": 74.79618835449219, "learning_rate": 3.731089580146184e-06, "loss": 0.7766, "step": 104490 }, { "epoch": 17.76304606493286, "grad_norm": 10.267406463623047, "learning_rate": 3.728256558445238e-06, "loss": 0.6871, "step": 104500 }, { "epoch": 17.764745877953423, "grad_norm": 11.391715049743652, "learning_rate": 3.7254235367442914e-06, "loss": 0.7552, "step": 104510 }, { "epoch": 17.766445690973992, "grad_norm": 14.45934009552002, "learning_rate": 3.7225905150433454e-06, "loss": 0.6278, "step": 104520 }, { "epoch": 17.76814550399456, "grad_norm": 16.486774444580078, "learning_rate": 3.7197574933423993e-06, "loss": 0.621, "step": 104530 }, { "epoch": 17.76984531701513, "grad_norm": 14.338544845581055, "learning_rate": 3.716924471641453e-06, "loss": 0.668, "step": 104540 }, { "epoch": 17.771545130035697, "grad_norm": 12.249195098876953, "learning_rate": 3.7140914499405068e-06, "loss": 0.4811, "step": 104550 }, { "epoch": 17.773244943056262, "grad_norm": 12.642634391784668, "learning_rate": 3.7112584282395603e-06, "loss": 0.7154, "step": 104560 }, { "epoch": 17.77494475607683, "grad_norm": 14.684289932250977, "learning_rate": 3.708425406538614e-06, "loss": 0.7893, "step": 104570 }, { "epoch": 17.7766445690974, "grad_norm": 13.97522258758545, "learning_rate": 3.7055923848376678e-06, "loss": 0.6563, "step": 104580 }, { "epoch": 17.778344382117968, "grad_norm": 10.08414077758789, "learning_rate": 3.7027593631367217e-06, "loss": 0.6158, "step": 104590 }, { "epoch": 17.780044195138533, "grad_norm": 13.223708152770996, "learning_rate": 3.6999263414357757e-06, "loss": 0.6142, "step": 104600 }, { "epoch": 17.7817440081591, "grad_norm": 11.41716194152832, "learning_rate": 3.697093319734829e-06, "loss": 0.7613, "step": 104610 }, { "epoch": 17.78344382117967, "grad_norm": 11.76408576965332, "learning_rate": 3.694260298033883e-06, "loss": 0.8266, "step": 104620 }, { "epoch": 17.78514363420024, "grad_norm": 12.52025318145752, "learning_rate": 3.691427276332937e-06, "loss": 0.6729, "step": 104630 }, { "epoch": 17.786843447220807, "grad_norm": 10.330828666687012, "learning_rate": 3.6885942546319906e-06, "loss": 0.8038, "step": 104640 }, { "epoch": 17.788543260241372, "grad_norm": 16.729116439819336, "learning_rate": 3.685761232931044e-06, "loss": 0.6572, "step": 104650 }, { "epoch": 17.79024307326194, "grad_norm": 12.835416793823242, "learning_rate": 3.682928211230098e-06, "loss": 0.6602, "step": 104660 }, { "epoch": 17.79194288628251, "grad_norm": 13.200695991516113, "learning_rate": 3.680095189529152e-06, "loss": 0.6011, "step": 104670 }, { "epoch": 17.793642699303078, "grad_norm": 10.895194053649902, "learning_rate": 3.6772621678282055e-06, "loss": 0.6784, "step": 104680 }, { "epoch": 17.795342512323643, "grad_norm": 13.638483047485352, "learning_rate": 3.6744291461272595e-06, "loss": 0.8442, "step": 104690 }, { "epoch": 17.79704232534421, "grad_norm": 13.905214309692383, "learning_rate": 3.6715961244263134e-06, "loss": 0.6856, "step": 104700 }, { "epoch": 17.79874213836478, "grad_norm": 14.869966506958008, "learning_rate": 3.668763102725367e-06, "loss": 0.8225, "step": 104710 }, { "epoch": 17.80044195138535, "grad_norm": 11.866884231567383, "learning_rate": 3.665930081024421e-06, "loss": 0.6359, "step": 104720 }, { "epoch": 17.802141764405917, "grad_norm": 15.110898971557617, "learning_rate": 3.663097059323475e-06, "loss": 0.8832, "step": 104730 }, { "epoch": 17.803841577426482, "grad_norm": 14.186384201049805, "learning_rate": 3.660264037622528e-06, "loss": 0.609, "step": 104740 }, { "epoch": 17.80554139044705, "grad_norm": 14.31552791595459, "learning_rate": 3.657431015921582e-06, "loss": 0.6774, "step": 104750 }, { "epoch": 17.80724120346762, "grad_norm": 15.74963092803955, "learning_rate": 3.654597994220636e-06, "loss": 0.8022, "step": 104760 }, { "epoch": 17.808941016488188, "grad_norm": 19.74069595336914, "learning_rate": 3.6517649725196898e-06, "loss": 0.7095, "step": 104770 }, { "epoch": 17.810640829508753, "grad_norm": 10.515810012817383, "learning_rate": 3.6489319508187433e-06, "loss": 0.7597, "step": 104780 }, { "epoch": 17.81234064252932, "grad_norm": 16.336538314819336, "learning_rate": 3.6460989291177972e-06, "loss": 0.6277, "step": 104790 }, { "epoch": 17.81404045554989, "grad_norm": 32.48435974121094, "learning_rate": 3.643265907416851e-06, "loss": 0.5681, "step": 104800 }, { "epoch": 17.81574026857046, "grad_norm": 14.268546104431152, "learning_rate": 3.6404328857159047e-06, "loss": 0.7713, "step": 104810 }, { "epoch": 17.817440081591023, "grad_norm": 9.485014915466309, "learning_rate": 3.6375998640149582e-06, "loss": 0.6567, "step": 104820 }, { "epoch": 17.81913989461159, "grad_norm": 11.428064346313477, "learning_rate": 3.634766842314012e-06, "loss": 0.7571, "step": 104830 }, { "epoch": 17.82083970763216, "grad_norm": 14.911940574645996, "learning_rate": 3.6319338206130657e-06, "loss": 0.59, "step": 104840 }, { "epoch": 17.82253952065273, "grad_norm": 12.477972984313965, "learning_rate": 3.6291007989121196e-06, "loss": 0.7875, "step": 104850 }, { "epoch": 17.824239333673297, "grad_norm": 10.92928409576416, "learning_rate": 3.6262677772111736e-06, "loss": 0.6838, "step": 104860 }, { "epoch": 17.825939146693862, "grad_norm": 27.990978240966797, "learning_rate": 3.6234347555102275e-06, "loss": 0.6621, "step": 104870 }, { "epoch": 17.82763895971443, "grad_norm": 10.727097511291504, "learning_rate": 3.620601733809281e-06, "loss": 0.9655, "step": 104880 }, { "epoch": 17.829338772735, "grad_norm": 15.364718437194824, "learning_rate": 3.617768712108335e-06, "loss": 0.735, "step": 104890 }, { "epoch": 17.831038585755568, "grad_norm": 30.664642333984375, "learning_rate": 3.614935690407389e-06, "loss": 0.5808, "step": 104900 }, { "epoch": 17.832738398776133, "grad_norm": 11.479806900024414, "learning_rate": 3.612102668706442e-06, "loss": 0.7766, "step": 104910 }, { "epoch": 17.8344382117967, "grad_norm": 15.442485809326172, "learning_rate": 3.609269647005496e-06, "loss": 0.7409, "step": 104920 }, { "epoch": 17.83613802481727, "grad_norm": 18.653499603271484, "learning_rate": 3.60643662530455e-06, "loss": 0.5872, "step": 104930 }, { "epoch": 17.83783783783784, "grad_norm": 13.689105987548828, "learning_rate": 3.6036036036036035e-06, "loss": 0.5229, "step": 104940 }, { "epoch": 17.839537650858407, "grad_norm": 18.233762741088867, "learning_rate": 3.6007705819026574e-06, "loss": 0.8037, "step": 104950 }, { "epoch": 17.841237463878972, "grad_norm": 13.145312309265137, "learning_rate": 3.5979375602017113e-06, "loss": 0.7363, "step": 104960 }, { "epoch": 17.84293727689954, "grad_norm": 11.34133243560791, "learning_rate": 3.5951045385007653e-06, "loss": 0.7381, "step": 104970 }, { "epoch": 17.84463708992011, "grad_norm": 11.480025291442871, "learning_rate": 3.592271516799819e-06, "loss": 0.8302, "step": 104980 }, { "epoch": 17.846336902940678, "grad_norm": 12.656524658203125, "learning_rate": 3.5894384950988723e-06, "loss": 0.7202, "step": 104990 }, { "epoch": 17.848036715961243, "grad_norm": 16.547489166259766, "learning_rate": 3.5866054733979263e-06, "loss": 0.737, "step": 105000 }, { "epoch": 17.84973652898181, "grad_norm": 15.60332202911377, "learning_rate": 3.58377245169698e-06, "loss": 0.7543, "step": 105010 }, { "epoch": 17.85143634200238, "grad_norm": 14.234878540039062, "learning_rate": 3.5809394299960337e-06, "loss": 0.7257, "step": 105020 }, { "epoch": 17.85313615502295, "grad_norm": 12.042972564697266, "learning_rate": 3.5781064082950877e-06, "loss": 0.6696, "step": 105030 }, { "epoch": 17.854835968043517, "grad_norm": 14.010059356689453, "learning_rate": 3.5752733865941416e-06, "loss": 0.5012, "step": 105040 }, { "epoch": 17.856535781064082, "grad_norm": 11.880012512207031, "learning_rate": 3.572440364893195e-06, "loss": 0.6528, "step": 105050 }, { "epoch": 17.85823559408465, "grad_norm": 15.322239875793457, "learning_rate": 3.569607343192249e-06, "loss": 0.6367, "step": 105060 }, { "epoch": 17.85993540710522, "grad_norm": 80.05103302001953, "learning_rate": 3.566774321491303e-06, "loss": 0.784, "step": 105070 }, { "epoch": 17.861635220125788, "grad_norm": 10.412836074829102, "learning_rate": 3.563941299790356e-06, "loss": 0.5794, "step": 105080 }, { "epoch": 17.863335033146353, "grad_norm": 21.371959686279297, "learning_rate": 3.56110827808941e-06, "loss": 0.7181, "step": 105090 }, { "epoch": 17.86503484616692, "grad_norm": 8.720879554748535, "learning_rate": 3.558275256388464e-06, "loss": 0.7745, "step": 105100 }, { "epoch": 17.86673465918749, "grad_norm": 11.196144104003906, "learning_rate": 3.5554422346875176e-06, "loss": 0.6549, "step": 105110 }, { "epoch": 17.868434472208058, "grad_norm": 14.017193794250488, "learning_rate": 3.5526092129865715e-06, "loss": 0.6882, "step": 105120 }, { "epoch": 17.870134285228623, "grad_norm": 11.374069213867188, "learning_rate": 3.5497761912856255e-06, "loss": 0.6629, "step": 105130 }, { "epoch": 17.87183409824919, "grad_norm": 14.548428535461426, "learning_rate": 3.5469431695846794e-06, "loss": 0.6377, "step": 105140 }, { "epoch": 17.87353391126976, "grad_norm": 10.91201114654541, "learning_rate": 3.544110147883733e-06, "loss": 0.6664, "step": 105150 }, { "epoch": 17.87523372429033, "grad_norm": 12.516351699829102, "learning_rate": 3.541277126182787e-06, "loss": 0.7853, "step": 105160 }, { "epoch": 17.876933537310897, "grad_norm": 9.977371215820312, "learning_rate": 3.5384441044818404e-06, "loss": 0.6675, "step": 105170 }, { "epoch": 17.878633350331462, "grad_norm": 17.598163604736328, "learning_rate": 3.535611082780894e-06, "loss": 0.8089, "step": 105180 }, { "epoch": 17.88033316335203, "grad_norm": 26.424732208251953, "learning_rate": 3.532778061079948e-06, "loss": 0.6878, "step": 105190 }, { "epoch": 17.8820329763726, "grad_norm": 13.261884689331055, "learning_rate": 3.529945039379002e-06, "loss": 0.6188, "step": 105200 }, { "epoch": 17.883732789393168, "grad_norm": 17.243396759033203, "learning_rate": 3.5271120176780553e-06, "loss": 0.7134, "step": 105210 }, { "epoch": 17.885432602413733, "grad_norm": 14.67490291595459, "learning_rate": 3.5242789959771093e-06, "loss": 0.7658, "step": 105220 }, { "epoch": 17.8871324154343, "grad_norm": 17.043109893798828, "learning_rate": 3.5214459742761632e-06, "loss": 0.6624, "step": 105230 }, { "epoch": 17.88883222845487, "grad_norm": 14.323284149169922, "learning_rate": 3.518612952575217e-06, "loss": 0.8225, "step": 105240 }, { "epoch": 17.89053204147544, "grad_norm": 14.189586639404297, "learning_rate": 3.5157799308742703e-06, "loss": 0.7005, "step": 105250 }, { "epoch": 17.892231854496007, "grad_norm": 13.757676124572754, "learning_rate": 3.512946909173324e-06, "loss": 0.7001, "step": 105260 }, { "epoch": 17.893931667516572, "grad_norm": 16.591550827026367, "learning_rate": 3.510113887472378e-06, "loss": 0.7074, "step": 105270 }, { "epoch": 17.89563148053714, "grad_norm": 14.605254173278809, "learning_rate": 3.5072808657714317e-06, "loss": 0.7159, "step": 105280 }, { "epoch": 17.89733129355771, "grad_norm": 10.32448959350586, "learning_rate": 3.5044478440704856e-06, "loss": 0.7734, "step": 105290 }, { "epoch": 17.899031106578278, "grad_norm": 14.767251968383789, "learning_rate": 3.5016148223695396e-06, "loss": 0.6191, "step": 105300 }, { "epoch": 17.900730919598843, "grad_norm": 10.428812026977539, "learning_rate": 3.4987818006685935e-06, "loss": 0.7296, "step": 105310 }, { "epoch": 17.90243073261941, "grad_norm": 15.465906143188477, "learning_rate": 3.495948778967647e-06, "loss": 0.6518, "step": 105320 }, { "epoch": 17.90413054563998, "grad_norm": 14.303313255310059, "learning_rate": 3.493115757266701e-06, "loss": 0.5531, "step": 105330 }, { "epoch": 17.90583035866055, "grad_norm": 11.091917991638184, "learning_rate": 3.4902827355657545e-06, "loss": 0.7465, "step": 105340 }, { "epoch": 17.907530171681117, "grad_norm": 12.267991065979004, "learning_rate": 3.487449713864808e-06, "loss": 0.7267, "step": 105350 }, { "epoch": 17.909229984701682, "grad_norm": 15.59743595123291, "learning_rate": 3.484616692163862e-06, "loss": 0.6297, "step": 105360 }, { "epoch": 17.91092979772225, "grad_norm": 12.674735069274902, "learning_rate": 3.481783670462916e-06, "loss": 0.6607, "step": 105370 }, { "epoch": 17.91262961074282, "grad_norm": 9.259995460510254, "learning_rate": 3.4789506487619694e-06, "loss": 0.6359, "step": 105380 }, { "epoch": 17.914329423763387, "grad_norm": 14.789612770080566, "learning_rate": 3.4761176270610234e-06, "loss": 0.7079, "step": 105390 }, { "epoch": 17.916029236783952, "grad_norm": 11.774147987365723, "learning_rate": 3.4732846053600773e-06, "loss": 0.7398, "step": 105400 }, { "epoch": 17.91772904980452, "grad_norm": 12.902629852294922, "learning_rate": 3.4704515836591313e-06, "loss": 0.6496, "step": 105410 }, { "epoch": 17.91942886282509, "grad_norm": 15.58703899383545, "learning_rate": 3.4676185619581848e-06, "loss": 0.7738, "step": 105420 }, { "epoch": 17.921128675845658, "grad_norm": 11.563376426696777, "learning_rate": 3.4647855402572383e-06, "loss": 0.6899, "step": 105430 }, { "epoch": 17.922828488866223, "grad_norm": 14.070679664611816, "learning_rate": 3.4619525185562923e-06, "loss": 0.8642, "step": 105440 }, { "epoch": 17.92452830188679, "grad_norm": 13.673259735107422, "learning_rate": 3.4591194968553458e-06, "loss": 0.7192, "step": 105450 }, { "epoch": 17.92622811490736, "grad_norm": 11.899333953857422, "learning_rate": 3.4562864751543997e-06, "loss": 0.9407, "step": 105460 }, { "epoch": 17.92792792792793, "grad_norm": 8.873486518859863, "learning_rate": 3.4534534534534537e-06, "loss": 0.7455, "step": 105470 }, { "epoch": 17.929627740948497, "grad_norm": 24.922866821289062, "learning_rate": 3.450620431752507e-06, "loss": 0.7286, "step": 105480 }, { "epoch": 17.931327553969062, "grad_norm": 11.24691104888916, "learning_rate": 3.447787410051561e-06, "loss": 0.6197, "step": 105490 }, { "epoch": 17.93302736698963, "grad_norm": 10.793559074401855, "learning_rate": 3.444954388350615e-06, "loss": 0.5555, "step": 105500 }, { "epoch": 17.9347271800102, "grad_norm": 13.219667434692383, "learning_rate": 3.4421213666496686e-06, "loss": 0.6057, "step": 105510 }, { "epoch": 17.936426993030768, "grad_norm": 13.024702072143555, "learning_rate": 3.439288344948722e-06, "loss": 0.7829, "step": 105520 }, { "epoch": 17.938126806051333, "grad_norm": 56.50273132324219, "learning_rate": 3.436455323247776e-06, "loss": 0.7211, "step": 105530 }, { "epoch": 17.9398266190719, "grad_norm": 14.755928993225098, "learning_rate": 3.43362230154683e-06, "loss": 0.7787, "step": 105540 }, { "epoch": 17.94152643209247, "grad_norm": 18.590919494628906, "learning_rate": 3.4307892798458835e-06, "loss": 0.8032, "step": 105550 }, { "epoch": 17.94322624511304, "grad_norm": 25.028886795043945, "learning_rate": 3.4279562581449375e-06, "loss": 0.688, "step": 105560 }, { "epoch": 17.944926058133607, "grad_norm": 12.174605369567871, "learning_rate": 3.4251232364439914e-06, "loss": 0.6867, "step": 105570 }, { "epoch": 17.946625871154172, "grad_norm": 21.267141342163086, "learning_rate": 3.422290214743045e-06, "loss": 0.8066, "step": 105580 }, { "epoch": 17.94832568417474, "grad_norm": 12.618656158447266, "learning_rate": 3.419457193042099e-06, "loss": 0.7532, "step": 105590 }, { "epoch": 17.95002549719531, "grad_norm": 15.410808563232422, "learning_rate": 3.4166241713411524e-06, "loss": 0.6336, "step": 105600 }, { "epoch": 17.951725310215878, "grad_norm": 13.343132972717285, "learning_rate": 3.4137911496402064e-06, "loss": 0.7135, "step": 105610 }, { "epoch": 17.953425123236443, "grad_norm": 15.196329116821289, "learning_rate": 3.41095812793926e-06, "loss": 0.6828, "step": 105620 }, { "epoch": 17.95512493625701, "grad_norm": 19.779146194458008, "learning_rate": 3.408125106238314e-06, "loss": 0.5896, "step": 105630 }, { "epoch": 17.95682474927758, "grad_norm": 13.053189277648926, "learning_rate": 3.4052920845373678e-06, "loss": 0.7855, "step": 105640 }, { "epoch": 17.958524562298148, "grad_norm": 14.996746063232422, "learning_rate": 3.4024590628364213e-06, "loss": 0.5627, "step": 105650 }, { "epoch": 17.960224375318717, "grad_norm": 14.061668395996094, "learning_rate": 3.3996260411354752e-06, "loss": 0.8121, "step": 105660 }, { "epoch": 17.96192418833928, "grad_norm": 26.783655166625977, "learning_rate": 3.396793019434529e-06, "loss": 0.5987, "step": 105670 }, { "epoch": 17.96362400135985, "grad_norm": 11.823741912841797, "learning_rate": 3.393959997733583e-06, "loss": 0.7525, "step": 105680 }, { "epoch": 17.96532381438042, "grad_norm": 11.497852325439453, "learning_rate": 3.3911269760326362e-06, "loss": 0.8249, "step": 105690 }, { "epoch": 17.967023627400987, "grad_norm": 12.565598487854004, "learning_rate": 3.38829395433169e-06, "loss": 0.8364, "step": 105700 }, { "epoch": 17.968723440421552, "grad_norm": 11.797224998474121, "learning_rate": 3.385460932630744e-06, "loss": 0.723, "step": 105710 }, { "epoch": 17.97042325344212, "grad_norm": 11.367481231689453, "learning_rate": 3.3826279109297976e-06, "loss": 0.6882, "step": 105720 }, { "epoch": 17.97212306646269, "grad_norm": 13.96083927154541, "learning_rate": 3.3797948892288516e-06, "loss": 0.6252, "step": 105730 }, { "epoch": 17.973822879483258, "grad_norm": 14.617650032043457, "learning_rate": 3.3769618675279055e-06, "loss": 0.8119, "step": 105740 }, { "epoch": 17.975522692503823, "grad_norm": 12.558250427246094, "learning_rate": 3.374128845826959e-06, "loss": 0.6555, "step": 105750 }, { "epoch": 17.97722250552439, "grad_norm": 12.95213508605957, "learning_rate": 3.371295824126013e-06, "loss": 0.7106, "step": 105760 }, { "epoch": 17.97892231854496, "grad_norm": 89.66090393066406, "learning_rate": 3.3684628024250665e-06, "loss": 0.6674, "step": 105770 }, { "epoch": 17.98062213156553, "grad_norm": 15.876022338867188, "learning_rate": 3.3656297807241205e-06, "loss": 0.6496, "step": 105780 }, { "epoch": 17.982321944586097, "grad_norm": 19.39858627319336, "learning_rate": 3.362796759023174e-06, "loss": 0.7342, "step": 105790 }, { "epoch": 17.984021757606662, "grad_norm": 28.263473510742188, "learning_rate": 3.359963737322228e-06, "loss": 0.6603, "step": 105800 }, { "epoch": 17.98572157062723, "grad_norm": 11.66361141204834, "learning_rate": 3.357130715621282e-06, "loss": 0.6301, "step": 105810 }, { "epoch": 17.9874213836478, "grad_norm": 11.357054710388184, "learning_rate": 3.3542976939203354e-06, "loss": 0.7112, "step": 105820 }, { "epoch": 17.989121196668368, "grad_norm": 11.209905624389648, "learning_rate": 3.3514646722193893e-06, "loss": 0.6001, "step": 105830 }, { "epoch": 17.990821009688933, "grad_norm": 14.595479965209961, "learning_rate": 3.3486316505184433e-06, "loss": 0.6395, "step": 105840 }, { "epoch": 17.9925208227095, "grad_norm": 16.812231063842773, "learning_rate": 3.345798628817497e-06, "loss": 0.5388, "step": 105850 }, { "epoch": 17.99422063573007, "grad_norm": 14.639120101928711, "learning_rate": 3.3429656071165503e-06, "loss": 0.6062, "step": 105860 }, { "epoch": 17.99592044875064, "grad_norm": 24.554298400878906, "learning_rate": 3.3401325854156043e-06, "loss": 0.7897, "step": 105870 }, { "epoch": 17.997620261771207, "grad_norm": 17.50150489807129, "learning_rate": 3.3372995637146582e-06, "loss": 0.7056, "step": 105880 }, { "epoch": 17.999320074791772, "grad_norm": 12.815673828125, "learning_rate": 3.3344665420137117e-06, "loss": 0.7878, "step": 105890 }, { "epoch": 18.0, "eval_cer": 1.0, "eval_loss": 2.630465030670166, "eval_runtime": 1982.7595, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.238, "step": 105894 }, { "epoch": 18.00101988781234, "grad_norm": 11.000694274902344, "learning_rate": 3.3316335203127657e-06, "loss": 0.6827, "step": 105900 }, { "epoch": 18.00271970083291, "grad_norm": 20.2462158203125, "learning_rate": 3.3288004986118196e-06, "loss": 0.6738, "step": 105910 }, { "epoch": 18.004419513853477, "grad_norm": 10.19456672668457, "learning_rate": 3.325967476910873e-06, "loss": 0.5156, "step": 105920 }, { "epoch": 18.006119326874042, "grad_norm": 14.905196189880371, "learning_rate": 3.323134455209927e-06, "loss": 0.6324, "step": 105930 }, { "epoch": 18.00781913989461, "grad_norm": 20.51091766357422, "learning_rate": 3.320301433508981e-06, "loss": 0.7908, "step": 105940 }, { "epoch": 18.00951895291518, "grad_norm": 11.675190925598145, "learning_rate": 3.3174684118080346e-06, "loss": 0.6534, "step": 105950 }, { "epoch": 18.011218765935748, "grad_norm": 9.86892318725586, "learning_rate": 3.314635390107088e-06, "loss": 0.4748, "step": 105960 }, { "epoch": 18.012918578956313, "grad_norm": 18.010103225708008, "learning_rate": 3.311802368406142e-06, "loss": 0.6599, "step": 105970 }, { "epoch": 18.01461839197688, "grad_norm": 13.685086250305176, "learning_rate": 3.308969346705196e-06, "loss": 0.949, "step": 105980 }, { "epoch": 18.01631820499745, "grad_norm": 12.689372062683105, "learning_rate": 3.3061363250042495e-06, "loss": 0.7518, "step": 105990 }, { "epoch": 18.01801801801802, "grad_norm": 21.6133975982666, "learning_rate": 3.3033033033033035e-06, "loss": 0.5938, "step": 106000 }, { "epoch": 18.019717831038587, "grad_norm": 13.711371421813965, "learning_rate": 3.3004702816023574e-06, "loss": 0.7732, "step": 106010 }, { "epoch": 18.021417644059152, "grad_norm": 10.325464248657227, "learning_rate": 3.297637259901411e-06, "loss": 0.67, "step": 106020 }, { "epoch": 18.02311745707972, "grad_norm": 19.346036911010742, "learning_rate": 3.2948042382004644e-06, "loss": 0.998, "step": 106030 }, { "epoch": 18.02481727010029, "grad_norm": 17.494190216064453, "learning_rate": 3.2919712164995184e-06, "loss": 0.711, "step": 106040 }, { "epoch": 18.026517083120858, "grad_norm": 12.022750854492188, "learning_rate": 3.2891381947985723e-06, "loss": 0.6945, "step": 106050 }, { "epoch": 18.028216896141423, "grad_norm": 14.038985252380371, "learning_rate": 3.286305173097626e-06, "loss": 0.6516, "step": 106060 }, { "epoch": 18.02991670916199, "grad_norm": 15.566807746887207, "learning_rate": 3.28347215139668e-06, "loss": 0.666, "step": 106070 }, { "epoch": 18.03161652218256, "grad_norm": 9.108607292175293, "learning_rate": 3.2806391296957337e-06, "loss": 0.6789, "step": 106080 }, { "epoch": 18.03331633520313, "grad_norm": 19.5233154296875, "learning_rate": 3.2778061079947873e-06, "loss": 0.6059, "step": 106090 }, { "epoch": 18.035016148223697, "grad_norm": 9.745096206665039, "learning_rate": 3.274973086293841e-06, "loss": 0.5971, "step": 106100 }, { "epoch": 18.036715961244262, "grad_norm": 17.614845275878906, "learning_rate": 3.272140064592895e-06, "loss": 0.7704, "step": 106110 }, { "epoch": 18.03841577426483, "grad_norm": 12.784191131591797, "learning_rate": 3.2693070428919483e-06, "loss": 0.8307, "step": 106120 }, { "epoch": 18.0401155872854, "grad_norm": 18.407987594604492, "learning_rate": 3.266474021191002e-06, "loss": 0.6986, "step": 106130 }, { "epoch": 18.041815400305968, "grad_norm": 22.38176727294922, "learning_rate": 3.263640999490056e-06, "loss": 0.8192, "step": 106140 }, { "epoch": 18.043515213326533, "grad_norm": 16.018383026123047, "learning_rate": 3.26080797778911e-06, "loss": 0.5952, "step": 106150 }, { "epoch": 18.0452150263471, "grad_norm": 23.71875762939453, "learning_rate": 3.2579749560881636e-06, "loss": 0.6583, "step": 106160 }, { "epoch": 18.04691483936767, "grad_norm": 12.802960395812988, "learning_rate": 3.2551419343872176e-06, "loss": 0.6098, "step": 106170 }, { "epoch": 18.04861465238824, "grad_norm": 18.16434097290039, "learning_rate": 3.2523089126862715e-06, "loss": 0.6129, "step": 106180 }, { "epoch": 18.050314465408807, "grad_norm": 11.592076301574707, "learning_rate": 3.249475890985325e-06, "loss": 0.6283, "step": 106190 }, { "epoch": 18.052014278429372, "grad_norm": 9.579781532287598, "learning_rate": 3.246642869284379e-06, "loss": 0.7019, "step": 106200 }, { "epoch": 18.05371409144994, "grad_norm": 10.969815254211426, "learning_rate": 3.2438098475834325e-06, "loss": 0.6207, "step": 106210 }, { "epoch": 18.05541390447051, "grad_norm": 15.177177429199219, "learning_rate": 3.2409768258824864e-06, "loss": 0.6988, "step": 106220 }, { "epoch": 18.057113717491077, "grad_norm": 19.62632179260254, "learning_rate": 3.23814380418154e-06, "loss": 0.6056, "step": 106230 }, { "epoch": 18.058813530511642, "grad_norm": 12.45175838470459, "learning_rate": 3.235310782480594e-06, "loss": 0.7117, "step": 106240 }, { "epoch": 18.06051334353221, "grad_norm": 34.315589904785156, "learning_rate": 3.232477760779648e-06, "loss": 0.6802, "step": 106250 }, { "epoch": 18.06221315655278, "grad_norm": 17.415864944458008, "learning_rate": 3.2296447390787014e-06, "loss": 0.6496, "step": 106260 }, { "epoch": 18.063912969573348, "grad_norm": 18.958396911621094, "learning_rate": 3.2268117173777553e-06, "loss": 0.564, "step": 106270 }, { "epoch": 18.065612782593913, "grad_norm": 15.301645278930664, "learning_rate": 3.2239786956768093e-06, "loss": 0.6372, "step": 106280 }, { "epoch": 18.06731259561448, "grad_norm": 17.233144760131836, "learning_rate": 3.2211456739758624e-06, "loss": 0.8306, "step": 106290 }, { "epoch": 18.06901240863505, "grad_norm": 10.980719566345215, "learning_rate": 3.2183126522749163e-06, "loss": 0.6454, "step": 106300 }, { "epoch": 18.07071222165562, "grad_norm": 12.127683639526367, "learning_rate": 3.2154796305739702e-06, "loss": 0.6, "step": 106310 }, { "epoch": 18.072412034676187, "grad_norm": 10.305431365966797, "learning_rate": 3.212646608873024e-06, "loss": 0.7354, "step": 106320 }, { "epoch": 18.074111847696752, "grad_norm": 10.689987182617188, "learning_rate": 3.2098135871720777e-06, "loss": 0.5846, "step": 106330 }, { "epoch": 18.07581166071732, "grad_norm": 15.981642723083496, "learning_rate": 3.2069805654711317e-06, "loss": 0.6629, "step": 106340 }, { "epoch": 18.07751147373789, "grad_norm": 11.110024452209473, "learning_rate": 3.2041475437701856e-06, "loss": 0.6287, "step": 106350 }, { "epoch": 18.079211286758458, "grad_norm": 14.011601448059082, "learning_rate": 3.201314522069239e-06, "loss": 0.7041, "step": 106360 }, { "epoch": 18.080911099779023, "grad_norm": 15.604575157165527, "learning_rate": 3.198481500368293e-06, "loss": 0.5147, "step": 106370 }, { "epoch": 18.08261091279959, "grad_norm": 10.67956829071045, "learning_rate": 3.1956484786673466e-06, "loss": 0.7815, "step": 106380 }, { "epoch": 18.08431072582016, "grad_norm": 13.505992889404297, "learning_rate": 3.1928154569664e-06, "loss": 0.5798, "step": 106390 }, { "epoch": 18.08601053884073, "grad_norm": 16.25925064086914, "learning_rate": 3.189982435265454e-06, "loss": 0.7174, "step": 106400 }, { "epoch": 18.087710351861297, "grad_norm": 7.917768955230713, "learning_rate": 3.187149413564508e-06, "loss": 0.5343, "step": 106410 }, { "epoch": 18.089410164881862, "grad_norm": 11.90796184539795, "learning_rate": 3.184316391863562e-06, "loss": 0.6412, "step": 106420 }, { "epoch": 18.09110997790243, "grad_norm": 17.673419952392578, "learning_rate": 3.1814833701626155e-06, "loss": 0.6514, "step": 106430 }, { "epoch": 18.092809790923, "grad_norm": 14.456878662109375, "learning_rate": 3.1786503484616694e-06, "loss": 0.7202, "step": 106440 }, { "epoch": 18.094509603943568, "grad_norm": 13.498906135559082, "learning_rate": 3.1758173267607234e-06, "loss": 0.6379, "step": 106450 }, { "epoch": 18.096209416964133, "grad_norm": 13.25750732421875, "learning_rate": 3.172984305059777e-06, "loss": 0.6266, "step": 106460 }, { "epoch": 18.0979092299847, "grad_norm": 13.973183631896973, "learning_rate": 3.1701512833588304e-06, "loss": 0.6396, "step": 106470 }, { "epoch": 18.09960904300527, "grad_norm": 14.139656066894531, "learning_rate": 3.1673182616578844e-06, "loss": 0.7673, "step": 106480 }, { "epoch": 18.101308856025838, "grad_norm": 10.45577621459961, "learning_rate": 3.164485239956938e-06, "loss": 0.693, "step": 106490 }, { "epoch": 18.103008669046403, "grad_norm": 14.121512413024902, "learning_rate": 3.161652218255992e-06, "loss": 0.6819, "step": 106500 }, { "epoch": 18.10470848206697, "grad_norm": 15.538132667541504, "learning_rate": 3.1588191965550458e-06, "loss": 0.5467, "step": 106510 }, { "epoch": 18.10640829508754, "grad_norm": 17.457921981811523, "learning_rate": 3.1559861748540997e-06, "loss": 0.8262, "step": 106520 }, { "epoch": 18.10810810810811, "grad_norm": 12.541842460632324, "learning_rate": 3.1531531531531532e-06, "loss": 0.6087, "step": 106530 }, { "epoch": 18.109807921128677, "grad_norm": 12.329061508178711, "learning_rate": 3.150320131452207e-06, "loss": 0.6939, "step": 106540 }, { "epoch": 18.111507734149242, "grad_norm": 9.444287300109863, "learning_rate": 3.1474871097512607e-06, "loss": 0.6161, "step": 106550 }, { "epoch": 18.11320754716981, "grad_norm": 14.515714645385742, "learning_rate": 3.1446540880503142e-06, "loss": 0.6517, "step": 106560 }, { "epoch": 18.11490736019038, "grad_norm": 11.995407104492188, "learning_rate": 3.141821066349368e-06, "loss": 0.7414, "step": 106570 }, { "epoch": 18.116607173210948, "grad_norm": 26.776424407958984, "learning_rate": 3.138988044648422e-06, "loss": 0.8261, "step": 106580 }, { "epoch": 18.118306986231513, "grad_norm": 16.64508628845215, "learning_rate": 3.136155022947476e-06, "loss": 0.8034, "step": 106590 }, { "epoch": 18.12000679925208, "grad_norm": 10.141459465026855, "learning_rate": 3.1333220012465296e-06, "loss": 0.5604, "step": 106600 }, { "epoch": 18.12170661227265, "grad_norm": 16.121334075927734, "learning_rate": 3.1304889795455835e-06, "loss": 0.6638, "step": 106610 }, { "epoch": 18.12340642529322, "grad_norm": 10.889538764953613, "learning_rate": 3.1276559578446375e-06, "loss": 0.6651, "step": 106620 }, { "epoch": 18.125106238313787, "grad_norm": 11.323532104492188, "learning_rate": 3.124822936143691e-06, "loss": 0.7754, "step": 106630 }, { "epoch": 18.126806051334352, "grad_norm": 13.761784553527832, "learning_rate": 3.1219899144427445e-06, "loss": 0.753, "step": 106640 }, { "epoch": 18.12850586435492, "grad_norm": 63.2503547668457, "learning_rate": 3.1191568927417985e-06, "loss": 0.7873, "step": 106650 }, { "epoch": 18.13020567737549, "grad_norm": 13.713269233703613, "learning_rate": 3.116323871040852e-06, "loss": 0.718, "step": 106660 }, { "epoch": 18.131905490396058, "grad_norm": 7.999202251434326, "learning_rate": 3.113490849339906e-06, "loss": 0.4579, "step": 106670 }, { "epoch": 18.133605303416623, "grad_norm": 11.235584259033203, "learning_rate": 3.11065782763896e-06, "loss": 0.6263, "step": 106680 }, { "epoch": 18.13530511643719, "grad_norm": 12.407565116882324, "learning_rate": 3.107824805938014e-06, "loss": 0.7427, "step": 106690 }, { "epoch": 18.13700492945776, "grad_norm": 8.980663299560547, "learning_rate": 3.1049917842370673e-06, "loss": 0.5546, "step": 106700 }, { "epoch": 18.13870474247833, "grad_norm": 11.0708646774292, "learning_rate": 3.1021587625361213e-06, "loss": 0.5667, "step": 106710 }, { "epoch": 18.140404555498897, "grad_norm": 11.618905067443848, "learning_rate": 3.099325740835175e-06, "loss": 0.69, "step": 106720 }, { "epoch": 18.142104368519462, "grad_norm": 13.510001182556152, "learning_rate": 3.0964927191342283e-06, "loss": 0.608, "step": 106730 }, { "epoch": 18.14380418154003, "grad_norm": 13.825822830200195, "learning_rate": 3.0936596974332823e-06, "loss": 0.7514, "step": 106740 }, { "epoch": 18.1455039945606, "grad_norm": 13.549057006835938, "learning_rate": 3.0908266757323362e-06, "loss": 0.6829, "step": 106750 }, { "epoch": 18.147203807581167, "grad_norm": 14.518909454345703, "learning_rate": 3.0879936540313897e-06, "loss": 0.7758, "step": 106760 }, { "epoch": 18.148903620601732, "grad_norm": 22.58654022216797, "learning_rate": 3.0851606323304437e-06, "loss": 0.7929, "step": 106770 }, { "epoch": 18.1506034336223, "grad_norm": 13.78080940246582, "learning_rate": 3.0823276106294976e-06, "loss": 0.7035, "step": 106780 }, { "epoch": 18.15230324664287, "grad_norm": 22.640066146850586, "learning_rate": 3.0794945889285516e-06, "loss": 0.7277, "step": 106790 }, { "epoch": 18.154003059663438, "grad_norm": 39.402706146240234, "learning_rate": 3.076661567227605e-06, "loss": 0.612, "step": 106800 }, { "epoch": 18.155702872684003, "grad_norm": 14.718213081359863, "learning_rate": 3.0738285455266586e-06, "loss": 0.5469, "step": 106810 }, { "epoch": 18.15740268570457, "grad_norm": 18.387678146362305, "learning_rate": 3.0709955238257126e-06, "loss": 0.6998, "step": 106820 }, { "epoch": 18.15910249872514, "grad_norm": 13.059969902038574, "learning_rate": 3.068162502124766e-06, "loss": 0.8451, "step": 106830 }, { "epoch": 18.16080231174571, "grad_norm": 13.28002643585205, "learning_rate": 3.06532948042382e-06, "loss": 0.6472, "step": 106840 }, { "epoch": 18.162502124766277, "grad_norm": 12.772909164428711, "learning_rate": 3.062496458722874e-06, "loss": 0.751, "step": 106850 }, { "epoch": 18.164201937786842, "grad_norm": 16.67518424987793, "learning_rate": 3.059663437021928e-06, "loss": 0.6849, "step": 106860 }, { "epoch": 18.16590175080741, "grad_norm": 10.478543281555176, "learning_rate": 3.0568304153209814e-06, "loss": 0.6107, "step": 106870 }, { "epoch": 18.16760156382798, "grad_norm": 13.076752662658691, "learning_rate": 3.0539973936200354e-06, "loss": 0.5667, "step": 106880 }, { "epoch": 18.169301376848548, "grad_norm": 7.804294586181641, "learning_rate": 3.0511643719190893e-06, "loss": 0.6329, "step": 106890 }, { "epoch": 18.171001189869113, "grad_norm": 9.651110649108887, "learning_rate": 3.0483313502181424e-06, "loss": 0.6966, "step": 106900 }, { "epoch": 18.17270100288968, "grad_norm": 13.129249572753906, "learning_rate": 3.0454983285171964e-06, "loss": 0.7691, "step": 106910 }, { "epoch": 18.17440081591025, "grad_norm": 11.250322341918945, "learning_rate": 3.0426653068162503e-06, "loss": 0.5938, "step": 106920 }, { "epoch": 18.17610062893082, "grad_norm": 13.655542373657227, "learning_rate": 3.039832285115304e-06, "loss": 0.6928, "step": 106930 }, { "epoch": 18.177800441951387, "grad_norm": 15.709924697875977, "learning_rate": 3.036999263414358e-06, "loss": 0.6395, "step": 106940 }, { "epoch": 18.179500254971952, "grad_norm": 11.729962348937988, "learning_rate": 3.0341662417134117e-06, "loss": 0.7683, "step": 106950 }, { "epoch": 18.18120006799252, "grad_norm": 19.453842163085938, "learning_rate": 3.0313332200124657e-06, "loss": 0.5912, "step": 106960 }, { "epoch": 18.18289988101309, "grad_norm": 18.851133346557617, "learning_rate": 3.028500198311519e-06, "loss": 0.694, "step": 106970 }, { "epoch": 18.184599694033658, "grad_norm": 12.659942626953125, "learning_rate": 3.0256671766105727e-06, "loss": 0.6166, "step": 106980 }, { "epoch": 18.186299507054223, "grad_norm": 57.55571746826172, "learning_rate": 3.0228341549096267e-06, "loss": 0.7651, "step": 106990 }, { "epoch": 18.18799932007479, "grad_norm": 14.379255294799805, "learning_rate": 3.02000113320868e-06, "loss": 0.5721, "step": 107000 }, { "epoch": 18.18969913309536, "grad_norm": 22.388093948364258, "learning_rate": 3.017168111507734e-06, "loss": 0.7048, "step": 107010 }, { "epoch": 18.19139894611593, "grad_norm": 13.681445121765137, "learning_rate": 3.014335089806788e-06, "loss": 0.7535, "step": 107020 }, { "epoch": 18.193098759136497, "grad_norm": 17.222505569458008, "learning_rate": 3.0115020681058416e-06, "loss": 0.5281, "step": 107030 }, { "epoch": 18.19479857215706, "grad_norm": 14.087355613708496, "learning_rate": 3.0086690464048956e-06, "loss": 0.667, "step": 107040 }, { "epoch": 18.19649838517763, "grad_norm": 12.394309997558594, "learning_rate": 3.0058360247039495e-06, "loss": 0.7214, "step": 107050 }, { "epoch": 18.1981981981982, "grad_norm": 10.935067176818848, "learning_rate": 3.0030030030030034e-06, "loss": 0.668, "step": 107060 }, { "epoch": 18.199898011218767, "grad_norm": 14.308534622192383, "learning_rate": 3.0001699813020565e-06, "loss": 0.839, "step": 107070 }, { "epoch": 18.201597824239332, "grad_norm": 11.268843650817871, "learning_rate": 2.9973369596011105e-06, "loss": 0.696, "step": 107080 }, { "epoch": 18.2032976372599, "grad_norm": 16.921566009521484, "learning_rate": 2.9945039379001644e-06, "loss": 0.5502, "step": 107090 }, { "epoch": 18.20499745028047, "grad_norm": 20.29339027404785, "learning_rate": 2.991670916199218e-06, "loss": 0.8605, "step": 107100 }, { "epoch": 18.206697263301038, "grad_norm": 12.279505729675293, "learning_rate": 2.988837894498272e-06, "loss": 0.6077, "step": 107110 }, { "epoch": 18.208397076321603, "grad_norm": 21.356128692626953, "learning_rate": 2.986004872797326e-06, "loss": 0.6043, "step": 107120 }, { "epoch": 18.21009688934217, "grad_norm": 10.606879234313965, "learning_rate": 2.9831718510963794e-06, "loss": 0.689, "step": 107130 }, { "epoch": 18.21179670236274, "grad_norm": 17.245664596557617, "learning_rate": 2.9803388293954333e-06, "loss": 0.6709, "step": 107140 }, { "epoch": 18.21349651538331, "grad_norm": 10.174454689025879, "learning_rate": 2.9775058076944873e-06, "loss": 0.6555, "step": 107150 }, { "epoch": 18.215196328403877, "grad_norm": 12.51811408996582, "learning_rate": 2.9746727859935408e-06, "loss": 0.589, "step": 107160 }, { "epoch": 18.216896141424442, "grad_norm": 11.585125923156738, "learning_rate": 2.9718397642925943e-06, "loss": 0.5265, "step": 107170 }, { "epoch": 18.21859595444501, "grad_norm": 10.43516731262207, "learning_rate": 2.9690067425916482e-06, "loss": 0.6395, "step": 107180 }, { "epoch": 18.22029576746558, "grad_norm": 13.17324447631836, "learning_rate": 2.966173720890702e-06, "loss": 0.6682, "step": 107190 }, { "epoch": 18.221995580486148, "grad_norm": 7.090033054351807, "learning_rate": 2.9633406991897557e-06, "loss": 0.6174, "step": 107200 }, { "epoch": 18.223695393506713, "grad_norm": 22.474855422973633, "learning_rate": 2.9605076774888097e-06, "loss": 0.6985, "step": 107210 }, { "epoch": 18.22539520652728, "grad_norm": 8.794525146484375, "learning_rate": 2.9576746557878636e-06, "loss": 0.8517, "step": 107220 }, { "epoch": 18.22709501954785, "grad_norm": 19.75768280029297, "learning_rate": 2.9548416340869176e-06, "loss": 0.5292, "step": 107230 }, { "epoch": 18.22879483256842, "grad_norm": 9.534897804260254, "learning_rate": 2.9520086123859706e-06, "loss": 0.7547, "step": 107240 }, { "epoch": 18.230494645588987, "grad_norm": 12.410676956176758, "learning_rate": 2.9491755906850246e-06, "loss": 0.6152, "step": 107250 }, { "epoch": 18.232194458609552, "grad_norm": 11.906488418579102, "learning_rate": 2.9463425689840785e-06, "loss": 0.5922, "step": 107260 }, { "epoch": 18.23389427163012, "grad_norm": 13.370402336120605, "learning_rate": 2.943509547283132e-06, "loss": 0.7614, "step": 107270 }, { "epoch": 18.23559408465069, "grad_norm": 14.291935920715332, "learning_rate": 2.940676525582186e-06, "loss": 0.7097, "step": 107280 }, { "epoch": 18.237293897671258, "grad_norm": 11.011914253234863, "learning_rate": 2.93784350388124e-06, "loss": 0.8015, "step": 107290 }, { "epoch": 18.238993710691823, "grad_norm": 9.487515449523926, "learning_rate": 2.9350104821802935e-06, "loss": 0.6933, "step": 107300 }, { "epoch": 18.24069352371239, "grad_norm": 12.287381172180176, "learning_rate": 2.9321774604793474e-06, "loss": 0.5902, "step": 107310 }, { "epoch": 18.24239333673296, "grad_norm": 16.00690269470215, "learning_rate": 2.9293444387784014e-06, "loss": 0.6492, "step": 107320 }, { "epoch": 18.244093149753528, "grad_norm": 16.421634674072266, "learning_rate": 2.926511417077455e-06, "loss": 0.6658, "step": 107330 }, { "epoch": 18.245792962774093, "grad_norm": 13.97956371307373, "learning_rate": 2.9236783953765084e-06, "loss": 0.5533, "step": 107340 }, { "epoch": 18.24749277579466, "grad_norm": 11.126823425292969, "learning_rate": 2.9208453736755624e-06, "loss": 0.5498, "step": 107350 }, { "epoch": 18.24919258881523, "grad_norm": 13.29001235961914, "learning_rate": 2.9180123519746163e-06, "loss": 0.8304, "step": 107360 }, { "epoch": 18.2508924018358, "grad_norm": 12.645674705505371, "learning_rate": 2.91517933027367e-06, "loss": 0.75, "step": 107370 }, { "epoch": 18.252592214856367, "grad_norm": 15.518985748291016, "learning_rate": 2.9123463085727238e-06, "loss": 0.6008, "step": 107380 }, { "epoch": 18.254292027876932, "grad_norm": 11.740767478942871, "learning_rate": 2.9095132868717777e-06, "loss": 0.6425, "step": 107390 }, { "epoch": 18.2559918408975, "grad_norm": 11.340502738952637, "learning_rate": 2.9066802651708312e-06, "loss": 0.662, "step": 107400 }, { "epoch": 18.25769165391807, "grad_norm": 10.732854843139648, "learning_rate": 2.903847243469885e-06, "loss": 0.6612, "step": 107410 }, { "epoch": 18.259391466938638, "grad_norm": 14.430486679077148, "learning_rate": 2.9010142217689387e-06, "loss": 0.6763, "step": 107420 }, { "epoch": 18.261091279959203, "grad_norm": 9.144486427307129, "learning_rate": 2.8981812000679926e-06, "loss": 0.6483, "step": 107430 }, { "epoch": 18.26279109297977, "grad_norm": 11.97325611114502, "learning_rate": 2.895348178367046e-06, "loss": 0.7048, "step": 107440 }, { "epoch": 18.26449090600034, "grad_norm": 20.323078155517578, "learning_rate": 2.8925151566661e-06, "loss": 0.7068, "step": 107450 }, { "epoch": 18.26619071902091, "grad_norm": 18.154857635498047, "learning_rate": 2.889682134965154e-06, "loss": 0.7662, "step": 107460 }, { "epoch": 18.267890532041477, "grad_norm": 11.847582817077637, "learning_rate": 2.8868491132642076e-06, "loss": 0.6143, "step": 107470 }, { "epoch": 18.269590345062042, "grad_norm": 13.240612983703613, "learning_rate": 2.8840160915632615e-06, "loss": 0.662, "step": 107480 }, { "epoch": 18.27129015808261, "grad_norm": 18.48116683959961, "learning_rate": 2.8811830698623155e-06, "loss": 0.6813, "step": 107490 }, { "epoch": 18.27298997110318, "grad_norm": 16.678546905517578, "learning_rate": 2.878350048161369e-06, "loss": 0.6382, "step": 107500 }, { "epoch": 18.274689784123748, "grad_norm": 10.287698745727539, "learning_rate": 2.8755170264604225e-06, "loss": 0.6946, "step": 107510 }, { "epoch": 18.276389597144313, "grad_norm": 21.597179412841797, "learning_rate": 2.8726840047594765e-06, "loss": 0.693, "step": 107520 }, { "epoch": 18.27808941016488, "grad_norm": 19.85974884033203, "learning_rate": 2.8698509830585304e-06, "loss": 0.6452, "step": 107530 }, { "epoch": 18.27978922318545, "grad_norm": 19.13505744934082, "learning_rate": 2.867017961357584e-06, "loss": 0.6495, "step": 107540 }, { "epoch": 18.28148903620602, "grad_norm": 11.91862678527832, "learning_rate": 2.864184939656638e-06, "loss": 0.7883, "step": 107550 }, { "epoch": 18.283188849226583, "grad_norm": 11.369434356689453, "learning_rate": 2.861351917955692e-06, "loss": 0.6067, "step": 107560 }, { "epoch": 18.284888662247152, "grad_norm": 13.73724365234375, "learning_rate": 2.8585188962547453e-06, "loss": 0.5645, "step": 107570 }, { "epoch": 18.28658847526772, "grad_norm": 37.526695251464844, "learning_rate": 2.8556858745537993e-06, "loss": 0.6946, "step": 107580 }, { "epoch": 18.28828828828829, "grad_norm": 24.170068740844727, "learning_rate": 2.852852852852853e-06, "loss": 0.7053, "step": 107590 }, { "epoch": 18.289988101308857, "grad_norm": 16.014610290527344, "learning_rate": 2.8500198311519068e-06, "loss": 0.7041, "step": 107600 }, { "epoch": 18.291687914329422, "grad_norm": 11.980517387390137, "learning_rate": 2.8471868094509603e-06, "loss": 0.5972, "step": 107610 }, { "epoch": 18.29338772734999, "grad_norm": 14.180099487304688, "learning_rate": 2.8443537877500142e-06, "loss": 0.7468, "step": 107620 }, { "epoch": 18.29508754037056, "grad_norm": 21.697254180908203, "learning_rate": 2.841520766049068e-06, "loss": 0.6454, "step": 107630 }, { "epoch": 18.296787353391128, "grad_norm": 9.541812896728516, "learning_rate": 2.8386877443481217e-06, "loss": 0.6558, "step": 107640 }, { "epoch": 18.298487166411693, "grad_norm": 12.992751121520996, "learning_rate": 2.8358547226471756e-06, "loss": 0.5546, "step": 107650 }, { "epoch": 18.30018697943226, "grad_norm": 14.076074600219727, "learning_rate": 2.8330217009462296e-06, "loss": 0.676, "step": 107660 }, { "epoch": 18.30188679245283, "grad_norm": 11.490182876586914, "learning_rate": 2.830188679245283e-06, "loss": 0.7097, "step": 107670 }, { "epoch": 18.3035866054734, "grad_norm": 17.43284797668457, "learning_rate": 2.8273556575443366e-06, "loss": 0.5702, "step": 107680 }, { "epoch": 18.305286418493967, "grad_norm": 9.990279197692871, "learning_rate": 2.8245226358433906e-06, "loss": 0.6851, "step": 107690 }, { "epoch": 18.306986231514532, "grad_norm": 13.420347213745117, "learning_rate": 2.8216896141424445e-06, "loss": 0.5868, "step": 107700 }, { "epoch": 18.3086860445351, "grad_norm": 12.146523475646973, "learning_rate": 2.818856592441498e-06, "loss": 0.676, "step": 107710 }, { "epoch": 18.31038585755567, "grad_norm": 11.837635040283203, "learning_rate": 2.816023570740552e-06, "loss": 0.6165, "step": 107720 }, { "epoch": 18.312085670576238, "grad_norm": 11.212791442871094, "learning_rate": 2.813190549039606e-06, "loss": 0.7253, "step": 107730 }, { "epoch": 18.313785483596803, "grad_norm": 21.933732986450195, "learning_rate": 2.8103575273386594e-06, "loss": 0.7009, "step": 107740 }, { "epoch": 18.31548529661737, "grad_norm": 10.897342681884766, "learning_rate": 2.8075245056377134e-06, "loss": 0.7441, "step": 107750 }, { "epoch": 18.31718510963794, "grad_norm": 12.588489532470703, "learning_rate": 2.804691483936767e-06, "loss": 0.8481, "step": 107760 }, { "epoch": 18.31888492265851, "grad_norm": 13.397385597229004, "learning_rate": 2.801858462235821e-06, "loss": 0.6104, "step": 107770 }, { "epoch": 18.320584735679077, "grad_norm": 9.090060234069824, "learning_rate": 2.7990254405348744e-06, "loss": 0.6566, "step": 107780 }, { "epoch": 18.322284548699642, "grad_norm": 14.056296348571777, "learning_rate": 2.7961924188339283e-06, "loss": 0.5463, "step": 107790 }, { "epoch": 18.32398436172021, "grad_norm": 11.857784271240234, "learning_rate": 2.7933593971329823e-06, "loss": 0.7173, "step": 107800 }, { "epoch": 18.32568417474078, "grad_norm": 15.510668754577637, "learning_rate": 2.790526375432036e-06, "loss": 0.5311, "step": 107810 }, { "epoch": 18.327383987761348, "grad_norm": 12.322892189025879, "learning_rate": 2.7876933537310897e-06, "loss": 0.6275, "step": 107820 }, { "epoch": 18.329083800781913, "grad_norm": 12.927490234375, "learning_rate": 2.7848603320301437e-06, "loss": 0.5981, "step": 107830 }, { "epoch": 18.33078361380248, "grad_norm": 14.996673583984375, "learning_rate": 2.782027310329197e-06, "loss": 0.6871, "step": 107840 }, { "epoch": 18.33248342682305, "grad_norm": 13.43359375, "learning_rate": 2.7791942886282507e-06, "loss": 0.6497, "step": 107850 }, { "epoch": 18.334183239843618, "grad_norm": 15.111001014709473, "learning_rate": 2.7763612669273047e-06, "loss": 0.567, "step": 107860 }, { "epoch": 18.335883052864183, "grad_norm": 16.346271514892578, "learning_rate": 2.7735282452263586e-06, "loss": 0.7235, "step": 107870 }, { "epoch": 18.33758286588475, "grad_norm": 11.639496803283691, "learning_rate": 2.770695223525412e-06, "loss": 0.7395, "step": 107880 }, { "epoch": 18.33928267890532, "grad_norm": 16.45425033569336, "learning_rate": 2.767862201824466e-06, "loss": 0.6121, "step": 107890 }, { "epoch": 18.34098249192589, "grad_norm": 10.936417579650879, "learning_rate": 2.76502918012352e-06, "loss": 0.5447, "step": 107900 }, { "epoch": 18.342682304946457, "grad_norm": 10.016031265258789, "learning_rate": 2.7621961584225736e-06, "loss": 0.7275, "step": 107910 }, { "epoch": 18.344382117967022, "grad_norm": 13.526524543762207, "learning_rate": 2.7593631367216275e-06, "loss": 0.6703, "step": 107920 }, { "epoch": 18.34608193098759, "grad_norm": 16.06247329711914, "learning_rate": 2.7565301150206814e-06, "loss": 0.6096, "step": 107930 }, { "epoch": 18.34778174400816, "grad_norm": 12.133719444274902, "learning_rate": 2.7536970933197345e-06, "loss": 0.6971, "step": 107940 }, { "epoch": 18.349481557028728, "grad_norm": 15.301513671875, "learning_rate": 2.7508640716187885e-06, "loss": 0.6937, "step": 107950 }, { "epoch": 18.351181370049293, "grad_norm": 15.380694389343262, "learning_rate": 2.7480310499178424e-06, "loss": 0.7333, "step": 107960 }, { "epoch": 18.35288118306986, "grad_norm": 8.933706283569336, "learning_rate": 2.7451980282168964e-06, "loss": 0.8096, "step": 107970 }, { "epoch": 18.35458099609043, "grad_norm": 14.256694793701172, "learning_rate": 2.74236500651595e-06, "loss": 0.5858, "step": 107980 }, { "epoch": 18.356280809111, "grad_norm": 15.199570655822754, "learning_rate": 2.739531984815004e-06, "loss": 0.6611, "step": 107990 }, { "epoch": 18.357980622131567, "grad_norm": 13.134126663208008, "learning_rate": 2.736698963114058e-06, "loss": 0.7318, "step": 108000 }, { "epoch": 18.359680435152132, "grad_norm": 18.280319213867188, "learning_rate": 2.7338659414131113e-06, "loss": 0.6112, "step": 108010 }, { "epoch": 18.3613802481727, "grad_norm": 16.551227569580078, "learning_rate": 2.731032919712165e-06, "loss": 0.7158, "step": 108020 }, { "epoch": 18.36308006119327, "grad_norm": 14.206374168395996, "learning_rate": 2.7281998980112188e-06, "loss": 0.5971, "step": 108030 }, { "epoch": 18.364779874213838, "grad_norm": 8.636414527893066, "learning_rate": 2.7253668763102723e-06, "loss": 0.5452, "step": 108040 }, { "epoch": 18.366479687234403, "grad_norm": 12.593174934387207, "learning_rate": 2.7225338546093262e-06, "loss": 0.7586, "step": 108050 }, { "epoch": 18.36817950025497, "grad_norm": 10.590043067932129, "learning_rate": 2.71970083290838e-06, "loss": 0.6064, "step": 108060 }, { "epoch": 18.36987931327554, "grad_norm": 10.79879093170166, "learning_rate": 2.716867811207434e-06, "loss": 0.6558, "step": 108070 }, { "epoch": 18.37157912629611, "grad_norm": 13.720645904541016, "learning_rate": 2.7140347895064877e-06, "loss": 0.5127, "step": 108080 }, { "epoch": 18.373278939316677, "grad_norm": 15.533486366271973, "learning_rate": 2.7112017678055416e-06, "loss": 0.7887, "step": 108090 }, { "epoch": 18.374978752337242, "grad_norm": 14.08851432800293, "learning_rate": 2.7083687461045955e-06, "loss": 0.5873, "step": 108100 }, { "epoch": 18.37667856535781, "grad_norm": 10.914384841918945, "learning_rate": 2.7055357244036486e-06, "loss": 0.8102, "step": 108110 }, { "epoch": 18.37837837837838, "grad_norm": 14.843244552612305, "learning_rate": 2.7027027027027026e-06, "loss": 0.7655, "step": 108120 }, { "epoch": 18.380078191398947, "grad_norm": 12.093708038330078, "learning_rate": 2.6998696810017565e-06, "loss": 0.6984, "step": 108130 }, { "epoch": 18.381778004419512, "grad_norm": 9.837991714477539, "learning_rate": 2.6970366593008105e-06, "loss": 0.6252, "step": 108140 }, { "epoch": 18.38347781744008, "grad_norm": 14.486950874328613, "learning_rate": 2.694203637599864e-06, "loss": 0.7353, "step": 108150 }, { "epoch": 18.38517763046065, "grad_norm": 11.60461711883545, "learning_rate": 2.691370615898918e-06, "loss": 0.7571, "step": 108160 }, { "epoch": 18.386877443481218, "grad_norm": 15.025263786315918, "learning_rate": 2.688537594197972e-06, "loss": 0.6558, "step": 108170 }, { "epoch": 18.388577256501783, "grad_norm": 13.142189025878906, "learning_rate": 2.6857045724970254e-06, "loss": 0.5719, "step": 108180 }, { "epoch": 18.39027706952235, "grad_norm": 8.198689460754395, "learning_rate": 2.682871550796079e-06, "loss": 0.4967, "step": 108190 }, { "epoch": 18.39197688254292, "grad_norm": 12.931389808654785, "learning_rate": 2.680038529095133e-06, "loss": 0.7384, "step": 108200 }, { "epoch": 18.39367669556349, "grad_norm": 20.73943328857422, "learning_rate": 2.6772055073941864e-06, "loss": 0.7175, "step": 108210 }, { "epoch": 18.395376508584057, "grad_norm": 10.647001266479492, "learning_rate": 2.6743724856932404e-06, "loss": 0.7197, "step": 108220 }, { "epoch": 18.397076321604622, "grad_norm": 13.022782325744629, "learning_rate": 2.6715394639922943e-06, "loss": 0.7006, "step": 108230 }, { "epoch": 18.39877613462519, "grad_norm": 12.775113105773926, "learning_rate": 2.6687064422913482e-06, "loss": 0.7946, "step": 108240 }, { "epoch": 18.40047594764576, "grad_norm": 10.997644424438477, "learning_rate": 2.6658734205904018e-06, "loss": 0.6841, "step": 108250 }, { "epoch": 18.402175760666328, "grad_norm": 13.833998680114746, "learning_rate": 2.6630403988894557e-06, "loss": 0.627, "step": 108260 }, { "epoch": 18.403875573686893, "grad_norm": 13.4607572555542, "learning_rate": 2.6602073771885097e-06, "loss": 0.8253, "step": 108270 }, { "epoch": 18.40557538670746, "grad_norm": 12.73213005065918, "learning_rate": 2.6573743554875628e-06, "loss": 0.6102, "step": 108280 }, { "epoch": 18.40727519972803, "grad_norm": 11.686298370361328, "learning_rate": 2.6545413337866167e-06, "loss": 0.7265, "step": 108290 }, { "epoch": 18.4089750127486, "grad_norm": 18.290796279907227, "learning_rate": 2.6517083120856706e-06, "loss": 0.7325, "step": 108300 }, { "epoch": 18.410674825769167, "grad_norm": 13.45712661743164, "learning_rate": 2.648875290384724e-06, "loss": 0.6868, "step": 108310 }, { "epoch": 18.412374638789732, "grad_norm": 18.483461380004883, "learning_rate": 2.646042268683778e-06, "loss": 0.4554, "step": 108320 }, { "epoch": 18.4140744518103, "grad_norm": 122.50652313232422, "learning_rate": 2.643209246982832e-06, "loss": 0.6437, "step": 108330 }, { "epoch": 18.41577426483087, "grad_norm": 11.72729206085205, "learning_rate": 2.640376225281886e-06, "loss": 0.6426, "step": 108340 }, { "epoch": 18.417474077851438, "grad_norm": 10.779498100280762, "learning_rate": 2.6375432035809395e-06, "loss": 0.6389, "step": 108350 }, { "epoch": 18.419173890872003, "grad_norm": 13.033455848693848, "learning_rate": 2.6347101818799935e-06, "loss": 0.6439, "step": 108360 }, { "epoch": 18.42087370389257, "grad_norm": 12.640581130981445, "learning_rate": 2.631877160179047e-06, "loss": 0.5816, "step": 108370 }, { "epoch": 18.42257351691314, "grad_norm": 10.3375883102417, "learning_rate": 2.6290441384781005e-06, "loss": 0.5387, "step": 108380 }, { "epoch": 18.42427332993371, "grad_norm": 12.399112701416016, "learning_rate": 2.6262111167771545e-06, "loss": 0.8357, "step": 108390 }, { "epoch": 18.425973142954277, "grad_norm": 25.735260009765625, "learning_rate": 2.6233780950762084e-06, "loss": 0.7084, "step": 108400 }, { "epoch": 18.427672955974842, "grad_norm": 10.990311622619629, "learning_rate": 2.6205450733752623e-06, "loss": 0.6481, "step": 108410 }, { "epoch": 18.42937276899541, "grad_norm": 9.909876823425293, "learning_rate": 2.617712051674316e-06, "loss": 0.6645, "step": 108420 }, { "epoch": 18.43107258201598, "grad_norm": 13.750862121582031, "learning_rate": 2.61487902997337e-06, "loss": 0.6319, "step": 108430 }, { "epoch": 18.432772395036547, "grad_norm": 13.387417793273926, "learning_rate": 2.6120460082724238e-06, "loss": 0.6563, "step": 108440 }, { "epoch": 18.434472208057112, "grad_norm": 13.119730949401855, "learning_rate": 2.609212986571477e-06, "loss": 0.8304, "step": 108450 }, { "epoch": 18.43617202107768, "grad_norm": 10.133941650390625, "learning_rate": 2.606379964870531e-06, "loss": 0.614, "step": 108460 }, { "epoch": 18.43787183409825, "grad_norm": 16.82797622680664, "learning_rate": 2.6035469431695847e-06, "loss": 0.676, "step": 108470 }, { "epoch": 18.439571647118818, "grad_norm": 10.12150764465332, "learning_rate": 2.6007139214686383e-06, "loss": 0.6388, "step": 108480 }, { "epoch": 18.441271460139383, "grad_norm": 9.948561668395996, "learning_rate": 2.5978808997676922e-06, "loss": 0.7428, "step": 108490 }, { "epoch": 18.44297127315995, "grad_norm": 8.986380577087402, "learning_rate": 2.595047878066746e-06, "loss": 0.587, "step": 108500 }, { "epoch": 18.44467108618052, "grad_norm": 11.883316993713379, "learning_rate": 2.5922148563658e-06, "loss": 0.5666, "step": 108510 }, { "epoch": 18.44637089920109, "grad_norm": 18.260845184326172, "learning_rate": 2.5893818346648536e-06, "loss": 0.657, "step": 108520 }, { "epoch": 18.448070712221657, "grad_norm": 11.532139778137207, "learning_rate": 2.5865488129639076e-06, "loss": 0.5474, "step": 108530 }, { "epoch": 18.449770525242222, "grad_norm": 11.721071243286133, "learning_rate": 2.583715791262961e-06, "loss": 0.498, "step": 108540 }, { "epoch": 18.45147033826279, "grad_norm": 15.876815795898438, "learning_rate": 2.5808827695620146e-06, "loss": 0.5787, "step": 108550 }, { "epoch": 18.45317015128336, "grad_norm": 23.63764762878418, "learning_rate": 2.5780497478610686e-06, "loss": 0.7508, "step": 108560 }, { "epoch": 18.454869964303928, "grad_norm": 11.425383567810059, "learning_rate": 2.5752167261601225e-06, "loss": 0.6549, "step": 108570 }, { "epoch": 18.456569777324493, "grad_norm": 20.819517135620117, "learning_rate": 2.572383704459176e-06, "loss": 0.755, "step": 108580 }, { "epoch": 18.45826959034506, "grad_norm": 15.32142162322998, "learning_rate": 2.56955068275823e-06, "loss": 0.6523, "step": 108590 }, { "epoch": 18.45996940336563, "grad_norm": 13.710079193115234, "learning_rate": 2.566717661057284e-06, "loss": 0.7814, "step": 108600 }, { "epoch": 18.4616692163862, "grad_norm": 13.529488563537598, "learning_rate": 2.563884639356338e-06, "loss": 0.622, "step": 108610 }, { "epoch": 18.463369029406767, "grad_norm": 17.07630729675293, "learning_rate": 2.5610516176553914e-06, "loss": 0.6503, "step": 108620 }, { "epoch": 18.465068842427332, "grad_norm": 10.824841499328613, "learning_rate": 2.558218595954445e-06, "loss": 0.7746, "step": 108630 }, { "epoch": 18.4667686554479, "grad_norm": 8.927742004394531, "learning_rate": 2.555385574253499e-06, "loss": 0.7326, "step": 108640 }, { "epoch": 18.46846846846847, "grad_norm": 13.189002990722656, "learning_rate": 2.5525525525525524e-06, "loss": 0.5609, "step": 108650 }, { "epoch": 18.470168281489038, "grad_norm": 11.987892150878906, "learning_rate": 2.5497195308516063e-06, "loss": 0.8195, "step": 108660 }, { "epoch": 18.471868094509603, "grad_norm": 14.120084762573242, "learning_rate": 2.5468865091506603e-06, "loss": 1.0098, "step": 108670 }, { "epoch": 18.47356790753017, "grad_norm": 15.410679817199707, "learning_rate": 2.544053487449714e-06, "loss": 0.6076, "step": 108680 }, { "epoch": 18.47526772055074, "grad_norm": 13.846610069274902, "learning_rate": 2.5412204657487677e-06, "loss": 0.8108, "step": 108690 }, { "epoch": 18.476967533571308, "grad_norm": 14.58398723602295, "learning_rate": 2.5383874440478217e-06, "loss": 0.684, "step": 108700 }, { "epoch": 18.478667346591873, "grad_norm": 16.13322639465332, "learning_rate": 2.535554422346875e-06, "loss": 0.7677, "step": 108710 }, { "epoch": 18.48036715961244, "grad_norm": 13.523923873901367, "learning_rate": 2.5327214006459287e-06, "loss": 0.5916, "step": 108720 }, { "epoch": 18.48206697263301, "grad_norm": 10.155628204345703, "learning_rate": 2.5298883789449827e-06, "loss": 0.5779, "step": 108730 }, { "epoch": 18.48376678565358, "grad_norm": 14.649252891540527, "learning_rate": 2.5270553572440366e-06, "loss": 0.5046, "step": 108740 }, { "epoch": 18.485466598674147, "grad_norm": 20.289897918701172, "learning_rate": 2.52422233554309e-06, "loss": 0.8173, "step": 108750 }, { "epoch": 18.487166411694712, "grad_norm": 15.990422248840332, "learning_rate": 2.521389313842144e-06, "loss": 0.6659, "step": 108760 }, { "epoch": 18.48886622471528, "grad_norm": 10.741677284240723, "learning_rate": 2.518556292141198e-06, "loss": 0.7303, "step": 108770 }, { "epoch": 18.49056603773585, "grad_norm": 15.40678596496582, "learning_rate": 2.515723270440252e-06, "loss": 0.6958, "step": 108780 }, { "epoch": 18.492265850756418, "grad_norm": 13.806096076965332, "learning_rate": 2.5128902487393055e-06, "loss": 0.8985, "step": 108790 }, { "epoch": 18.493965663776983, "grad_norm": 20.964221954345703, "learning_rate": 2.510057227038359e-06, "loss": 0.784, "step": 108800 }, { "epoch": 18.49566547679755, "grad_norm": 12.284660339355469, "learning_rate": 2.507224205337413e-06, "loss": 0.617, "step": 108810 }, { "epoch": 18.49736528981812, "grad_norm": 11.547100067138672, "learning_rate": 2.5043911836364665e-06, "loss": 0.5735, "step": 108820 }, { "epoch": 18.49906510283869, "grad_norm": 22.47297477722168, "learning_rate": 2.5015581619355204e-06, "loss": 0.7858, "step": 108830 }, { "epoch": 18.500764915859257, "grad_norm": 21.607078552246094, "learning_rate": 2.4987251402345744e-06, "loss": 0.6482, "step": 108840 }, { "epoch": 18.502464728879822, "grad_norm": 11.869945526123047, "learning_rate": 2.495892118533628e-06, "loss": 0.68, "step": 108850 }, { "epoch": 18.50416454190039, "grad_norm": 111.62750244140625, "learning_rate": 2.493059096832682e-06, "loss": 0.7409, "step": 108860 }, { "epoch": 18.50586435492096, "grad_norm": 9.917304039001465, "learning_rate": 2.4902260751317358e-06, "loss": 0.6159, "step": 108870 }, { "epoch": 18.507564167941528, "grad_norm": 13.88718318939209, "learning_rate": 2.4873930534307897e-06, "loss": 0.7537, "step": 108880 }, { "epoch": 18.509263980962093, "grad_norm": 13.025751113891602, "learning_rate": 2.484560031729843e-06, "loss": 0.8606, "step": 108890 }, { "epoch": 18.51096379398266, "grad_norm": 13.317256927490234, "learning_rate": 2.4817270100288968e-06, "loss": 0.6772, "step": 108900 }, { "epoch": 18.51266360700323, "grad_norm": 17.096160888671875, "learning_rate": 2.4788939883279507e-06, "loss": 0.6428, "step": 108910 }, { "epoch": 18.5143634200238, "grad_norm": 16.91983985900879, "learning_rate": 2.4760609666270042e-06, "loss": 0.5376, "step": 108920 }, { "epoch": 18.516063233044363, "grad_norm": 12.583788871765137, "learning_rate": 2.473227944926058e-06, "loss": 0.7012, "step": 108930 }, { "epoch": 18.517763046064932, "grad_norm": 11.654969215393066, "learning_rate": 2.470394923225112e-06, "loss": 0.5839, "step": 108940 }, { "epoch": 18.5194628590855, "grad_norm": 18.50761604309082, "learning_rate": 2.4675619015241657e-06, "loss": 0.6078, "step": 108950 }, { "epoch": 18.52116267210607, "grad_norm": 22.546052932739258, "learning_rate": 2.4647288798232196e-06, "loss": 0.6223, "step": 108960 }, { "epoch": 18.522862485126637, "grad_norm": 28.604217529296875, "learning_rate": 2.461895858122273e-06, "loss": 0.6347, "step": 108970 }, { "epoch": 18.524562298147202, "grad_norm": 10.883044242858887, "learning_rate": 2.459062836421327e-06, "loss": 0.5054, "step": 108980 }, { "epoch": 18.52626211116777, "grad_norm": 9.305388450622559, "learning_rate": 2.4562298147203806e-06, "loss": 0.7294, "step": 108990 }, { "epoch": 18.52796192418834, "grad_norm": 15.42892074584961, "learning_rate": 2.4533967930194345e-06, "loss": 0.7753, "step": 109000 }, { "epoch": 18.529661737208908, "grad_norm": 17.317686080932617, "learning_rate": 2.4505637713184885e-06, "loss": 0.514, "step": 109010 }, { "epoch": 18.531361550229473, "grad_norm": 17.247617721557617, "learning_rate": 2.447730749617542e-06, "loss": 0.7753, "step": 109020 }, { "epoch": 18.53306136325004, "grad_norm": 13.278580665588379, "learning_rate": 2.444897727916596e-06, "loss": 0.7077, "step": 109030 }, { "epoch": 18.53476117627061, "grad_norm": 10.4600191116333, "learning_rate": 2.44206470621565e-06, "loss": 0.7029, "step": 109040 }, { "epoch": 18.53646098929118, "grad_norm": 12.242192268371582, "learning_rate": 2.439231684514704e-06, "loss": 0.6757, "step": 109050 }, { "epoch": 18.538160802311747, "grad_norm": 11.299118041992188, "learning_rate": 2.436398662813757e-06, "loss": 0.6891, "step": 109060 }, { "epoch": 18.539860615332312, "grad_norm": 16.052860260009766, "learning_rate": 2.433565641112811e-06, "loss": 0.7194, "step": 109070 }, { "epoch": 18.54156042835288, "grad_norm": 10.784053802490234, "learning_rate": 2.430732619411865e-06, "loss": 0.7477, "step": 109080 }, { "epoch": 18.54326024137345, "grad_norm": 10.121094703674316, "learning_rate": 2.4278995977109183e-06, "loss": 0.6258, "step": 109090 }, { "epoch": 18.544960054394018, "grad_norm": 16.88119125366211, "learning_rate": 2.4250665760099723e-06, "loss": 0.7678, "step": 109100 }, { "epoch": 18.546659867414583, "grad_norm": 10.083139419555664, "learning_rate": 2.4222335543090262e-06, "loss": 0.5763, "step": 109110 }, { "epoch": 18.54835968043515, "grad_norm": 9.913654327392578, "learning_rate": 2.4194005326080798e-06, "loss": 0.6445, "step": 109120 }, { "epoch": 18.55005949345572, "grad_norm": 9.652383804321289, "learning_rate": 2.4165675109071337e-06, "loss": 0.7156, "step": 109130 }, { "epoch": 18.55175930647629, "grad_norm": 9.949335098266602, "learning_rate": 2.4137344892061877e-06, "loss": 0.6384, "step": 109140 }, { "epoch": 18.553459119496857, "grad_norm": 12.110393524169922, "learning_rate": 2.410901467505241e-06, "loss": 0.6785, "step": 109150 }, { "epoch": 18.555158932517422, "grad_norm": 12.173349380493164, "learning_rate": 2.4080684458042947e-06, "loss": 0.6406, "step": 109160 }, { "epoch": 18.55685874553799, "grad_norm": 10.49935245513916, "learning_rate": 2.4052354241033486e-06, "loss": 0.5441, "step": 109170 }, { "epoch": 18.55855855855856, "grad_norm": 11.543756484985352, "learning_rate": 2.4024024024024026e-06, "loss": 0.7146, "step": 109180 }, { "epoch": 18.560258371579128, "grad_norm": 14.814289093017578, "learning_rate": 2.399569380701456e-06, "loss": 0.6805, "step": 109190 }, { "epoch": 18.561958184599693, "grad_norm": 27.99530029296875, "learning_rate": 2.39673635900051e-06, "loss": 0.6195, "step": 109200 }, { "epoch": 18.56365799762026, "grad_norm": 11.375823974609375, "learning_rate": 2.393903337299564e-06, "loss": 0.5962, "step": 109210 }, { "epoch": 18.56535781064083, "grad_norm": 15.666093826293945, "learning_rate": 2.3910703155986175e-06, "loss": 0.7858, "step": 109220 }, { "epoch": 18.5670576236614, "grad_norm": 19.255434036254883, "learning_rate": 2.388237293897671e-06, "loss": 0.5608, "step": 109230 }, { "epoch": 18.568757436681963, "grad_norm": 13.468907356262207, "learning_rate": 2.385404272196725e-06, "loss": 0.6875, "step": 109240 }, { "epoch": 18.57045724970253, "grad_norm": 10.30046272277832, "learning_rate": 2.382571250495779e-06, "loss": 0.647, "step": 109250 }, { "epoch": 18.5721570627231, "grad_norm": 16.99895477294922, "learning_rate": 2.3797382287948325e-06, "loss": 0.6797, "step": 109260 }, { "epoch": 18.57385687574367, "grad_norm": 11.345566749572754, "learning_rate": 2.3769052070938864e-06, "loss": 0.7717, "step": 109270 }, { "epoch": 18.575556688764237, "grad_norm": 13.06345272064209, "learning_rate": 2.3740721853929403e-06, "loss": 0.6102, "step": 109280 }, { "epoch": 18.577256501784802, "grad_norm": 10.756585121154785, "learning_rate": 2.371239163691994e-06, "loss": 0.7964, "step": 109290 }, { "epoch": 18.57895631480537, "grad_norm": 11.005313873291016, "learning_rate": 2.368406141991048e-06, "loss": 0.7428, "step": 109300 }, { "epoch": 18.58065612782594, "grad_norm": 22.927339553833008, "learning_rate": 2.3655731202901018e-06, "loss": 0.6821, "step": 109310 }, { "epoch": 18.582355940846508, "grad_norm": 12.699686050415039, "learning_rate": 2.3627400985891553e-06, "loss": 0.6364, "step": 109320 }, { "epoch": 18.584055753867073, "grad_norm": 17.207353591918945, "learning_rate": 2.359907076888209e-06, "loss": 0.7218, "step": 109330 }, { "epoch": 18.58575556688764, "grad_norm": 13.30526351928711, "learning_rate": 2.3570740551872627e-06, "loss": 0.6486, "step": 109340 }, { "epoch": 18.58745537990821, "grad_norm": 15.583955764770508, "learning_rate": 2.3542410334863167e-06, "loss": 0.5705, "step": 109350 }, { "epoch": 18.58915519292878, "grad_norm": 14.278075218200684, "learning_rate": 2.3514080117853702e-06, "loss": 0.7553, "step": 109360 }, { "epoch": 18.590855005949347, "grad_norm": 13.13748550415039, "learning_rate": 2.348574990084424e-06, "loss": 0.5689, "step": 109370 }, { "epoch": 18.592554818969912, "grad_norm": 13.871536254882812, "learning_rate": 2.345741968383478e-06, "loss": 0.618, "step": 109380 }, { "epoch": 18.59425463199048, "grad_norm": 16.988460540771484, "learning_rate": 2.3429089466825316e-06, "loss": 0.6121, "step": 109390 }, { "epoch": 18.59595444501105, "grad_norm": 9.91679859161377, "learning_rate": 2.3400759249815856e-06, "loss": 0.5212, "step": 109400 }, { "epoch": 18.597654258031618, "grad_norm": 15.037026405334473, "learning_rate": 2.337242903280639e-06, "loss": 0.7817, "step": 109410 }, { "epoch": 18.599354071052183, "grad_norm": 10.841473579406738, "learning_rate": 2.334409881579693e-06, "loss": 0.5533, "step": 109420 }, { "epoch": 18.60105388407275, "grad_norm": 10.824895858764648, "learning_rate": 2.3315768598787466e-06, "loss": 0.7961, "step": 109430 }, { "epoch": 18.60275369709332, "grad_norm": 13.46741008758545, "learning_rate": 2.3287438381778005e-06, "loss": 0.6915, "step": 109440 }, { "epoch": 18.60445351011389, "grad_norm": 12.321673393249512, "learning_rate": 2.3259108164768545e-06, "loss": 0.5894, "step": 109450 }, { "epoch": 18.606153323134457, "grad_norm": 15.89731502532959, "learning_rate": 2.323077794775908e-06, "loss": 0.7161, "step": 109460 }, { "epoch": 18.607853136155022, "grad_norm": 14.323237419128418, "learning_rate": 2.320244773074962e-06, "loss": 0.8893, "step": 109470 }, { "epoch": 18.60955294917559, "grad_norm": 17.787879943847656, "learning_rate": 2.317411751374016e-06, "loss": 0.5728, "step": 109480 }, { "epoch": 18.61125276219616, "grad_norm": 19.783720016479492, "learning_rate": 2.314578729673069e-06, "loss": 0.6578, "step": 109490 }, { "epoch": 18.612952575216728, "grad_norm": 20.488187789916992, "learning_rate": 2.311745707972123e-06, "loss": 0.6146, "step": 109500 }, { "epoch": 18.614652388237293, "grad_norm": 14.610930442810059, "learning_rate": 2.308912686271177e-06, "loss": 0.8509, "step": 109510 }, { "epoch": 18.61635220125786, "grad_norm": 8.54058837890625, "learning_rate": 2.306079664570231e-06, "loss": 0.8039, "step": 109520 }, { "epoch": 18.61805201427843, "grad_norm": 14.553244590759277, "learning_rate": 2.3032466428692843e-06, "loss": 0.8149, "step": 109530 }, { "epoch": 18.619751827298998, "grad_norm": 15.115680694580078, "learning_rate": 2.3004136211683383e-06, "loss": 0.5345, "step": 109540 }, { "epoch": 18.621451640319563, "grad_norm": 12.29875373840332, "learning_rate": 2.297580599467392e-06, "loss": 0.6745, "step": 109550 }, { "epoch": 18.62315145334013, "grad_norm": 17.59662437438965, "learning_rate": 2.2947475777664457e-06, "loss": 0.6849, "step": 109560 }, { "epoch": 18.6248512663607, "grad_norm": 17.503572463989258, "learning_rate": 2.2919145560654997e-06, "loss": 0.7107, "step": 109570 }, { "epoch": 18.62655107938127, "grad_norm": 10.959151268005371, "learning_rate": 2.289081534364553e-06, "loss": 0.6345, "step": 109580 }, { "epoch": 18.628250892401837, "grad_norm": 11.695282936096191, "learning_rate": 2.2862485126636067e-06, "loss": 0.4948, "step": 109590 }, { "epoch": 18.629950705422402, "grad_norm": 12.096957206726074, "learning_rate": 2.2834154909626607e-06, "loss": 0.5728, "step": 109600 }, { "epoch": 18.63165051844297, "grad_norm": 11.488286972045898, "learning_rate": 2.2805824692617146e-06, "loss": 0.6753, "step": 109610 }, { "epoch": 18.63335033146354, "grad_norm": 41.49195098876953, "learning_rate": 2.2777494475607686e-06, "loss": 0.7712, "step": 109620 }, { "epoch": 18.635050144484108, "grad_norm": 12.270947456359863, "learning_rate": 2.274916425859822e-06, "loss": 0.6105, "step": 109630 }, { "epoch": 18.636749957504673, "grad_norm": 21.177804946899414, "learning_rate": 2.272083404158876e-06, "loss": 0.6682, "step": 109640 }, { "epoch": 18.63844977052524, "grad_norm": 14.427142143249512, "learning_rate": 2.26925038245793e-06, "loss": 0.7446, "step": 109650 }, { "epoch": 18.64014958354581, "grad_norm": 11.31387710571289, "learning_rate": 2.2664173607569835e-06, "loss": 0.8193, "step": 109660 }, { "epoch": 18.64184939656638, "grad_norm": 12.640203475952148, "learning_rate": 2.263584339056037e-06, "loss": 0.667, "step": 109670 }, { "epoch": 18.643549209586947, "grad_norm": 13.049812316894531, "learning_rate": 2.260751317355091e-06, "loss": 0.4732, "step": 109680 }, { "epoch": 18.645249022607512, "grad_norm": 11.693132400512695, "learning_rate": 2.257918295654145e-06, "loss": 0.757, "step": 109690 }, { "epoch": 18.64694883562808, "grad_norm": 10.748641967773438, "learning_rate": 2.2550852739531984e-06, "loss": 0.6753, "step": 109700 }, { "epoch": 18.64864864864865, "grad_norm": 11.927252769470215, "learning_rate": 2.2522522522522524e-06, "loss": 0.6061, "step": 109710 }, { "epoch": 18.650348461669218, "grad_norm": 16.162654876708984, "learning_rate": 2.2494192305513063e-06, "loss": 0.5932, "step": 109720 }, { "epoch": 18.652048274689783, "grad_norm": 15.435338020324707, "learning_rate": 2.24658620885036e-06, "loss": 0.7929, "step": 109730 }, { "epoch": 18.65374808771035, "grad_norm": 11.213724136352539, "learning_rate": 2.2437531871494138e-06, "loss": 0.4873, "step": 109740 }, { "epoch": 18.65544790073092, "grad_norm": 12.665424346923828, "learning_rate": 2.2409201654484673e-06, "loss": 0.6501, "step": 109750 }, { "epoch": 18.65714771375149, "grad_norm": 12.71850299835205, "learning_rate": 2.238087143747521e-06, "loss": 0.7415, "step": 109760 }, { "epoch": 18.658847526772057, "grad_norm": 8.437766075134277, "learning_rate": 2.2352541220465748e-06, "loss": 0.5112, "step": 109770 }, { "epoch": 18.660547339792622, "grad_norm": 14.072125434875488, "learning_rate": 2.2324211003456287e-06, "loss": 0.801, "step": 109780 }, { "epoch": 18.66224715281319, "grad_norm": 9.459383010864258, "learning_rate": 2.2295880786446827e-06, "loss": 0.6555, "step": 109790 }, { "epoch": 18.66394696583376, "grad_norm": 13.479005813598633, "learning_rate": 2.226755056943736e-06, "loss": 0.7989, "step": 109800 }, { "epoch": 18.665646778854327, "grad_norm": 10.062246322631836, "learning_rate": 2.22392203524279e-06, "loss": 0.6678, "step": 109810 }, { "epoch": 18.667346591874892, "grad_norm": 11.978510856628418, "learning_rate": 2.221089013541844e-06, "loss": 0.7346, "step": 109820 }, { "epoch": 18.66904640489546, "grad_norm": 13.906804084777832, "learning_rate": 2.2182559918408976e-06, "loss": 0.6539, "step": 109830 }, { "epoch": 18.67074621791603, "grad_norm": 11.731850624084473, "learning_rate": 2.215422970139951e-06, "loss": 0.5808, "step": 109840 }, { "epoch": 18.672446030936598, "grad_norm": 12.071517944335938, "learning_rate": 2.212589948439005e-06, "loss": 0.656, "step": 109850 }, { "epoch": 18.674145843957163, "grad_norm": 17.826459884643555, "learning_rate": 2.2097569267380586e-06, "loss": 0.628, "step": 109860 }, { "epoch": 18.67584565697773, "grad_norm": 13.675308227539062, "learning_rate": 2.2069239050371125e-06, "loss": 0.7058, "step": 109870 }, { "epoch": 18.6775454699983, "grad_norm": 13.8456449508667, "learning_rate": 2.2040908833361665e-06, "loss": 0.6118, "step": 109880 }, { "epoch": 18.67924528301887, "grad_norm": 12.457904815673828, "learning_rate": 2.2012578616352204e-06, "loss": 0.6733, "step": 109890 }, { "epoch": 18.680945096039437, "grad_norm": 9.920245170593262, "learning_rate": 2.198424839934274e-06, "loss": 0.6441, "step": 109900 }, { "epoch": 18.682644909060002, "grad_norm": 13.515439987182617, "learning_rate": 2.195591818233328e-06, "loss": 0.6596, "step": 109910 }, { "epoch": 18.68434472208057, "grad_norm": 15.127456665039062, "learning_rate": 2.1927587965323814e-06, "loss": 0.6519, "step": 109920 }, { "epoch": 18.68604453510114, "grad_norm": 10.099104881286621, "learning_rate": 2.189925774831435e-06, "loss": 0.7293, "step": 109930 }, { "epoch": 18.687744348121708, "grad_norm": 11.273537635803223, "learning_rate": 2.187092753130489e-06, "loss": 0.7656, "step": 109940 }, { "epoch": 18.689444161142273, "grad_norm": 11.31480598449707, "learning_rate": 2.184259731429543e-06, "loss": 0.4995, "step": 109950 }, { "epoch": 18.69114397416284, "grad_norm": 12.454044342041016, "learning_rate": 2.1814267097285968e-06, "loss": 0.7734, "step": 109960 }, { "epoch": 18.69284378718341, "grad_norm": 25.399314880371094, "learning_rate": 2.1785936880276503e-06, "loss": 0.7169, "step": 109970 }, { "epoch": 18.69454360020398, "grad_norm": 10.608716011047363, "learning_rate": 2.1757606663267042e-06, "loss": 0.6661, "step": 109980 }, { "epoch": 18.696243413224547, "grad_norm": 12.474044799804688, "learning_rate": 2.172927644625758e-06, "loss": 0.7151, "step": 109990 }, { "epoch": 18.697943226245112, "grad_norm": 10.231868743896484, "learning_rate": 2.1700946229248117e-06, "loss": 0.6031, "step": 110000 }, { "epoch": 18.69964303926568, "grad_norm": 13.714916229248047, "learning_rate": 2.1672616012238652e-06, "loss": 0.7069, "step": 110010 }, { "epoch": 18.70134285228625, "grad_norm": 15.989179611206055, "learning_rate": 2.164428579522919e-06, "loss": 0.8699, "step": 110020 }, { "epoch": 18.703042665306818, "grad_norm": 17.825794219970703, "learning_rate": 2.1615955578219727e-06, "loss": 0.5156, "step": 110030 }, { "epoch": 18.704742478327383, "grad_norm": 13.260734558105469, "learning_rate": 2.1587625361210266e-06, "loss": 0.7579, "step": 110040 }, { "epoch": 18.70644229134795, "grad_norm": 14.525283813476562, "learning_rate": 2.1559295144200806e-06, "loss": 0.7568, "step": 110050 }, { "epoch": 18.70814210436852, "grad_norm": 10.941263198852539, "learning_rate": 2.1530964927191345e-06, "loss": 0.5092, "step": 110060 }, { "epoch": 18.709841917389088, "grad_norm": 11.27065658569336, "learning_rate": 2.150263471018188e-06, "loss": 0.7078, "step": 110070 }, { "epoch": 18.711541730409657, "grad_norm": 16.218107223510742, "learning_rate": 2.147430449317242e-06, "loss": 0.6369, "step": 110080 }, { "epoch": 18.71324154343022, "grad_norm": 9.754741668701172, "learning_rate": 2.144597427616296e-06, "loss": 0.6906, "step": 110090 }, { "epoch": 18.71494135645079, "grad_norm": 24.522598266601562, "learning_rate": 2.141764405915349e-06, "loss": 0.6175, "step": 110100 }, { "epoch": 18.71664116947136, "grad_norm": 21.64860725402832, "learning_rate": 2.138931384214403e-06, "loss": 0.6383, "step": 110110 }, { "epoch": 18.718340982491927, "grad_norm": 11.830244064331055, "learning_rate": 2.136098362513457e-06, "loss": 0.6538, "step": 110120 }, { "epoch": 18.720040795512492, "grad_norm": 18.165788650512695, "learning_rate": 2.1332653408125105e-06, "loss": 0.7225, "step": 110130 }, { "epoch": 18.72174060853306, "grad_norm": 13.247288703918457, "learning_rate": 2.1304323191115644e-06, "loss": 0.7074, "step": 110140 }, { "epoch": 18.72344042155363, "grad_norm": 13.423798561096191, "learning_rate": 2.1275992974106183e-06, "loss": 0.5426, "step": 110150 }, { "epoch": 18.725140234574198, "grad_norm": 13.718534469604492, "learning_rate": 2.1247662757096723e-06, "loss": 0.7663, "step": 110160 }, { "epoch": 18.726840047594763, "grad_norm": 10.638605117797852, "learning_rate": 2.121933254008726e-06, "loss": 0.852, "step": 110170 }, { "epoch": 18.72853986061533, "grad_norm": 13.210268020629883, "learning_rate": 2.1191002323077793e-06, "loss": 0.5981, "step": 110180 }, { "epoch": 18.7302396736359, "grad_norm": 14.005560874938965, "learning_rate": 2.1162672106068333e-06, "loss": 0.6819, "step": 110190 }, { "epoch": 18.73193948665647, "grad_norm": 10.873401641845703, "learning_rate": 2.113434188905887e-06, "loss": 0.6454, "step": 110200 }, { "epoch": 18.733639299677037, "grad_norm": 12.198970794677734, "learning_rate": 2.1106011672049407e-06, "loss": 0.6277, "step": 110210 }, { "epoch": 18.735339112697602, "grad_norm": 14.920246124267578, "learning_rate": 2.1077681455039947e-06, "loss": 0.6371, "step": 110220 }, { "epoch": 18.73703892571817, "grad_norm": 14.85406494140625, "learning_rate": 2.104935123803048e-06, "loss": 0.7077, "step": 110230 }, { "epoch": 18.73873873873874, "grad_norm": 10.878179550170898, "learning_rate": 2.102102102102102e-06, "loss": 0.5966, "step": 110240 }, { "epoch": 18.740438551759308, "grad_norm": 8.644532203674316, "learning_rate": 2.099269080401156e-06, "loss": 0.6667, "step": 110250 }, { "epoch": 18.742138364779873, "grad_norm": 22.665328979492188, "learning_rate": 2.09643605870021e-06, "loss": 0.645, "step": 110260 }, { "epoch": 18.74383817780044, "grad_norm": 17.79946517944336, "learning_rate": 2.093603036999263e-06, "loss": 0.813, "step": 110270 }, { "epoch": 18.74553799082101, "grad_norm": 14.32571029663086, "learning_rate": 2.090770015298317e-06, "loss": 0.6762, "step": 110280 }, { "epoch": 18.74723780384158, "grad_norm": 13.185443878173828, "learning_rate": 2.087936993597371e-06, "loss": 0.6052, "step": 110290 }, { "epoch": 18.748937616862147, "grad_norm": 14.969944953918457, "learning_rate": 2.0851039718964246e-06, "loss": 0.8023, "step": 110300 }, { "epoch": 18.750637429882712, "grad_norm": 12.217276573181152, "learning_rate": 2.0822709501954785e-06, "loss": 0.6059, "step": 110310 }, { "epoch": 18.75233724290328, "grad_norm": 14.957794189453125, "learning_rate": 2.0794379284945324e-06, "loss": 0.6144, "step": 110320 }, { "epoch": 18.75403705592385, "grad_norm": 32.80960464477539, "learning_rate": 2.0766049067935864e-06, "loss": 0.7177, "step": 110330 }, { "epoch": 18.755736868944417, "grad_norm": 13.402711868286133, "learning_rate": 2.07377188509264e-06, "loss": 0.6396, "step": 110340 }, { "epoch": 18.757436681964982, "grad_norm": 12.421414375305176, "learning_rate": 2.070938863391694e-06, "loss": 0.892, "step": 110350 }, { "epoch": 18.75913649498555, "grad_norm": 16.102054595947266, "learning_rate": 2.0681058416907474e-06, "loss": 0.7195, "step": 110360 }, { "epoch": 18.76083630800612, "grad_norm": 13.859889030456543, "learning_rate": 2.065272819989801e-06, "loss": 0.6524, "step": 110370 }, { "epoch": 18.762536121026688, "grad_norm": 44.92290496826172, "learning_rate": 2.062439798288855e-06, "loss": 0.6702, "step": 110380 }, { "epoch": 18.764235934047253, "grad_norm": 12.178366661071777, "learning_rate": 2.059606776587909e-06, "loss": 0.6641, "step": 110390 }, { "epoch": 18.76593574706782, "grad_norm": 16.848005294799805, "learning_rate": 2.0567737548869623e-06, "loss": 0.6517, "step": 110400 }, { "epoch": 18.76763556008839, "grad_norm": 9.571760177612305, "learning_rate": 2.0539407331860163e-06, "loss": 0.5826, "step": 110410 }, { "epoch": 18.76933537310896, "grad_norm": 16.882617950439453, "learning_rate": 2.05110771148507e-06, "loss": 0.5964, "step": 110420 }, { "epoch": 18.771035186129527, "grad_norm": 12.014266967773438, "learning_rate": 2.048274689784124e-06, "loss": 0.5608, "step": 110430 }, { "epoch": 18.772734999150092, "grad_norm": 15.168807983398438, "learning_rate": 2.0454416680831773e-06, "loss": 0.6577, "step": 110440 }, { "epoch": 18.77443481217066, "grad_norm": 21.920045852661133, "learning_rate": 2.042608646382231e-06, "loss": 0.5758, "step": 110450 }, { "epoch": 18.77613462519123, "grad_norm": 13.731527328491211, "learning_rate": 2.039775624681285e-06, "loss": 0.6308, "step": 110460 }, { "epoch": 18.777834438211798, "grad_norm": 16.043991088867188, "learning_rate": 2.0369426029803387e-06, "loss": 0.5852, "step": 110470 }, { "epoch": 18.779534251232363, "grad_norm": 10.492969512939453, "learning_rate": 2.0341095812793926e-06, "loss": 0.6952, "step": 110480 }, { "epoch": 18.78123406425293, "grad_norm": 19.50147819519043, "learning_rate": 2.0312765595784466e-06, "loss": 0.7173, "step": 110490 }, { "epoch": 18.7829338772735, "grad_norm": 16.79318618774414, "learning_rate": 2.0284435378775e-06, "loss": 0.678, "step": 110500 }, { "epoch": 18.78463369029407, "grad_norm": 11.909316062927246, "learning_rate": 2.025610516176554e-06, "loss": 0.6992, "step": 110510 }, { "epoch": 18.786333503314637, "grad_norm": 15.521341323852539, "learning_rate": 2.022777494475608e-06, "loss": 0.5882, "step": 110520 }, { "epoch": 18.788033316335202, "grad_norm": 13.423686981201172, "learning_rate": 2.0199444727746615e-06, "loss": 0.7963, "step": 110530 }, { "epoch": 18.78973312935577, "grad_norm": 13.357772827148438, "learning_rate": 2.017111451073715e-06, "loss": 0.6405, "step": 110540 }, { "epoch": 18.79143294237634, "grad_norm": 12.318059921264648, "learning_rate": 2.014278429372769e-06, "loss": 0.7482, "step": 110550 }, { "epoch": 18.793132755396908, "grad_norm": 15.91976261138916, "learning_rate": 2.011445407671823e-06, "loss": 0.8568, "step": 110560 }, { "epoch": 18.794832568417473, "grad_norm": 13.402362823486328, "learning_rate": 2.0086123859708764e-06, "loss": 0.7809, "step": 110570 }, { "epoch": 18.79653238143804, "grad_norm": 17.234228134155273, "learning_rate": 2.0057793642699304e-06, "loss": 0.7763, "step": 110580 }, { "epoch": 18.79823219445861, "grad_norm": 12.50505542755127, "learning_rate": 2.0029463425689843e-06, "loss": 0.7019, "step": 110590 }, { "epoch": 18.79993200747918, "grad_norm": 12.889326095581055, "learning_rate": 2.0001133208680383e-06, "loss": 0.6081, "step": 110600 }, { "epoch": 18.801631820499743, "grad_norm": 19.797929763793945, "learning_rate": 1.9972802991670918e-06, "loss": 0.7289, "step": 110610 }, { "epoch": 18.803331633520312, "grad_norm": 12.4297456741333, "learning_rate": 1.9944472774661453e-06, "loss": 0.6667, "step": 110620 }, { "epoch": 18.80503144654088, "grad_norm": 12.399968147277832, "learning_rate": 1.9916142557651992e-06, "loss": 0.7249, "step": 110630 }, { "epoch": 18.80673125956145, "grad_norm": 13.935201644897461, "learning_rate": 1.9887812340642528e-06, "loss": 0.6586, "step": 110640 }, { "epoch": 18.808431072582017, "grad_norm": 14.483742713928223, "learning_rate": 1.9859482123633067e-06, "loss": 0.656, "step": 110650 }, { "epoch": 18.810130885602582, "grad_norm": 16.13223648071289, "learning_rate": 1.9831151906623607e-06, "loss": 0.6232, "step": 110660 }, { "epoch": 18.81183069862315, "grad_norm": 17.09195899963379, "learning_rate": 1.980282168961414e-06, "loss": 0.7793, "step": 110670 }, { "epoch": 18.81353051164372, "grad_norm": 17.807621002197266, "learning_rate": 1.977449147260468e-06, "loss": 0.7262, "step": 110680 }, { "epoch": 18.815230324664288, "grad_norm": 13.0647611618042, "learning_rate": 1.974616125559522e-06, "loss": 0.5656, "step": 110690 }, { "epoch": 18.816930137684853, "grad_norm": 14.410876274108887, "learning_rate": 1.9717831038585756e-06, "loss": 0.6501, "step": 110700 }, { "epoch": 18.81862995070542, "grad_norm": 14.130738258361816, "learning_rate": 1.968950082157629e-06, "loss": 0.5663, "step": 110710 }, { "epoch": 18.82032976372599, "grad_norm": 10.033867835998535, "learning_rate": 1.966117060456683e-06, "loss": 0.5525, "step": 110720 }, { "epoch": 18.82202957674656, "grad_norm": 16.896194458007812, "learning_rate": 1.963284038755737e-06, "loss": 0.8, "step": 110730 }, { "epoch": 18.823729389767127, "grad_norm": 12.753549575805664, "learning_rate": 1.9604510170547905e-06, "loss": 0.5964, "step": 110740 }, { "epoch": 18.825429202787692, "grad_norm": 13.110586166381836, "learning_rate": 1.9576179953538445e-06, "loss": 0.7513, "step": 110750 }, { "epoch": 18.82712901580826, "grad_norm": 10.530759811401367, "learning_rate": 1.9547849736528984e-06, "loss": 0.6127, "step": 110760 }, { "epoch": 18.82882882882883, "grad_norm": 39.927494049072266, "learning_rate": 1.951951951951952e-06, "loss": 0.8138, "step": 110770 }, { "epoch": 18.830528641849398, "grad_norm": 11.24643611907959, "learning_rate": 1.949118930251006e-06, "loss": 0.6447, "step": 110780 }, { "epoch": 18.832228454869963, "grad_norm": 13.251119613647461, "learning_rate": 1.9462859085500594e-06, "loss": 0.7587, "step": 110790 }, { "epoch": 18.83392826789053, "grad_norm": 11.720712661743164, "learning_rate": 1.9434528868491134e-06, "loss": 0.6908, "step": 110800 }, { "epoch": 18.8356280809111, "grad_norm": 18.73200798034668, "learning_rate": 1.940619865148167e-06, "loss": 0.5365, "step": 110810 }, { "epoch": 18.83732789393167, "grad_norm": 11.441621780395508, "learning_rate": 1.937786843447221e-06, "loss": 0.7229, "step": 110820 }, { "epoch": 18.839027706952237, "grad_norm": 28.008195877075195, "learning_rate": 1.9349538217462748e-06, "loss": 0.5625, "step": 110830 }, { "epoch": 18.840727519972802, "grad_norm": 13.281769752502441, "learning_rate": 1.9321208000453283e-06, "loss": 0.5946, "step": 110840 }, { "epoch": 18.84242733299337, "grad_norm": 11.231316566467285, "learning_rate": 1.9292877783443822e-06, "loss": 0.6508, "step": 110850 }, { "epoch": 18.84412714601394, "grad_norm": 10.360543251037598, "learning_rate": 1.926454756643436e-06, "loss": 0.5404, "step": 110860 }, { "epoch": 18.845826959034508, "grad_norm": 12.200214385986328, "learning_rate": 1.9236217349424897e-06, "loss": 0.6819, "step": 110870 }, { "epoch": 18.847526772055073, "grad_norm": 10.49976921081543, "learning_rate": 1.9207887132415432e-06, "loss": 0.5377, "step": 110880 }, { "epoch": 18.84922658507564, "grad_norm": 15.075289726257324, "learning_rate": 1.917955691540597e-06, "loss": 0.708, "step": 110890 }, { "epoch": 18.85092639809621, "grad_norm": 17.16350746154785, "learning_rate": 1.915122669839651e-06, "loss": 0.6877, "step": 110900 }, { "epoch": 18.852626211116778, "grad_norm": 13.770737648010254, "learning_rate": 1.9122896481387046e-06, "loss": 0.5897, "step": 110910 }, { "epoch": 18.854326024137343, "grad_norm": 12.028693199157715, "learning_rate": 1.9094566264377586e-06, "loss": 0.6268, "step": 110920 }, { "epoch": 18.85602583715791, "grad_norm": 10.674054145812988, "learning_rate": 1.9066236047368123e-06, "loss": 0.6976, "step": 110930 }, { "epoch": 18.85772565017848, "grad_norm": 12.003605842590332, "learning_rate": 1.9037905830358663e-06, "loss": 0.7083, "step": 110940 }, { "epoch": 18.85942546319905, "grad_norm": 14.279606819152832, "learning_rate": 1.90095756133492e-06, "loss": 0.6174, "step": 110950 }, { "epoch": 18.861125276219617, "grad_norm": 12.998806953430176, "learning_rate": 1.8981245396339735e-06, "loss": 0.617, "step": 110960 }, { "epoch": 18.862825089240182, "grad_norm": 38.58188247680664, "learning_rate": 1.8952915179330272e-06, "loss": 0.674, "step": 110970 }, { "epoch": 18.86452490226075, "grad_norm": 11.713929176330566, "learning_rate": 1.892458496232081e-06, "loss": 0.6348, "step": 110980 }, { "epoch": 18.86622471528132, "grad_norm": 13.387984275817871, "learning_rate": 1.889625474531135e-06, "loss": 0.6543, "step": 110990 }, { "epoch": 18.867924528301888, "grad_norm": 20.275489807128906, "learning_rate": 1.8867924528301887e-06, "loss": 0.6534, "step": 111000 }, { "epoch": 18.869624341322453, "grad_norm": 11.890373229980469, "learning_rate": 1.8839594311292426e-06, "loss": 0.6502, "step": 111010 }, { "epoch": 18.87132415434302, "grad_norm": 14.067229270935059, "learning_rate": 1.8811264094282963e-06, "loss": 0.5357, "step": 111020 }, { "epoch": 18.87302396736359, "grad_norm": 11.521566390991211, "learning_rate": 1.8782933877273503e-06, "loss": 0.5604, "step": 111030 }, { "epoch": 18.87472378038416, "grad_norm": 16.961999893188477, "learning_rate": 1.875460366026404e-06, "loss": 0.6704, "step": 111040 }, { "epoch": 18.876423593404727, "grad_norm": 15.589764595031738, "learning_rate": 1.8726273443254575e-06, "loss": 0.6669, "step": 111050 }, { "epoch": 18.878123406425292, "grad_norm": 12.276371002197266, "learning_rate": 1.8697943226245115e-06, "loss": 0.5785, "step": 111060 }, { "epoch": 18.87982321944586, "grad_norm": 11.8825101852417, "learning_rate": 1.866961300923565e-06, "loss": 0.617, "step": 111070 }, { "epoch": 18.88152303246643, "grad_norm": 21.43486213684082, "learning_rate": 1.864128279222619e-06, "loss": 0.7375, "step": 111080 }, { "epoch": 18.883222845486998, "grad_norm": 13.707063674926758, "learning_rate": 1.8612952575216727e-06, "loss": 0.7249, "step": 111090 }, { "epoch": 18.884922658507563, "grad_norm": 17.227659225463867, "learning_rate": 1.8584622358207264e-06, "loss": 0.7167, "step": 111100 }, { "epoch": 18.88662247152813, "grad_norm": 16.623701095581055, "learning_rate": 1.8556292141197802e-06, "loss": 0.614, "step": 111110 }, { "epoch": 18.8883222845487, "grad_norm": 17.963207244873047, "learning_rate": 1.8527961924188339e-06, "loss": 0.7745, "step": 111120 }, { "epoch": 18.89002209756927, "grad_norm": 11.816703796386719, "learning_rate": 1.8499631707178878e-06, "loss": 0.8145, "step": 111130 }, { "epoch": 18.891721910589837, "grad_norm": 14.137568473815918, "learning_rate": 1.8471301490169416e-06, "loss": 0.593, "step": 111140 }, { "epoch": 18.893421723610402, "grad_norm": 13.640721321105957, "learning_rate": 1.8442971273159953e-06, "loss": 0.6475, "step": 111150 }, { "epoch": 18.89512153663097, "grad_norm": 26.855289459228516, "learning_rate": 1.841464105615049e-06, "loss": 0.6752, "step": 111160 }, { "epoch": 18.89682134965154, "grad_norm": 11.026144027709961, "learning_rate": 1.8386310839141028e-06, "loss": 0.8442, "step": 111170 }, { "epoch": 18.898521162672107, "grad_norm": 13.930720329284668, "learning_rate": 1.8357980622131567e-06, "loss": 0.6707, "step": 111180 }, { "epoch": 18.900220975692672, "grad_norm": 15.222601890563965, "learning_rate": 1.8329650405122104e-06, "loss": 0.5251, "step": 111190 }, { "epoch": 18.90192078871324, "grad_norm": 14.079147338867188, "learning_rate": 1.830132018811264e-06, "loss": 0.5913, "step": 111200 }, { "epoch": 18.90362060173381, "grad_norm": 13.255675315856934, "learning_rate": 1.827298997110318e-06, "loss": 0.7773, "step": 111210 }, { "epoch": 18.905320414754378, "grad_norm": 15.104127883911133, "learning_rate": 1.8244659754093716e-06, "loss": 0.8393, "step": 111220 }, { "epoch": 18.907020227774943, "grad_norm": 12.7626314163208, "learning_rate": 1.8216329537084256e-06, "loss": 0.6897, "step": 111230 }, { "epoch": 18.90872004079551, "grad_norm": 17.830108642578125, "learning_rate": 1.8187999320074791e-06, "loss": 0.7597, "step": 111240 }, { "epoch": 18.91041985381608, "grad_norm": 10.277591705322266, "learning_rate": 1.8159669103065328e-06, "loss": 0.7722, "step": 111250 }, { "epoch": 18.91211966683665, "grad_norm": 13.444733619689941, "learning_rate": 1.8131338886055868e-06, "loss": 0.7084, "step": 111260 }, { "epoch": 18.913819479857217, "grad_norm": 10.326125144958496, "learning_rate": 1.8103008669046405e-06, "loss": 0.4982, "step": 111270 }, { "epoch": 18.915519292877782, "grad_norm": 16.41874122619629, "learning_rate": 1.8074678452036945e-06, "loss": 0.6003, "step": 111280 }, { "epoch": 18.91721910589835, "grad_norm": 17.491626739501953, "learning_rate": 1.804634823502748e-06, "loss": 0.5391, "step": 111290 }, { "epoch": 18.91891891891892, "grad_norm": 16.75188446044922, "learning_rate": 1.8018018018018017e-06, "loss": 0.589, "step": 111300 }, { "epoch": 18.920618731939488, "grad_norm": 11.617806434631348, "learning_rate": 1.7989687801008557e-06, "loss": 0.6039, "step": 111310 }, { "epoch": 18.922318544960053, "grad_norm": 13.805721282958984, "learning_rate": 1.7961357583999094e-06, "loss": 0.6658, "step": 111320 }, { "epoch": 18.92401835798062, "grad_norm": 15.10008430480957, "learning_rate": 1.7933027366989631e-06, "loss": 0.746, "step": 111330 }, { "epoch": 18.92571817100119, "grad_norm": 29.52974510192871, "learning_rate": 1.7904697149980169e-06, "loss": 0.6601, "step": 111340 }, { "epoch": 18.92741798402176, "grad_norm": 13.609736442565918, "learning_rate": 1.7876366932970708e-06, "loss": 0.5329, "step": 111350 }, { "epoch": 18.929117797042327, "grad_norm": 20.294498443603516, "learning_rate": 1.7848036715961246e-06, "loss": 0.7299, "step": 111360 }, { "epoch": 18.930817610062892, "grad_norm": 14.434770584106445, "learning_rate": 1.781970649895178e-06, "loss": 0.5891, "step": 111370 }, { "epoch": 18.93251742308346, "grad_norm": 11.493654251098633, "learning_rate": 1.779137628194232e-06, "loss": 0.7747, "step": 111380 }, { "epoch": 18.93421723610403, "grad_norm": 11.693155288696289, "learning_rate": 1.7763046064932858e-06, "loss": 0.6584, "step": 111390 }, { "epoch": 18.935917049124598, "grad_norm": 12.405132293701172, "learning_rate": 1.7734715847923397e-06, "loss": 0.7393, "step": 111400 }, { "epoch": 18.937616862145163, "grad_norm": 14.754964828491211, "learning_rate": 1.7706385630913934e-06, "loss": 0.6788, "step": 111410 }, { "epoch": 18.93931667516573, "grad_norm": 13.334320068359375, "learning_rate": 1.767805541390447e-06, "loss": 0.5156, "step": 111420 }, { "epoch": 18.9410164881863, "grad_norm": 10.886025428771973, "learning_rate": 1.764972519689501e-06, "loss": 0.621, "step": 111430 }, { "epoch": 18.942716301206868, "grad_norm": 12.523581504821777, "learning_rate": 1.7621394979885546e-06, "loss": 0.6545, "step": 111440 }, { "epoch": 18.944416114227437, "grad_norm": 12.612953186035156, "learning_rate": 1.7593064762876086e-06, "loss": 0.6218, "step": 111450 }, { "epoch": 18.946115927248, "grad_norm": 13.183771133422852, "learning_rate": 1.756473454586662e-06, "loss": 0.5875, "step": 111460 }, { "epoch": 18.94781574026857, "grad_norm": 12.027849197387695, "learning_rate": 1.7536404328857158e-06, "loss": 0.72, "step": 111470 }, { "epoch": 18.94951555328914, "grad_norm": 18.069047927856445, "learning_rate": 1.7508074111847698e-06, "loss": 0.6724, "step": 111480 }, { "epoch": 18.951215366309707, "grad_norm": 10.898741722106934, "learning_rate": 1.7479743894838235e-06, "loss": 0.6119, "step": 111490 }, { "epoch": 18.952915179330272, "grad_norm": 17.086376190185547, "learning_rate": 1.7451413677828772e-06, "loss": 0.5792, "step": 111500 }, { "epoch": 18.95461499235084, "grad_norm": 11.092029571533203, "learning_rate": 1.742308346081931e-06, "loss": 0.7485, "step": 111510 }, { "epoch": 18.95631480537141, "grad_norm": 7.049516677856445, "learning_rate": 1.7394753243809847e-06, "loss": 0.6237, "step": 111520 }, { "epoch": 18.958014618391978, "grad_norm": 13.328556060791016, "learning_rate": 1.7366423026800387e-06, "loss": 0.6106, "step": 111530 }, { "epoch": 18.959714431412543, "grad_norm": 13.669257164001465, "learning_rate": 1.7338092809790924e-06, "loss": 0.7444, "step": 111540 }, { "epoch": 18.96141424443311, "grad_norm": 10.99621295928955, "learning_rate": 1.7309762592781461e-06, "loss": 0.7718, "step": 111550 }, { "epoch": 18.96311405745368, "grad_norm": 10.408812522888184, "learning_rate": 1.7281432375771999e-06, "loss": 0.7229, "step": 111560 }, { "epoch": 18.96481387047425, "grad_norm": 16.29388999938965, "learning_rate": 1.7253102158762536e-06, "loss": 0.6684, "step": 111570 }, { "epoch": 18.966513683494817, "grad_norm": 24.344772338867188, "learning_rate": 1.7224771941753075e-06, "loss": 0.6575, "step": 111580 }, { "epoch": 18.968213496515382, "grad_norm": 12.048636436462402, "learning_rate": 1.719644172474361e-06, "loss": 0.6764, "step": 111590 }, { "epoch": 18.96991330953595, "grad_norm": 12.470648765563965, "learning_rate": 1.716811150773415e-06, "loss": 0.8845, "step": 111600 }, { "epoch": 18.97161312255652, "grad_norm": 15.771713256835938, "learning_rate": 1.7139781290724687e-06, "loss": 0.7653, "step": 111610 }, { "epoch": 18.973312935577088, "grad_norm": 14.787109375, "learning_rate": 1.7111451073715225e-06, "loss": 0.8135, "step": 111620 }, { "epoch": 18.975012748597653, "grad_norm": 15.680684089660645, "learning_rate": 1.7083120856705762e-06, "loss": 0.789, "step": 111630 }, { "epoch": 18.97671256161822, "grad_norm": 13.110064506530762, "learning_rate": 1.70547906396963e-06, "loss": 0.7085, "step": 111640 }, { "epoch": 18.97841237463879, "grad_norm": 12.273590087890625, "learning_rate": 1.7026460422686839e-06, "loss": 0.7612, "step": 111650 }, { "epoch": 18.98011218765936, "grad_norm": 12.26488971710205, "learning_rate": 1.6998130205677376e-06, "loss": 0.5844, "step": 111660 }, { "epoch": 18.981812000679927, "grad_norm": 13.243977546691895, "learning_rate": 1.6969799988667916e-06, "loss": 0.6377, "step": 111670 }, { "epoch": 18.983511813700492, "grad_norm": 19.24701690673828, "learning_rate": 1.694146977165845e-06, "loss": 0.7694, "step": 111680 }, { "epoch": 18.98521162672106, "grad_norm": 17.861431121826172, "learning_rate": 1.6913139554648988e-06, "loss": 0.5923, "step": 111690 }, { "epoch": 18.98691143974163, "grad_norm": 15.8347806930542, "learning_rate": 1.6884809337639528e-06, "loss": 0.6941, "step": 111700 }, { "epoch": 18.988611252762198, "grad_norm": 10.103588104248047, "learning_rate": 1.6856479120630065e-06, "loss": 0.6939, "step": 111710 }, { "epoch": 18.990311065782763, "grad_norm": 22.348562240600586, "learning_rate": 1.6828148903620602e-06, "loss": 0.661, "step": 111720 }, { "epoch": 18.99201087880333, "grad_norm": 10.907184600830078, "learning_rate": 1.679981868661114e-06, "loss": 0.6685, "step": 111730 }, { "epoch": 18.9937106918239, "grad_norm": 12.159514427185059, "learning_rate": 1.6771488469601677e-06, "loss": 0.4414, "step": 111740 }, { "epoch": 18.995410504844468, "grad_norm": 15.4380464553833, "learning_rate": 1.6743158252592216e-06, "loss": 0.5703, "step": 111750 }, { "epoch": 18.997110317865037, "grad_norm": 14.799690246582031, "learning_rate": 1.6714828035582752e-06, "loss": 0.6929, "step": 111760 }, { "epoch": 18.9988101308856, "grad_norm": 16.4720458984375, "learning_rate": 1.6686497818573291e-06, "loss": 0.6919, "step": 111770 }, { "epoch": 19.0, "eval_cer": 1.0, "eval_loss": 2.57195782661438, "eval_runtime": 1962.1474, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.24, "step": 111777 }, { "epoch": 19.00050994390617, "grad_norm": 13.03705883026123, "learning_rate": 1.6658167601563828e-06, "loss": 0.6532, "step": 111780 }, { "epoch": 19.00220975692674, "grad_norm": 10.241721153259277, "learning_rate": 1.6629837384554366e-06, "loss": 0.7706, "step": 111790 }, { "epoch": 19.003909569947307, "grad_norm": 12.017573356628418, "learning_rate": 1.6601507167544905e-06, "loss": 0.6348, "step": 111800 }, { "epoch": 19.005609382967872, "grad_norm": 9.484066009521484, "learning_rate": 1.657317695053544e-06, "loss": 0.6796, "step": 111810 }, { "epoch": 19.00730919598844, "grad_norm": 13.943903923034668, "learning_rate": 1.654484673352598e-06, "loss": 0.6684, "step": 111820 }, { "epoch": 19.00900900900901, "grad_norm": 13.806229591369629, "learning_rate": 1.6516516516516517e-06, "loss": 0.7226, "step": 111830 }, { "epoch": 19.010708822029578, "grad_norm": 13.068727493286133, "learning_rate": 1.6488186299507055e-06, "loss": 0.848, "step": 111840 }, { "epoch": 19.012408635050143, "grad_norm": 9.946537017822266, "learning_rate": 1.6459856082497592e-06, "loss": 0.6136, "step": 111850 }, { "epoch": 19.01410844807071, "grad_norm": 10.025041580200195, "learning_rate": 1.643152586548813e-06, "loss": 0.6702, "step": 111860 }, { "epoch": 19.01580826109128, "grad_norm": 11.95444393157959, "learning_rate": 1.6403195648478669e-06, "loss": 0.6051, "step": 111870 }, { "epoch": 19.01750807411185, "grad_norm": 20.212459564208984, "learning_rate": 1.6374865431469206e-06, "loss": 0.6563, "step": 111880 }, { "epoch": 19.019207887132417, "grad_norm": 14.346957206726074, "learning_rate": 1.6346535214459741e-06, "loss": 0.4629, "step": 111890 }, { "epoch": 19.020907700152982, "grad_norm": 14.04133415222168, "learning_rate": 1.631820499745028e-06, "loss": 0.6492, "step": 111900 }, { "epoch": 19.02260751317355, "grad_norm": 13.384540557861328, "learning_rate": 1.6289874780440818e-06, "loss": 0.7553, "step": 111910 }, { "epoch": 19.02430732619412, "grad_norm": 17.373037338256836, "learning_rate": 1.6261544563431358e-06, "loss": 0.6182, "step": 111920 }, { "epoch": 19.026007139214688, "grad_norm": 13.385838508605957, "learning_rate": 1.6233214346421895e-06, "loss": 0.6521, "step": 111930 }, { "epoch": 19.027706952235253, "grad_norm": 17.811870574951172, "learning_rate": 1.6204884129412432e-06, "loss": 0.6703, "step": 111940 }, { "epoch": 19.02940676525582, "grad_norm": 13.086507797241211, "learning_rate": 1.617655391240297e-06, "loss": 0.6001, "step": 111950 }, { "epoch": 19.03110657827639, "grad_norm": 9.462029457092285, "learning_rate": 1.6148223695393507e-06, "loss": 0.6285, "step": 111960 }, { "epoch": 19.03280639129696, "grad_norm": 12.266698837280273, "learning_rate": 1.6119893478384046e-06, "loss": 0.6742, "step": 111970 }, { "epoch": 19.034506204317527, "grad_norm": 16.883514404296875, "learning_rate": 1.6091563261374582e-06, "loss": 0.6575, "step": 111980 }, { "epoch": 19.036206017338092, "grad_norm": 9.68209171295166, "learning_rate": 1.606323304436512e-06, "loss": 0.4821, "step": 111990 }, { "epoch": 19.03790583035866, "grad_norm": 11.735025405883789, "learning_rate": 1.6034902827355658e-06, "loss": 0.5494, "step": 112000 }, { "epoch": 19.03960564337923, "grad_norm": 12.875890731811523, "learning_rate": 1.6006572610346196e-06, "loss": 0.5487, "step": 112010 }, { "epoch": 19.041305456399797, "grad_norm": 11.831185340881348, "learning_rate": 1.5978242393336733e-06, "loss": 0.7933, "step": 112020 }, { "epoch": 19.043005269420362, "grad_norm": 10.616230964660645, "learning_rate": 1.594991217632727e-06, "loss": 0.633, "step": 112030 }, { "epoch": 19.04470508244093, "grad_norm": 11.852148056030273, "learning_rate": 1.592158195931781e-06, "loss": 0.63, "step": 112040 }, { "epoch": 19.0464048954615, "grad_norm": 14.918673515319824, "learning_rate": 1.5893251742308347e-06, "loss": 0.6753, "step": 112050 }, { "epoch": 19.048104708482068, "grad_norm": 20.260093688964844, "learning_rate": 1.5864921525298884e-06, "loss": 0.6828, "step": 112060 }, { "epoch": 19.049804521502633, "grad_norm": 15.249872207641602, "learning_rate": 1.5836591308289422e-06, "loss": 0.6944, "step": 112070 }, { "epoch": 19.0515043345232, "grad_norm": 12.781586647033691, "learning_rate": 1.580826109127996e-06, "loss": 0.6336, "step": 112080 }, { "epoch": 19.05320414754377, "grad_norm": 13.453261375427246, "learning_rate": 1.5779930874270499e-06, "loss": 0.5412, "step": 112090 }, { "epoch": 19.05490396056434, "grad_norm": 13.564790725708008, "learning_rate": 1.5751600657261036e-06, "loss": 0.6013, "step": 112100 }, { "epoch": 19.056603773584907, "grad_norm": 14.46264362335205, "learning_rate": 1.5723270440251571e-06, "loss": 0.6933, "step": 112110 }, { "epoch": 19.058303586605472, "grad_norm": 12.500521659851074, "learning_rate": 1.569494022324211e-06, "loss": 0.6343, "step": 112120 }, { "epoch": 19.06000339962604, "grad_norm": 11.890130043029785, "learning_rate": 1.5666610006232648e-06, "loss": 0.672, "step": 112130 }, { "epoch": 19.06170321264661, "grad_norm": 28.730911254882812, "learning_rate": 1.5638279789223187e-06, "loss": 0.6632, "step": 112140 }, { "epoch": 19.063403025667178, "grad_norm": 12.56108283996582, "learning_rate": 1.5609949572213723e-06, "loss": 0.5168, "step": 112150 }, { "epoch": 19.065102838687743, "grad_norm": 13.098250389099121, "learning_rate": 1.558161935520426e-06, "loss": 0.532, "step": 112160 }, { "epoch": 19.06680265170831, "grad_norm": 10.517898559570312, "learning_rate": 1.55532891381948e-06, "loss": 0.6316, "step": 112170 }, { "epoch": 19.06850246472888, "grad_norm": 12.728433609008789, "learning_rate": 1.5524958921185337e-06, "loss": 0.5231, "step": 112180 }, { "epoch": 19.07020227774945, "grad_norm": 15.071660995483398, "learning_rate": 1.5496628704175874e-06, "loss": 0.6114, "step": 112190 }, { "epoch": 19.071902090770017, "grad_norm": 14.066155433654785, "learning_rate": 1.5468298487166411e-06, "loss": 0.6935, "step": 112200 }, { "epoch": 19.073601903790582, "grad_norm": 11.705646514892578, "learning_rate": 1.5439968270156949e-06, "loss": 0.5617, "step": 112210 }, { "epoch": 19.07530171681115, "grad_norm": 38.68169021606445, "learning_rate": 1.5411638053147488e-06, "loss": 0.6224, "step": 112220 }, { "epoch": 19.07700152983172, "grad_norm": 11.96179485321045, "learning_rate": 1.5383307836138026e-06, "loss": 0.6475, "step": 112230 }, { "epoch": 19.078701342852288, "grad_norm": 13.851309776306152, "learning_rate": 1.5354977619128563e-06, "loss": 0.737, "step": 112240 }, { "epoch": 19.080401155872853, "grad_norm": 19.600597381591797, "learning_rate": 1.53266474021191e-06, "loss": 0.5736, "step": 112250 }, { "epoch": 19.08210096889342, "grad_norm": 14.133719444274902, "learning_rate": 1.529831718510964e-06, "loss": 0.5455, "step": 112260 }, { "epoch": 19.08380078191399, "grad_norm": 12.847475051879883, "learning_rate": 1.5269986968100177e-06, "loss": 0.658, "step": 112270 }, { "epoch": 19.085500594934558, "grad_norm": 32.634002685546875, "learning_rate": 1.5241656751090712e-06, "loss": 0.7307, "step": 112280 }, { "epoch": 19.087200407955123, "grad_norm": 13.516305923461914, "learning_rate": 1.5213326534081252e-06, "loss": 0.5446, "step": 112290 }, { "epoch": 19.08890022097569, "grad_norm": 12.778170585632324, "learning_rate": 1.518499631707179e-06, "loss": 0.6372, "step": 112300 }, { "epoch": 19.09060003399626, "grad_norm": 11.823874473571777, "learning_rate": 1.5156666100062328e-06, "loss": 0.496, "step": 112310 }, { "epoch": 19.09229984701683, "grad_norm": 14.453436851501465, "learning_rate": 1.5128335883052864e-06, "loss": 0.6638, "step": 112320 }, { "epoch": 19.093999660037397, "grad_norm": 13.748140335083008, "learning_rate": 1.51000056660434e-06, "loss": 0.743, "step": 112330 }, { "epoch": 19.095699473057962, "grad_norm": 13.662365913391113, "learning_rate": 1.507167544903394e-06, "loss": 0.6084, "step": 112340 }, { "epoch": 19.09739928607853, "grad_norm": 11.00143814086914, "learning_rate": 1.5043345232024478e-06, "loss": 0.6159, "step": 112350 }, { "epoch": 19.0990990990991, "grad_norm": 14.929679870605469, "learning_rate": 1.5015015015015017e-06, "loss": 0.6943, "step": 112360 }, { "epoch": 19.100798912119668, "grad_norm": 12.88417911529541, "learning_rate": 1.4986684798005552e-06, "loss": 0.6792, "step": 112370 }, { "epoch": 19.102498725140233, "grad_norm": 11.206308364868164, "learning_rate": 1.495835458099609e-06, "loss": 0.5624, "step": 112380 }, { "epoch": 19.1041985381608, "grad_norm": 11.717973709106445, "learning_rate": 1.493002436398663e-06, "loss": 0.7554, "step": 112390 }, { "epoch": 19.10589835118137, "grad_norm": 35.756595611572266, "learning_rate": 1.4901694146977167e-06, "loss": 0.782, "step": 112400 }, { "epoch": 19.10759816420194, "grad_norm": 12.473265647888184, "learning_rate": 1.4873363929967704e-06, "loss": 0.6059, "step": 112410 }, { "epoch": 19.109297977222507, "grad_norm": 10.921462059020996, "learning_rate": 1.4845033712958241e-06, "loss": 0.5988, "step": 112420 }, { "epoch": 19.110997790243072, "grad_norm": 16.34193992614746, "learning_rate": 1.4816703495948779e-06, "loss": 0.6674, "step": 112430 }, { "epoch": 19.11269760326364, "grad_norm": 13.237831115722656, "learning_rate": 1.4788373278939318e-06, "loss": 0.5733, "step": 112440 }, { "epoch": 19.11439741628421, "grad_norm": 11.173090934753418, "learning_rate": 1.4760043061929853e-06, "loss": 0.6668, "step": 112450 }, { "epoch": 19.116097229304778, "grad_norm": 9.537894248962402, "learning_rate": 1.4731712844920393e-06, "loss": 0.4613, "step": 112460 }, { "epoch": 19.117797042325343, "grad_norm": 10.910737037658691, "learning_rate": 1.470338262791093e-06, "loss": 0.6131, "step": 112470 }, { "epoch": 19.11949685534591, "grad_norm": 7.5938873291015625, "learning_rate": 1.4675052410901467e-06, "loss": 0.6815, "step": 112480 }, { "epoch": 19.12119666836648, "grad_norm": 13.059316635131836, "learning_rate": 1.4646722193892007e-06, "loss": 0.6568, "step": 112490 }, { "epoch": 19.12289648138705, "grad_norm": 10.980835914611816, "learning_rate": 1.4618391976882542e-06, "loss": 0.6076, "step": 112500 }, { "epoch": 19.124596294407617, "grad_norm": 10.820881843566895, "learning_rate": 1.4590061759873081e-06, "loss": 0.7026, "step": 112510 }, { "epoch": 19.126296107428182, "grad_norm": 13.037320137023926, "learning_rate": 1.4561731542863619e-06, "loss": 0.6996, "step": 112520 }, { "epoch": 19.12799592044875, "grad_norm": 17.918365478515625, "learning_rate": 1.4533401325854156e-06, "loss": 0.7737, "step": 112530 }, { "epoch": 19.12969573346932, "grad_norm": 10.827301025390625, "learning_rate": 1.4505071108844694e-06, "loss": 0.6364, "step": 112540 }, { "epoch": 19.131395546489887, "grad_norm": 14.46486759185791, "learning_rate": 1.447674089183523e-06, "loss": 0.6379, "step": 112550 }, { "epoch": 19.133095359510452, "grad_norm": 16.729129791259766, "learning_rate": 1.444841067482577e-06, "loss": 0.6289, "step": 112560 }, { "epoch": 19.13479517253102, "grad_norm": 66.47807312011719, "learning_rate": 1.4420080457816308e-06, "loss": 0.5932, "step": 112570 }, { "epoch": 19.13649498555159, "grad_norm": 19.473217010498047, "learning_rate": 1.4391750240806845e-06, "loss": 0.4739, "step": 112580 }, { "epoch": 19.138194798572158, "grad_norm": 13.995234489440918, "learning_rate": 1.4363420023797382e-06, "loss": 0.7573, "step": 112590 }, { "epoch": 19.139894611592723, "grad_norm": 14.660945892333984, "learning_rate": 1.433508980678792e-06, "loss": 0.842, "step": 112600 }, { "epoch": 19.14159442461329, "grad_norm": 10.75835132598877, "learning_rate": 1.430675958977846e-06, "loss": 0.647, "step": 112610 }, { "epoch": 19.14329423763386, "grad_norm": 8.936498641967773, "learning_rate": 1.4278429372768996e-06, "loss": 0.6363, "step": 112620 }, { "epoch": 19.14499405065443, "grad_norm": 8.539198875427246, "learning_rate": 1.4250099155759534e-06, "loss": 0.6217, "step": 112630 }, { "epoch": 19.146693863674997, "grad_norm": 8.8765230178833, "learning_rate": 1.4221768938750071e-06, "loss": 0.6789, "step": 112640 }, { "epoch": 19.148393676695562, "grad_norm": 10.037188529968262, "learning_rate": 1.4193438721740608e-06, "loss": 0.6209, "step": 112650 }, { "epoch": 19.15009348971613, "grad_norm": 12.727951049804688, "learning_rate": 1.4165108504731148e-06, "loss": 0.7424, "step": 112660 }, { "epoch": 19.1517933027367, "grad_norm": 10.646370887756348, "learning_rate": 1.4136778287721683e-06, "loss": 0.6113, "step": 112670 }, { "epoch": 19.153493115757268, "grad_norm": 14.68985366821289, "learning_rate": 1.4108448070712223e-06, "loss": 0.7145, "step": 112680 }, { "epoch": 19.155192928777833, "grad_norm": 14.621682167053223, "learning_rate": 1.408011785370276e-06, "loss": 0.7303, "step": 112690 }, { "epoch": 19.1568927417984, "grad_norm": 14.700738906860352, "learning_rate": 1.4051787636693297e-06, "loss": 0.7428, "step": 112700 }, { "epoch": 19.15859255481897, "grad_norm": 13.84660816192627, "learning_rate": 1.4023457419683835e-06, "loss": 0.5899, "step": 112710 }, { "epoch": 19.16029236783954, "grad_norm": 12.988057136535645, "learning_rate": 1.3995127202674372e-06, "loss": 0.4412, "step": 112720 }, { "epoch": 19.161992180860107, "grad_norm": 15.06737232208252, "learning_rate": 1.3966796985664911e-06, "loss": 0.5227, "step": 112730 }, { "epoch": 19.163691993880672, "grad_norm": 14.116692543029785, "learning_rate": 1.3938466768655449e-06, "loss": 0.6498, "step": 112740 }, { "epoch": 19.16539180690124, "grad_norm": 9.603267669677734, "learning_rate": 1.3910136551645986e-06, "loss": 0.6301, "step": 112750 }, { "epoch": 19.16709161992181, "grad_norm": 12.137751579284668, "learning_rate": 1.3881806334636523e-06, "loss": 0.6003, "step": 112760 }, { "epoch": 19.168791432942378, "grad_norm": 15.028119087219238, "learning_rate": 1.385347611762706e-06, "loss": 0.5793, "step": 112770 }, { "epoch": 19.170491245962943, "grad_norm": 10.449393272399902, "learning_rate": 1.38251459006176e-06, "loss": 0.5913, "step": 112780 }, { "epoch": 19.17219105898351, "grad_norm": 10.541220664978027, "learning_rate": 1.3796815683608137e-06, "loss": 0.632, "step": 112790 }, { "epoch": 19.17389087200408, "grad_norm": 16.751506805419922, "learning_rate": 1.3768485466598673e-06, "loss": 0.6948, "step": 112800 }, { "epoch": 19.17559068502465, "grad_norm": 9.976712226867676, "learning_rate": 1.3740155249589212e-06, "loss": 0.722, "step": 112810 }, { "epoch": 19.177290498045217, "grad_norm": 17.021900177001953, "learning_rate": 1.371182503257975e-06, "loss": 0.6701, "step": 112820 }, { "epoch": 19.178990311065782, "grad_norm": 14.747118949890137, "learning_rate": 1.368349481557029e-06, "loss": 0.7488, "step": 112830 }, { "epoch": 19.18069012408635, "grad_norm": 15.213937759399414, "learning_rate": 1.3655164598560824e-06, "loss": 0.8113, "step": 112840 }, { "epoch": 19.18238993710692, "grad_norm": 13.906622886657715, "learning_rate": 1.3626834381551361e-06, "loss": 0.7155, "step": 112850 }, { "epoch": 19.184089750127487, "grad_norm": 12.66335678100586, "learning_rate": 1.35985041645419e-06, "loss": 0.6003, "step": 112860 }, { "epoch": 19.185789563148052, "grad_norm": 14.830430030822754, "learning_rate": 1.3570173947532438e-06, "loss": 0.6202, "step": 112870 }, { "epoch": 19.18748937616862, "grad_norm": 12.736542701721191, "learning_rate": 1.3541843730522978e-06, "loss": 0.6272, "step": 112880 }, { "epoch": 19.18918918918919, "grad_norm": 17.898069381713867, "learning_rate": 1.3513513513513513e-06, "loss": 0.876, "step": 112890 }, { "epoch": 19.190889002209758, "grad_norm": 17.83774757385254, "learning_rate": 1.3485183296504052e-06, "loss": 0.629, "step": 112900 }, { "epoch": 19.192588815230323, "grad_norm": 8.75322437286377, "learning_rate": 1.345685307949459e-06, "loss": 0.5674, "step": 112910 }, { "epoch": 19.19428862825089, "grad_norm": 22.434608459472656, "learning_rate": 1.3428522862485127e-06, "loss": 0.6335, "step": 112920 }, { "epoch": 19.19598844127146, "grad_norm": 14.518367767333984, "learning_rate": 1.3400192645475664e-06, "loss": 0.6129, "step": 112930 }, { "epoch": 19.19768825429203, "grad_norm": 12.313422203063965, "learning_rate": 1.3371862428466202e-06, "loss": 0.5021, "step": 112940 }, { "epoch": 19.199388067312597, "grad_norm": 8.774840354919434, "learning_rate": 1.3343532211456741e-06, "loss": 0.5297, "step": 112950 }, { "epoch": 19.201087880333162, "grad_norm": 15.788919448852539, "learning_rate": 1.3315201994447279e-06, "loss": 0.7956, "step": 112960 }, { "epoch": 19.20278769335373, "grad_norm": 10.659158706665039, "learning_rate": 1.3286871777437814e-06, "loss": 0.5727, "step": 112970 }, { "epoch": 19.2044875063743, "grad_norm": 12.733697891235352, "learning_rate": 1.3258541560428353e-06, "loss": 0.7463, "step": 112980 }, { "epoch": 19.206187319394868, "grad_norm": 16.046232223510742, "learning_rate": 1.323021134341889e-06, "loss": 0.6295, "step": 112990 }, { "epoch": 19.207887132415433, "grad_norm": 11.918023109436035, "learning_rate": 1.320188112640943e-06, "loss": 0.5631, "step": 113000 }, { "epoch": 19.209586945436, "grad_norm": 11.690778732299805, "learning_rate": 1.3173550909399967e-06, "loss": 0.5427, "step": 113010 }, { "epoch": 19.21128675845657, "grad_norm": 11.949613571166992, "learning_rate": 1.3145220692390503e-06, "loss": 0.6356, "step": 113020 }, { "epoch": 19.21298657147714, "grad_norm": 14.11694622039795, "learning_rate": 1.3116890475381042e-06, "loss": 0.8254, "step": 113030 }, { "epoch": 19.214686384497707, "grad_norm": 15.707200050354004, "learning_rate": 1.308856025837158e-06, "loss": 0.6859, "step": 113040 }, { "epoch": 19.216386197518272, "grad_norm": 13.606523513793945, "learning_rate": 1.3060230041362119e-06, "loss": 0.7089, "step": 113050 }, { "epoch": 19.21808601053884, "grad_norm": 10.415746688842773, "learning_rate": 1.3031899824352654e-06, "loss": 0.4929, "step": 113060 }, { "epoch": 19.21978582355941, "grad_norm": 14.804309844970703, "learning_rate": 1.3003569607343191e-06, "loss": 0.7082, "step": 113070 }, { "epoch": 19.221485636579978, "grad_norm": 14.272875785827637, "learning_rate": 1.297523939033373e-06, "loss": 0.6841, "step": 113080 }, { "epoch": 19.223185449600543, "grad_norm": 10.597532272338867, "learning_rate": 1.2946909173324268e-06, "loss": 0.5548, "step": 113090 }, { "epoch": 19.22488526262111, "grad_norm": 14.530736923217773, "learning_rate": 1.2918578956314805e-06, "loss": 0.6655, "step": 113100 }, { "epoch": 19.22658507564168, "grad_norm": 12.107823371887207, "learning_rate": 1.2890248739305343e-06, "loss": 0.5971, "step": 113110 }, { "epoch": 19.228284888662248, "grad_norm": 9.55041217803955, "learning_rate": 1.286191852229588e-06, "loss": 0.5307, "step": 113120 }, { "epoch": 19.229984701682813, "grad_norm": 15.193769454956055, "learning_rate": 1.283358830528642e-06, "loss": 0.7067, "step": 113130 }, { "epoch": 19.23168451470338, "grad_norm": 12.919928550720215, "learning_rate": 1.2805258088276957e-06, "loss": 0.6033, "step": 113140 }, { "epoch": 19.23338432772395, "grad_norm": 77.31128692626953, "learning_rate": 1.2776927871267494e-06, "loss": 0.7114, "step": 113150 }, { "epoch": 19.23508414074452, "grad_norm": 19.02301788330078, "learning_rate": 1.2748597654258032e-06, "loss": 0.7003, "step": 113160 }, { "epoch": 19.236783953765087, "grad_norm": 11.321579933166504, "learning_rate": 1.272026743724857e-06, "loss": 0.5437, "step": 113170 }, { "epoch": 19.238483766785652, "grad_norm": 10.605307579040527, "learning_rate": 1.2691937220239108e-06, "loss": 0.7587, "step": 113180 }, { "epoch": 19.24018357980622, "grad_norm": 10.946444511413574, "learning_rate": 1.2663607003229644e-06, "loss": 0.6626, "step": 113190 }, { "epoch": 19.24188339282679, "grad_norm": 24.866289138793945, "learning_rate": 1.2635276786220183e-06, "loss": 0.6427, "step": 113200 }, { "epoch": 19.243583205847358, "grad_norm": 9.628180503845215, "learning_rate": 1.260694656921072e-06, "loss": 0.5209, "step": 113210 }, { "epoch": 19.245283018867923, "grad_norm": 12.619765281677246, "learning_rate": 1.257861635220126e-06, "loss": 0.7771, "step": 113220 }, { "epoch": 19.24698283188849, "grad_norm": 13.829896926879883, "learning_rate": 1.2550286135191795e-06, "loss": 0.6739, "step": 113230 }, { "epoch": 19.24868264490906, "grad_norm": 19.88239288330078, "learning_rate": 1.2521955918182332e-06, "loss": 0.5976, "step": 113240 }, { "epoch": 19.25038245792963, "grad_norm": 13.969345092773438, "learning_rate": 1.2493625701172872e-06, "loss": 0.7116, "step": 113250 }, { "epoch": 19.252082270950197, "grad_norm": 12.835433959960938, "learning_rate": 1.246529548416341e-06, "loss": 0.7393, "step": 113260 }, { "epoch": 19.253782083970762, "grad_norm": 13.495918273925781, "learning_rate": 1.2436965267153949e-06, "loss": 0.6487, "step": 113270 }, { "epoch": 19.25548189699133, "grad_norm": 11.7741060256958, "learning_rate": 1.2408635050144484e-06, "loss": 0.7799, "step": 113280 }, { "epoch": 19.2571817100119, "grad_norm": 11.558869361877441, "learning_rate": 1.2380304833135021e-06, "loss": 0.5928, "step": 113290 }, { "epoch": 19.258881523032468, "grad_norm": 11.157936096191406, "learning_rate": 1.235197461612556e-06, "loss": 0.6664, "step": 113300 }, { "epoch": 19.260581336053033, "grad_norm": 11.870532035827637, "learning_rate": 1.2323644399116098e-06, "loss": 0.4768, "step": 113310 }, { "epoch": 19.2622811490736, "grad_norm": 11.323569297790527, "learning_rate": 1.2295314182106635e-06, "loss": 0.6333, "step": 113320 }, { "epoch": 19.26398096209417, "grad_norm": 14.495512008666992, "learning_rate": 1.2266983965097173e-06, "loss": 0.7674, "step": 113330 }, { "epoch": 19.26568077511474, "grad_norm": 12.762264251708984, "learning_rate": 1.223865374808771e-06, "loss": 0.7199, "step": 113340 }, { "epoch": 19.267380588135307, "grad_norm": 13.388998985290527, "learning_rate": 1.221032353107825e-06, "loss": 0.6902, "step": 113350 }, { "epoch": 19.269080401155872, "grad_norm": 9.74078369140625, "learning_rate": 1.2181993314068785e-06, "loss": 0.7, "step": 113360 }, { "epoch": 19.27078021417644, "grad_norm": 12.500041007995605, "learning_rate": 1.2153663097059324e-06, "loss": 0.7202, "step": 113370 }, { "epoch": 19.27248002719701, "grad_norm": 9.497093200683594, "learning_rate": 1.2125332880049861e-06, "loss": 0.513, "step": 113380 }, { "epoch": 19.274179840217577, "grad_norm": 54.01585388183594, "learning_rate": 1.2097002663040399e-06, "loss": 0.6315, "step": 113390 }, { "epoch": 19.275879653238142, "grad_norm": 15.72019100189209, "learning_rate": 1.2068672446030938e-06, "loss": 0.6635, "step": 113400 }, { "epoch": 19.27757946625871, "grad_norm": 12.930865287780762, "learning_rate": 1.2040342229021473e-06, "loss": 0.5273, "step": 113410 }, { "epoch": 19.27927927927928, "grad_norm": 12.026968955993652, "learning_rate": 1.2012012012012013e-06, "loss": 0.5431, "step": 113420 }, { "epoch": 19.280979092299848, "grad_norm": 14.030369758605957, "learning_rate": 1.198368179500255e-06, "loss": 0.665, "step": 113430 }, { "epoch": 19.282678905320413, "grad_norm": 46.04462432861328, "learning_rate": 1.1955351577993088e-06, "loss": 0.6596, "step": 113440 }, { "epoch": 19.28437871834098, "grad_norm": 11.273436546325684, "learning_rate": 1.1927021360983625e-06, "loss": 0.6676, "step": 113450 }, { "epoch": 19.28607853136155, "grad_norm": 12.50796890258789, "learning_rate": 1.1898691143974162e-06, "loss": 0.6757, "step": 113460 }, { "epoch": 19.28777834438212, "grad_norm": 12.997644424438477, "learning_rate": 1.1870360926964702e-06, "loss": 0.5482, "step": 113470 }, { "epoch": 19.289478157402687, "grad_norm": 12.653881072998047, "learning_rate": 1.184203070995524e-06, "loss": 0.6161, "step": 113480 }, { "epoch": 19.291177970423252, "grad_norm": 11.417551040649414, "learning_rate": 1.1813700492945776e-06, "loss": 0.6311, "step": 113490 }, { "epoch": 19.29287778344382, "grad_norm": 14.609889030456543, "learning_rate": 1.1785370275936314e-06, "loss": 0.8336, "step": 113500 }, { "epoch": 19.29457759646439, "grad_norm": 14.230549812316895, "learning_rate": 1.1757040058926851e-06, "loss": 0.4947, "step": 113510 }, { "epoch": 19.296277409484958, "grad_norm": 10.932921409606934, "learning_rate": 1.172870984191739e-06, "loss": 0.7072, "step": 113520 }, { "epoch": 19.297977222505523, "grad_norm": 13.398268699645996, "learning_rate": 1.1700379624907928e-06, "loss": 0.771, "step": 113530 }, { "epoch": 19.29967703552609, "grad_norm": 18.96868896484375, "learning_rate": 1.1672049407898465e-06, "loss": 0.4985, "step": 113540 }, { "epoch": 19.30137684854666, "grad_norm": 11.655516624450684, "learning_rate": 1.1643719190889003e-06, "loss": 0.7164, "step": 113550 }, { "epoch": 19.30307666156723, "grad_norm": 12.381688117980957, "learning_rate": 1.161538897387954e-06, "loss": 0.697, "step": 113560 }, { "epoch": 19.304776474587797, "grad_norm": 15.557618141174316, "learning_rate": 1.158705875687008e-06, "loss": 0.5982, "step": 113570 }, { "epoch": 19.306476287608362, "grad_norm": 12.775697708129883, "learning_rate": 1.1558728539860615e-06, "loss": 0.5751, "step": 113580 }, { "epoch": 19.30817610062893, "grad_norm": 10.473443984985352, "learning_rate": 1.1530398322851154e-06, "loss": 0.5621, "step": 113590 }, { "epoch": 19.3098759136495, "grad_norm": 13.823132514953613, "learning_rate": 1.1502068105841691e-06, "loss": 0.6191, "step": 113600 }, { "epoch": 19.311575726670068, "grad_norm": 11.998457908630371, "learning_rate": 1.1473737888832229e-06, "loss": 0.6979, "step": 113610 }, { "epoch": 19.313275539690633, "grad_norm": 24.864349365234375, "learning_rate": 1.1445407671822766e-06, "loss": 0.6415, "step": 113620 }, { "epoch": 19.3149753527112, "grad_norm": 13.040392875671387, "learning_rate": 1.1417077454813303e-06, "loss": 0.6308, "step": 113630 }, { "epoch": 19.31667516573177, "grad_norm": 14.020249366760254, "learning_rate": 1.1388747237803843e-06, "loss": 0.8053, "step": 113640 }, { "epoch": 19.318374978752338, "grad_norm": 10.625869750976562, "learning_rate": 1.136041702079438e-06, "loss": 0.6514, "step": 113650 }, { "epoch": 19.320074791772903, "grad_norm": 17.148542404174805, "learning_rate": 1.1332086803784917e-06, "loss": 0.7272, "step": 113660 }, { "epoch": 19.32177460479347, "grad_norm": 10.500383377075195, "learning_rate": 1.1303756586775455e-06, "loss": 0.7238, "step": 113670 }, { "epoch": 19.32347441781404, "grad_norm": 15.258406639099121, "learning_rate": 1.1275426369765992e-06, "loss": 0.5747, "step": 113680 }, { "epoch": 19.32517423083461, "grad_norm": 21.19813346862793, "learning_rate": 1.1247096152756532e-06, "loss": 0.6297, "step": 113690 }, { "epoch": 19.326874043855177, "grad_norm": 17.1429443359375, "learning_rate": 1.1218765935747069e-06, "loss": 0.5949, "step": 113700 }, { "epoch": 19.328573856875742, "grad_norm": 11.503506660461426, "learning_rate": 1.1190435718737604e-06, "loss": 0.579, "step": 113710 }, { "epoch": 19.33027366989631, "grad_norm": 14.335367202758789, "learning_rate": 1.1162105501728144e-06, "loss": 0.6145, "step": 113720 }, { "epoch": 19.33197348291688, "grad_norm": 12.512152671813965, "learning_rate": 1.113377528471868e-06, "loss": 0.5979, "step": 113730 }, { "epoch": 19.333673295937448, "grad_norm": 15.435856819152832, "learning_rate": 1.110544506770922e-06, "loss": 0.6617, "step": 113740 }, { "epoch": 19.335373108958013, "grad_norm": 10.810848236083984, "learning_rate": 1.1077114850699756e-06, "loss": 0.5027, "step": 113750 }, { "epoch": 19.33707292197858, "grad_norm": 19.93914222717285, "learning_rate": 1.1048784633690293e-06, "loss": 0.5562, "step": 113760 }, { "epoch": 19.33877273499915, "grad_norm": 12.122425079345703, "learning_rate": 1.1020454416680832e-06, "loss": 0.6496, "step": 113770 }, { "epoch": 19.34047254801972, "grad_norm": 12.349194526672363, "learning_rate": 1.099212419967137e-06, "loss": 0.8316, "step": 113780 }, { "epoch": 19.342172361040287, "grad_norm": 15.640878677368164, "learning_rate": 1.0963793982661907e-06, "loss": 0.5964, "step": 113790 }, { "epoch": 19.343872174060852, "grad_norm": 8.98163890838623, "learning_rate": 1.0935463765652444e-06, "loss": 0.6362, "step": 113800 }, { "epoch": 19.34557198708142, "grad_norm": 12.239300727844238, "learning_rate": 1.0907133548642984e-06, "loss": 0.6144, "step": 113810 }, { "epoch": 19.34727180010199, "grad_norm": 11.811111450195312, "learning_rate": 1.0878803331633521e-06, "loss": 0.582, "step": 113820 }, { "epoch": 19.348971613122558, "grad_norm": 16.110029220581055, "learning_rate": 1.0850473114624059e-06, "loss": 0.7202, "step": 113830 }, { "epoch": 19.350671426143123, "grad_norm": 17.67813491821289, "learning_rate": 1.0822142897614596e-06, "loss": 0.5793, "step": 113840 }, { "epoch": 19.35237123916369, "grad_norm": 9.74953842163086, "learning_rate": 1.0793812680605133e-06, "loss": 0.4885, "step": 113850 }, { "epoch": 19.35407105218426, "grad_norm": 8.826353073120117, "learning_rate": 1.0765482463595673e-06, "loss": 0.7457, "step": 113860 }, { "epoch": 19.35577086520483, "grad_norm": 14.154044151306152, "learning_rate": 1.073715224658621e-06, "loss": 0.7355, "step": 113870 }, { "epoch": 19.357470678225397, "grad_norm": 10.981169700622559, "learning_rate": 1.0708822029576745e-06, "loss": 0.7489, "step": 113880 }, { "epoch": 19.359170491245962, "grad_norm": 10.55450439453125, "learning_rate": 1.0680491812567285e-06, "loss": 0.8675, "step": 113890 }, { "epoch": 19.36087030426653, "grad_norm": 16.21936798095703, "learning_rate": 1.0652161595557822e-06, "loss": 0.6931, "step": 113900 }, { "epoch": 19.3625701172871, "grad_norm": 19.39954948425293, "learning_rate": 1.0623831378548361e-06, "loss": 0.5502, "step": 113910 }, { "epoch": 19.364269930307668, "grad_norm": 18.013273239135742, "learning_rate": 1.0595501161538897e-06, "loss": 0.66, "step": 113920 }, { "epoch": 19.365969743328233, "grad_norm": 11.754866600036621, "learning_rate": 1.0567170944529434e-06, "loss": 0.5734, "step": 113930 }, { "epoch": 19.3676695563488, "grad_norm": 14.219246864318848, "learning_rate": 1.0538840727519973e-06, "loss": 0.6522, "step": 113940 }, { "epoch": 19.36936936936937, "grad_norm": 11.602063179016113, "learning_rate": 1.051051051051051e-06, "loss": 0.6003, "step": 113950 }, { "epoch": 19.371069182389938, "grad_norm": 11.005182266235352, "learning_rate": 1.048218029350105e-06, "loss": 0.6076, "step": 113960 }, { "epoch": 19.372768995410503, "grad_norm": 10.33104133605957, "learning_rate": 1.0453850076491585e-06, "loss": 0.6065, "step": 113970 }, { "epoch": 19.37446880843107, "grad_norm": 13.531607627868652, "learning_rate": 1.0425519859482123e-06, "loss": 0.555, "step": 113980 }, { "epoch": 19.37616862145164, "grad_norm": 13.06892204284668, "learning_rate": 1.0397189642472662e-06, "loss": 0.7046, "step": 113990 }, { "epoch": 19.37786843447221, "grad_norm": 18.628068923950195, "learning_rate": 1.03688594254632e-06, "loss": 0.772, "step": 114000 }, { "epoch": 19.379568247492777, "grad_norm": 12.09196949005127, "learning_rate": 1.0340529208453737e-06, "loss": 0.6286, "step": 114010 }, { "epoch": 19.381268060513342, "grad_norm": 12.42335033416748, "learning_rate": 1.0312198991444274e-06, "loss": 0.6413, "step": 114020 }, { "epoch": 19.38296787353391, "grad_norm": 14.070100784301758, "learning_rate": 1.0283868774434812e-06, "loss": 0.5804, "step": 114030 }, { "epoch": 19.38466768655448, "grad_norm": 15.79864501953125, "learning_rate": 1.025553855742535e-06, "loss": 0.6988, "step": 114040 }, { "epoch": 19.386367499575048, "grad_norm": 12.870453834533691, "learning_rate": 1.0227208340415886e-06, "loss": 0.7741, "step": 114050 }, { "epoch": 19.388067312595613, "grad_norm": 11.304689407348633, "learning_rate": 1.0198878123406426e-06, "loss": 0.5222, "step": 114060 }, { "epoch": 19.38976712561618, "grad_norm": 19.933956146240234, "learning_rate": 1.0170547906396963e-06, "loss": 0.574, "step": 114070 }, { "epoch": 19.39146693863675, "grad_norm": 15.31563663482666, "learning_rate": 1.01422176893875e-06, "loss": 0.7894, "step": 114080 }, { "epoch": 19.39316675165732, "grad_norm": 14.262803077697754, "learning_rate": 1.011388747237804e-06, "loss": 0.7159, "step": 114090 }, { "epoch": 19.394866564677887, "grad_norm": 8.667842864990234, "learning_rate": 1.0085557255368575e-06, "loss": 0.6086, "step": 114100 }, { "epoch": 19.396566377698452, "grad_norm": 16.16349220275879, "learning_rate": 1.0057227038359115e-06, "loss": 0.5544, "step": 114110 }, { "epoch": 19.39826619071902, "grad_norm": 13.84176254272461, "learning_rate": 1.0028896821349652e-06, "loss": 0.7377, "step": 114120 }, { "epoch": 19.39996600373959, "grad_norm": 19.56430435180664, "learning_rate": 1.0000566604340191e-06, "loss": 0.7512, "step": 114130 }, { "epoch": 19.401665816760158, "grad_norm": 14.642330169677734, "learning_rate": 9.972236387330727e-07, "loss": 0.5751, "step": 114140 }, { "epoch": 19.403365629780723, "grad_norm": 14.706603050231934, "learning_rate": 9.943906170321264e-07, "loss": 0.6007, "step": 114150 }, { "epoch": 19.40506544280129, "grad_norm": 24.317115783691406, "learning_rate": 9.915575953311803e-07, "loss": 0.6368, "step": 114160 }, { "epoch": 19.40676525582186, "grad_norm": 11.525954246520996, "learning_rate": 9.88724573630234e-07, "loss": 0.5692, "step": 114170 }, { "epoch": 19.40846506884243, "grad_norm": 14.868000984191895, "learning_rate": 9.858915519292878e-07, "loss": 0.5565, "step": 114180 }, { "epoch": 19.410164881862997, "grad_norm": 20.43423843383789, "learning_rate": 9.830585302283415e-07, "loss": 0.5648, "step": 114190 }, { "epoch": 19.411864694883562, "grad_norm": 17.37761688232422, "learning_rate": 9.802255085273953e-07, "loss": 0.5646, "step": 114200 }, { "epoch": 19.41356450790413, "grad_norm": 12.097757339477539, "learning_rate": 9.773924868264492e-07, "loss": 0.5814, "step": 114210 }, { "epoch": 19.4152643209247, "grad_norm": 12.367025375366211, "learning_rate": 9.74559465125503e-07, "loss": 0.5901, "step": 114220 }, { "epoch": 19.416964133945267, "grad_norm": 31.392467498779297, "learning_rate": 9.717264434245567e-07, "loss": 0.8583, "step": 114230 }, { "epoch": 19.418663946965832, "grad_norm": 13.281045913696289, "learning_rate": 9.688934217236104e-07, "loss": 0.5985, "step": 114240 }, { "epoch": 19.4203637599864, "grad_norm": 11.842436790466309, "learning_rate": 9.660604000226641e-07, "loss": 0.5945, "step": 114250 }, { "epoch": 19.42206357300697, "grad_norm": 12.96414566040039, "learning_rate": 9.63227378321718e-07, "loss": 0.6824, "step": 114260 }, { "epoch": 19.423763386027538, "grad_norm": 15.044861793518066, "learning_rate": 9.603943566207716e-07, "loss": 0.7393, "step": 114270 }, { "epoch": 19.425463199048103, "grad_norm": 11.322240829467773, "learning_rate": 9.575613349198256e-07, "loss": 0.6819, "step": 114280 }, { "epoch": 19.42716301206867, "grad_norm": 12.142621994018555, "learning_rate": 9.547283132188793e-07, "loss": 0.6484, "step": 114290 }, { "epoch": 19.42886282508924, "grad_norm": 14.52855110168457, "learning_rate": 9.518952915179331e-07, "loss": 0.7854, "step": 114300 }, { "epoch": 19.43056263810981, "grad_norm": 14.32501220703125, "learning_rate": 9.490622698169868e-07, "loss": 0.718, "step": 114310 }, { "epoch": 19.432262451130377, "grad_norm": 14.131340026855469, "learning_rate": 9.462292481160405e-07, "loss": 0.5732, "step": 114320 }, { "epoch": 19.433962264150942, "grad_norm": 15.446099281311035, "learning_rate": 9.433962264150943e-07, "loss": 0.853, "step": 114330 }, { "epoch": 19.43566207717151, "grad_norm": 11.211483001708984, "learning_rate": 9.405632047141482e-07, "loss": 0.6295, "step": 114340 }, { "epoch": 19.43736189019208, "grad_norm": 15.174564361572266, "learning_rate": 9.37730183013202e-07, "loss": 0.6619, "step": 114350 }, { "epoch": 19.439061703212648, "grad_norm": 13.956599235534668, "learning_rate": 9.348971613122557e-07, "loss": 0.7451, "step": 114360 }, { "epoch": 19.440761516233213, "grad_norm": 12.636488914489746, "learning_rate": 9.320641396113095e-07, "loss": 0.6993, "step": 114370 }, { "epoch": 19.44246132925378, "grad_norm": 10.380867958068848, "learning_rate": 9.292311179103632e-07, "loss": 0.6443, "step": 114380 }, { "epoch": 19.44416114227435, "grad_norm": 10.644994735717773, "learning_rate": 9.263980962094169e-07, "loss": 0.6342, "step": 114390 }, { "epoch": 19.44586095529492, "grad_norm": 10.608765602111816, "learning_rate": 9.235650745084708e-07, "loss": 0.5899, "step": 114400 }, { "epoch": 19.447560768315487, "grad_norm": 51.46427917480469, "learning_rate": 9.207320528075245e-07, "loss": 0.5877, "step": 114410 }, { "epoch": 19.449260581336052, "grad_norm": 11.780074119567871, "learning_rate": 9.178990311065784e-07, "loss": 0.5819, "step": 114420 }, { "epoch": 19.45096039435662, "grad_norm": 12.388472557067871, "learning_rate": 9.15066009405632e-07, "loss": 0.7795, "step": 114430 }, { "epoch": 19.45266020737719, "grad_norm": 21.643077850341797, "learning_rate": 9.122329877046858e-07, "loss": 0.5398, "step": 114440 }, { "epoch": 19.454360020397758, "grad_norm": 13.341803550720215, "learning_rate": 9.093999660037396e-07, "loss": 0.6273, "step": 114450 }, { "epoch": 19.456059833418323, "grad_norm": 6.532260894775391, "learning_rate": 9.065669443027934e-07, "loss": 0.615, "step": 114460 }, { "epoch": 19.45775964643889, "grad_norm": 11.272624969482422, "learning_rate": 9.037339226018472e-07, "loss": 0.5504, "step": 114470 }, { "epoch": 19.45945945945946, "grad_norm": 10.652008056640625, "learning_rate": 9.009009009009009e-07, "loss": 0.5033, "step": 114480 }, { "epoch": 19.461159272480028, "grad_norm": 12.578790664672852, "learning_rate": 8.980678791999547e-07, "loss": 0.695, "step": 114490 }, { "epoch": 19.462859085500597, "grad_norm": 12.108599662780762, "learning_rate": 8.952348574990084e-07, "loss": 0.6549, "step": 114500 }, { "epoch": 19.46455889852116, "grad_norm": 18.785892486572266, "learning_rate": 8.924018357980623e-07, "loss": 0.5709, "step": 114510 }, { "epoch": 19.46625871154173, "grad_norm": 11.470734596252441, "learning_rate": 8.89568814097116e-07, "loss": 0.6692, "step": 114520 }, { "epoch": 19.4679585245623, "grad_norm": 11.240107536315918, "learning_rate": 8.867357923961698e-07, "loss": 0.6949, "step": 114530 }, { "epoch": 19.469658337582867, "grad_norm": 11.623626708984375, "learning_rate": 8.839027706952235e-07, "loss": 0.6441, "step": 114540 }, { "epoch": 19.471358150603432, "grad_norm": 13.92934513092041, "learning_rate": 8.810697489942773e-07, "loss": 0.5749, "step": 114550 }, { "epoch": 19.473057963624, "grad_norm": 20.96584701538086, "learning_rate": 8.78236727293331e-07, "loss": 0.6257, "step": 114560 }, { "epoch": 19.47475777664457, "grad_norm": 14.33578109741211, "learning_rate": 8.754037055923849e-07, "loss": 0.6501, "step": 114570 }, { "epoch": 19.476457589665138, "grad_norm": 13.508953094482422, "learning_rate": 8.725706838914386e-07, "loss": 0.6364, "step": 114580 }, { "epoch": 19.478157402685703, "grad_norm": 16.62534523010254, "learning_rate": 8.697376621904924e-07, "loss": 0.5337, "step": 114590 }, { "epoch": 19.47985721570627, "grad_norm": 7.757818222045898, "learning_rate": 8.669046404895462e-07, "loss": 0.5545, "step": 114600 }, { "epoch": 19.48155702872684, "grad_norm": 12.200860023498535, "learning_rate": 8.640716187885999e-07, "loss": 0.6198, "step": 114610 }, { "epoch": 19.48325684174741, "grad_norm": 14.892401695251465, "learning_rate": 8.612385970876538e-07, "loss": 0.63, "step": 114620 }, { "epoch": 19.484956654767977, "grad_norm": 12.916767120361328, "learning_rate": 8.584055753867075e-07, "loss": 0.5877, "step": 114630 }, { "epoch": 19.486656467788542, "grad_norm": 10.634124755859375, "learning_rate": 8.555725536857612e-07, "loss": 0.8212, "step": 114640 }, { "epoch": 19.48835628080911, "grad_norm": 12.541566848754883, "learning_rate": 8.52739531984815e-07, "loss": 0.591, "step": 114650 }, { "epoch": 19.49005609382968, "grad_norm": 13.323342323303223, "learning_rate": 8.499065102838688e-07, "loss": 0.6052, "step": 114660 }, { "epoch": 19.491755906850248, "grad_norm": 14.399510383605957, "learning_rate": 8.470734885829225e-07, "loss": 0.7137, "step": 114670 }, { "epoch": 19.493455719870813, "grad_norm": 12.182391166687012, "learning_rate": 8.442404668819764e-07, "loss": 0.5454, "step": 114680 }, { "epoch": 19.49515553289138, "grad_norm": 10.795498847961426, "learning_rate": 8.414074451810301e-07, "loss": 0.518, "step": 114690 }, { "epoch": 19.49685534591195, "grad_norm": 12.085488319396973, "learning_rate": 8.385744234800838e-07, "loss": 0.537, "step": 114700 }, { "epoch": 19.49855515893252, "grad_norm": 9.0739164352417, "learning_rate": 8.357414017791376e-07, "loss": 0.5668, "step": 114710 }, { "epoch": 19.500254971953087, "grad_norm": 12.074892044067383, "learning_rate": 8.329083800781914e-07, "loss": 0.892, "step": 114720 }, { "epoch": 19.501954784973652, "grad_norm": 16.07805633544922, "learning_rate": 8.300753583772453e-07, "loss": 0.5836, "step": 114730 }, { "epoch": 19.50365459799422, "grad_norm": 14.790863037109375, "learning_rate": 8.27242336676299e-07, "loss": 0.6718, "step": 114740 }, { "epoch": 19.50535441101479, "grad_norm": 13.671958923339844, "learning_rate": 8.244093149753527e-07, "loss": 0.6123, "step": 114750 }, { "epoch": 19.507054224035357, "grad_norm": 11.957573890686035, "learning_rate": 8.215762932744065e-07, "loss": 0.7783, "step": 114760 }, { "epoch": 19.508754037055922, "grad_norm": 11.314095497131348, "learning_rate": 8.187432715734603e-07, "loss": 0.5409, "step": 114770 }, { "epoch": 19.51045385007649, "grad_norm": 16.233911514282227, "learning_rate": 8.15910249872514e-07, "loss": 0.5269, "step": 114780 }, { "epoch": 19.51215366309706, "grad_norm": 14.039759635925293, "learning_rate": 8.130772281715679e-07, "loss": 0.5792, "step": 114790 }, { "epoch": 19.513853476117628, "grad_norm": 11.650052070617676, "learning_rate": 8.102442064706216e-07, "loss": 0.6367, "step": 114800 }, { "epoch": 19.515553289138193, "grad_norm": 14.182018280029297, "learning_rate": 8.074111847696753e-07, "loss": 0.7932, "step": 114810 }, { "epoch": 19.51725310215876, "grad_norm": 10.277432441711426, "learning_rate": 8.045781630687291e-07, "loss": 0.629, "step": 114820 }, { "epoch": 19.51895291517933, "grad_norm": 12.004395484924316, "learning_rate": 8.017451413677829e-07, "loss": 0.6306, "step": 114830 }, { "epoch": 19.5206527281999, "grad_norm": 13.623759269714355, "learning_rate": 7.989121196668366e-07, "loss": 0.5486, "step": 114840 }, { "epoch": 19.522352541220467, "grad_norm": 14.716872215270996, "learning_rate": 7.960790979658905e-07, "loss": 0.4766, "step": 114850 }, { "epoch": 19.524052354241032, "grad_norm": 16.40275764465332, "learning_rate": 7.932460762649442e-07, "loss": 0.5793, "step": 114860 }, { "epoch": 19.5257521672616, "grad_norm": 36.087013244628906, "learning_rate": 7.90413054563998e-07, "loss": 0.5969, "step": 114870 }, { "epoch": 19.52745198028217, "grad_norm": 34.4140739440918, "learning_rate": 7.875800328630518e-07, "loss": 0.6374, "step": 114880 }, { "epoch": 19.529151793302738, "grad_norm": 9.63185977935791, "learning_rate": 7.847470111621055e-07, "loss": 0.4608, "step": 114890 }, { "epoch": 19.530851606323303, "grad_norm": 14.418022155761719, "learning_rate": 7.819139894611594e-07, "loss": 0.6303, "step": 114900 }, { "epoch": 19.53255141934387, "grad_norm": 8.460796356201172, "learning_rate": 7.79080967760213e-07, "loss": 0.6866, "step": 114910 }, { "epoch": 19.53425123236444, "grad_norm": 11.014175415039062, "learning_rate": 7.762479460592668e-07, "loss": 0.7248, "step": 114920 }, { "epoch": 19.53595104538501, "grad_norm": 16.304712295532227, "learning_rate": 7.734149243583206e-07, "loss": 0.4861, "step": 114930 }, { "epoch": 19.537650858405577, "grad_norm": 16.01433563232422, "learning_rate": 7.705819026573744e-07, "loss": 0.5856, "step": 114940 }, { "epoch": 19.539350671426142, "grad_norm": 14.775360107421875, "learning_rate": 7.677488809564281e-07, "loss": 0.4553, "step": 114950 }, { "epoch": 19.54105048444671, "grad_norm": 15.20848560333252, "learning_rate": 7.64915859255482e-07, "loss": 0.6381, "step": 114960 }, { "epoch": 19.54275029746728, "grad_norm": 18.49176788330078, "learning_rate": 7.620828375545356e-07, "loss": 0.6486, "step": 114970 }, { "epoch": 19.544450110487848, "grad_norm": 11.159788131713867, "learning_rate": 7.592498158535894e-07, "loss": 0.69, "step": 114980 }, { "epoch": 19.546149923508413, "grad_norm": 11.846086502075195, "learning_rate": 7.564167941526432e-07, "loss": 0.6465, "step": 114990 }, { "epoch": 19.54784973652898, "grad_norm": 13.201220512390137, "learning_rate": 7.53583772451697e-07, "loss": 0.5153, "step": 115000 }, { "epoch": 19.54954954954955, "grad_norm": 15.564470291137695, "learning_rate": 7.507507507507509e-07, "loss": 0.674, "step": 115010 }, { "epoch": 19.55124936257012, "grad_norm": 12.172850608825684, "learning_rate": 7.479177290498045e-07, "loss": 0.5488, "step": 115020 }, { "epoch": 19.552949175590683, "grad_norm": 25.38190460205078, "learning_rate": 7.450847073488583e-07, "loss": 0.6991, "step": 115030 }, { "epoch": 19.554648988611252, "grad_norm": 10.753886222839355, "learning_rate": 7.422516856479121e-07, "loss": 0.7486, "step": 115040 }, { "epoch": 19.55634880163182, "grad_norm": 12.508926391601562, "learning_rate": 7.394186639469659e-07, "loss": 0.7384, "step": 115050 }, { "epoch": 19.55804861465239, "grad_norm": 10.68426513671875, "learning_rate": 7.365856422460196e-07, "loss": 0.6558, "step": 115060 }, { "epoch": 19.559748427672957, "grad_norm": 10.382109642028809, "learning_rate": 7.337526205450734e-07, "loss": 0.7026, "step": 115070 }, { "epoch": 19.561448240693522, "grad_norm": 15.908834457397461, "learning_rate": 7.309195988441271e-07, "loss": 0.6567, "step": 115080 }, { "epoch": 19.56314805371409, "grad_norm": 13.532742500305176, "learning_rate": 7.280865771431809e-07, "loss": 0.7514, "step": 115090 }, { "epoch": 19.56484786673466, "grad_norm": 14.134461402893066, "learning_rate": 7.252535554422347e-07, "loss": 0.7651, "step": 115100 }, { "epoch": 19.566547679755228, "grad_norm": 14.883820533752441, "learning_rate": 7.224205337412885e-07, "loss": 0.6847, "step": 115110 }, { "epoch": 19.568247492775793, "grad_norm": 14.752935409545898, "learning_rate": 7.195875120403422e-07, "loss": 0.6879, "step": 115120 }, { "epoch": 19.56994730579636, "grad_norm": 12.841023445129395, "learning_rate": 7.16754490339396e-07, "loss": 0.6576, "step": 115130 }, { "epoch": 19.57164711881693, "grad_norm": 12.891286849975586, "learning_rate": 7.139214686384498e-07, "loss": 0.692, "step": 115140 }, { "epoch": 19.5733469318375, "grad_norm": 13.55389404296875, "learning_rate": 7.110884469375036e-07, "loss": 0.6983, "step": 115150 }, { "epoch": 19.575046744858067, "grad_norm": 15.866182327270508, "learning_rate": 7.082554252365574e-07, "loss": 0.6894, "step": 115160 }, { "epoch": 19.576746557878632, "grad_norm": 15.788119316101074, "learning_rate": 7.054224035356111e-07, "loss": 0.7794, "step": 115170 }, { "epoch": 19.5784463708992, "grad_norm": 15.984511375427246, "learning_rate": 7.025893818346649e-07, "loss": 0.6793, "step": 115180 }, { "epoch": 19.58014618391977, "grad_norm": 11.575261116027832, "learning_rate": 6.997563601337186e-07, "loss": 0.7647, "step": 115190 }, { "epoch": 19.581845996940338, "grad_norm": 12.307255744934082, "learning_rate": 6.969233384327724e-07, "loss": 0.5048, "step": 115200 }, { "epoch": 19.583545809960903, "grad_norm": 10.079413414001465, "learning_rate": 6.940903167318262e-07, "loss": 0.5701, "step": 115210 }, { "epoch": 19.58524562298147, "grad_norm": 12.010627746582031, "learning_rate": 6.9125729503088e-07, "loss": 0.6179, "step": 115220 }, { "epoch": 19.58694543600204, "grad_norm": 11.988457679748535, "learning_rate": 6.884242733299336e-07, "loss": 0.6894, "step": 115230 }, { "epoch": 19.58864524902261, "grad_norm": 10.419591903686523, "learning_rate": 6.855912516289875e-07, "loss": 0.5491, "step": 115240 }, { "epoch": 19.590345062043177, "grad_norm": 20.760189056396484, "learning_rate": 6.827582299280412e-07, "loss": 0.6328, "step": 115250 }, { "epoch": 19.592044875063742, "grad_norm": 11.37855052947998, "learning_rate": 6.79925208227095e-07, "loss": 0.6155, "step": 115260 }, { "epoch": 19.59374468808431, "grad_norm": 12.84091854095459, "learning_rate": 6.770921865261489e-07, "loss": 0.7382, "step": 115270 }, { "epoch": 19.59544450110488, "grad_norm": 11.651398658752441, "learning_rate": 6.742591648252026e-07, "loss": 0.7274, "step": 115280 }, { "epoch": 19.597144314125448, "grad_norm": 14.394978523254395, "learning_rate": 6.714261431242564e-07, "loss": 0.5005, "step": 115290 }, { "epoch": 19.598844127146013, "grad_norm": 11.05339527130127, "learning_rate": 6.685931214233101e-07, "loss": 0.7516, "step": 115300 }, { "epoch": 19.60054394016658, "grad_norm": 13.72655200958252, "learning_rate": 6.657600997223639e-07, "loss": 0.6825, "step": 115310 }, { "epoch": 19.60224375318715, "grad_norm": 15.696529388427734, "learning_rate": 6.629270780214177e-07, "loss": 0.7125, "step": 115320 }, { "epoch": 19.603943566207718, "grad_norm": 11.367429733276367, "learning_rate": 6.600940563204715e-07, "loss": 0.623, "step": 115330 }, { "epoch": 19.605643379228283, "grad_norm": 17.076562881469727, "learning_rate": 6.572610346195251e-07, "loss": 0.8565, "step": 115340 }, { "epoch": 19.60734319224885, "grad_norm": 15.355883598327637, "learning_rate": 6.54428012918579e-07, "loss": 0.7798, "step": 115350 }, { "epoch": 19.60904300526942, "grad_norm": 15.196087837219238, "learning_rate": 6.515949912176327e-07, "loss": 0.5658, "step": 115360 }, { "epoch": 19.61074281828999, "grad_norm": 14.879523277282715, "learning_rate": 6.487619695166865e-07, "loss": 0.8405, "step": 115370 }, { "epoch": 19.612442631310557, "grad_norm": 14.33401107788086, "learning_rate": 6.459289478157403e-07, "loss": 0.6878, "step": 115380 }, { "epoch": 19.614142444331122, "grad_norm": 11.925209999084473, "learning_rate": 6.43095926114794e-07, "loss": 0.6113, "step": 115390 }, { "epoch": 19.61584225735169, "grad_norm": 16.400949478149414, "learning_rate": 6.402629044138478e-07, "loss": 0.7434, "step": 115400 }, { "epoch": 19.61754207037226, "grad_norm": 10.696770668029785, "learning_rate": 6.374298827129016e-07, "loss": 0.5303, "step": 115410 }, { "epoch": 19.619241883392828, "grad_norm": 15.541396141052246, "learning_rate": 6.345968610119554e-07, "loss": 0.7574, "step": 115420 }, { "epoch": 19.620941696413393, "grad_norm": 16.737979888916016, "learning_rate": 6.317638393110092e-07, "loss": 0.5046, "step": 115430 }, { "epoch": 19.62264150943396, "grad_norm": 7.679785251617432, "learning_rate": 6.28930817610063e-07, "loss": 0.6679, "step": 115440 }, { "epoch": 19.62434132245453, "grad_norm": 13.62015438079834, "learning_rate": 6.260977959091166e-07, "loss": 0.7067, "step": 115450 }, { "epoch": 19.6260411354751, "grad_norm": 13.546465873718262, "learning_rate": 6.232647742081705e-07, "loss": 0.5977, "step": 115460 }, { "epoch": 19.627740948495667, "grad_norm": 14.48752212524414, "learning_rate": 6.204317525072242e-07, "loss": 0.6211, "step": 115470 }, { "epoch": 19.629440761516232, "grad_norm": 9.010101318359375, "learning_rate": 6.17598730806278e-07, "loss": 0.5971, "step": 115480 }, { "epoch": 19.6311405745368, "grad_norm": 11.892492294311523, "learning_rate": 6.147657091053318e-07, "loss": 0.7111, "step": 115490 }, { "epoch": 19.63284038755737, "grad_norm": 10.446573257446289, "learning_rate": 6.119326874043855e-07, "loss": 0.6635, "step": 115500 }, { "epoch": 19.634540200577938, "grad_norm": 16.849529266357422, "learning_rate": 6.090996657034392e-07, "loss": 0.5742, "step": 115510 }, { "epoch": 19.636240013598503, "grad_norm": 11.557904243469238, "learning_rate": 6.062666440024931e-07, "loss": 0.6456, "step": 115520 }, { "epoch": 19.63793982661907, "grad_norm": 21.94384765625, "learning_rate": 6.034336223015469e-07, "loss": 0.6205, "step": 115530 }, { "epoch": 19.63963963963964, "grad_norm": 11.419543266296387, "learning_rate": 6.006006006006006e-07, "loss": 0.7865, "step": 115540 }, { "epoch": 19.64133945266021, "grad_norm": 10.309202194213867, "learning_rate": 5.977675788996544e-07, "loss": 0.7625, "step": 115550 }, { "epoch": 19.643039265680777, "grad_norm": 13.595233917236328, "learning_rate": 5.949345571987081e-07, "loss": 0.5979, "step": 115560 }, { "epoch": 19.644739078701342, "grad_norm": 8.243183135986328, "learning_rate": 5.92101535497762e-07, "loss": 0.6517, "step": 115570 }, { "epoch": 19.64643889172191, "grad_norm": 11.377893447875977, "learning_rate": 5.892685137968157e-07, "loss": 0.605, "step": 115580 }, { "epoch": 19.64813870474248, "grad_norm": 19.433298110961914, "learning_rate": 5.864354920958695e-07, "loss": 0.4779, "step": 115590 }, { "epoch": 19.649838517763047, "grad_norm": 33.84046173095703, "learning_rate": 5.836024703949233e-07, "loss": 0.6802, "step": 115600 }, { "epoch": 19.651538330783612, "grad_norm": 11.772449493408203, "learning_rate": 5.80769448693977e-07, "loss": 0.5487, "step": 115610 }, { "epoch": 19.65323814380418, "grad_norm": 10.087921142578125, "learning_rate": 5.779364269930307e-07, "loss": 0.5903, "step": 115620 }, { "epoch": 19.65493795682475, "grad_norm": 11.276001930236816, "learning_rate": 5.751034052920846e-07, "loss": 0.6532, "step": 115630 }, { "epoch": 19.656637769845318, "grad_norm": 13.069170951843262, "learning_rate": 5.722703835911383e-07, "loss": 0.6701, "step": 115640 }, { "epoch": 19.658337582865883, "grad_norm": 15.107105255126953, "learning_rate": 5.694373618901921e-07, "loss": 0.5976, "step": 115650 }, { "epoch": 19.66003739588645, "grad_norm": 12.821709632873535, "learning_rate": 5.666043401892459e-07, "loss": 0.5587, "step": 115660 }, { "epoch": 19.66173720890702, "grad_norm": 15.413789749145508, "learning_rate": 5.637713184882996e-07, "loss": 0.4667, "step": 115670 }, { "epoch": 19.66343702192759, "grad_norm": 15.057353973388672, "learning_rate": 5.609382967873534e-07, "loss": 0.6206, "step": 115680 }, { "epoch": 19.665136834948157, "grad_norm": 10.678116798400879, "learning_rate": 5.581052750864072e-07, "loss": 0.6164, "step": 115690 }, { "epoch": 19.666836647968722, "grad_norm": 15.427254676818848, "learning_rate": 5.55272253385461e-07, "loss": 0.6396, "step": 115700 }, { "epoch": 19.66853646098929, "grad_norm": 12.484142303466797, "learning_rate": 5.524392316845146e-07, "loss": 0.6597, "step": 115710 }, { "epoch": 19.67023627400986, "grad_norm": 11.332648277282715, "learning_rate": 5.496062099835685e-07, "loss": 0.63, "step": 115720 }, { "epoch": 19.671936087030428, "grad_norm": 17.3951416015625, "learning_rate": 5.467731882826222e-07, "loss": 0.7152, "step": 115730 }, { "epoch": 19.673635900050993, "grad_norm": 14.729843139648438, "learning_rate": 5.439401665816761e-07, "loss": 0.5727, "step": 115740 }, { "epoch": 19.67533571307156, "grad_norm": 8.269103050231934, "learning_rate": 5.411071448807298e-07, "loss": 0.6861, "step": 115750 }, { "epoch": 19.67703552609213, "grad_norm": 13.814142227172852, "learning_rate": 5.382741231797836e-07, "loss": 0.6494, "step": 115760 }, { "epoch": 19.6787353391127, "grad_norm": 11.750955581665039, "learning_rate": 5.354411014788373e-07, "loss": 0.6149, "step": 115770 }, { "epoch": 19.680435152133267, "grad_norm": 13.070866584777832, "learning_rate": 5.326080797778911e-07, "loss": 0.5509, "step": 115780 }, { "epoch": 19.682134965153832, "grad_norm": 29.13620948791504, "learning_rate": 5.297750580769448e-07, "loss": 0.5474, "step": 115790 }, { "epoch": 19.6838347781744, "grad_norm": 14.393733978271484, "learning_rate": 5.269420363759987e-07, "loss": 0.6012, "step": 115800 }, { "epoch": 19.68553459119497, "grad_norm": 16.35603141784668, "learning_rate": 5.241090146750525e-07, "loss": 0.7989, "step": 115810 }, { "epoch": 19.687234404215538, "grad_norm": 10.953839302062988, "learning_rate": 5.212759929741061e-07, "loss": 0.7171, "step": 115820 }, { "epoch": 19.688934217236103, "grad_norm": 9.3568115234375, "learning_rate": 5.1844297127316e-07, "loss": 0.7117, "step": 115830 }, { "epoch": 19.69063403025667, "grad_norm": 10.47490406036377, "learning_rate": 5.156099495722137e-07, "loss": 0.5211, "step": 115840 }, { "epoch": 19.69233384327724, "grad_norm": 13.566125869750977, "learning_rate": 5.127769278712676e-07, "loss": 0.6408, "step": 115850 }, { "epoch": 19.694033656297808, "grad_norm": 11.610088348388672, "learning_rate": 5.099439061703213e-07, "loss": 0.7509, "step": 115860 }, { "epoch": 19.695733469318377, "grad_norm": 10.268345832824707, "learning_rate": 5.07110884469375e-07, "loss": 0.5588, "step": 115870 }, { "epoch": 19.69743328233894, "grad_norm": 14.443018913269043, "learning_rate": 5.042778627684288e-07, "loss": 0.5929, "step": 115880 }, { "epoch": 19.69913309535951, "grad_norm": 12.005629539489746, "learning_rate": 5.014448410674826e-07, "loss": 0.5943, "step": 115890 }, { "epoch": 19.70083290838008, "grad_norm": 27.30284881591797, "learning_rate": 4.986118193665363e-07, "loss": 0.6641, "step": 115900 }, { "epoch": 19.702532721400647, "grad_norm": 13.315796852111816, "learning_rate": 4.957787976655902e-07, "loss": 0.5225, "step": 115910 }, { "epoch": 19.704232534421212, "grad_norm": 12.707597732543945, "learning_rate": 4.929457759646439e-07, "loss": 0.4926, "step": 115920 }, { "epoch": 19.70593234744178, "grad_norm": 13.08067512512207, "learning_rate": 4.901127542636976e-07, "loss": 0.7706, "step": 115930 }, { "epoch": 19.70763216046235, "grad_norm": 14.922141075134277, "learning_rate": 4.872797325627515e-07, "loss": 0.6193, "step": 115940 }, { "epoch": 19.709331973482918, "grad_norm": 11.814521789550781, "learning_rate": 4.844467108618052e-07, "loss": 0.6673, "step": 115950 }, { "epoch": 19.711031786503483, "grad_norm": 14.605836868286133, "learning_rate": 4.81613689160859e-07, "loss": 0.6915, "step": 115960 }, { "epoch": 19.71273159952405, "grad_norm": 11.697607040405273, "learning_rate": 4.787806674599128e-07, "loss": 0.5141, "step": 115970 }, { "epoch": 19.71443141254462, "grad_norm": 10.552337646484375, "learning_rate": 4.7594764575896657e-07, "loss": 0.4447, "step": 115980 }, { "epoch": 19.71613122556519, "grad_norm": 11.898014068603516, "learning_rate": 4.7311462405802025e-07, "loss": 0.5849, "step": 115990 }, { "epoch": 19.717831038585757, "grad_norm": 14.800291061401367, "learning_rate": 4.702816023570741e-07, "loss": 0.5621, "step": 116000 }, { "epoch": 19.719530851606322, "grad_norm": 14.755671501159668, "learning_rate": 4.6744858065612787e-07, "loss": 0.6058, "step": 116010 }, { "epoch": 19.72123066462689, "grad_norm": 13.594990730285645, "learning_rate": 4.646155589551816e-07, "loss": 0.7426, "step": 116020 }, { "epoch": 19.72293047764746, "grad_norm": 15.358997344970703, "learning_rate": 4.617825372542354e-07, "loss": 0.7531, "step": 116030 }, { "epoch": 19.724630290668028, "grad_norm": 12.342812538146973, "learning_rate": 4.589495155532892e-07, "loss": 0.5935, "step": 116040 }, { "epoch": 19.726330103688593, "grad_norm": 11.149002075195312, "learning_rate": 4.561164938523429e-07, "loss": 0.5164, "step": 116050 }, { "epoch": 19.72802991670916, "grad_norm": 10.87216854095459, "learning_rate": 4.532834721513967e-07, "loss": 0.6645, "step": 116060 }, { "epoch": 19.72972972972973, "grad_norm": 12.751341819763184, "learning_rate": 4.5045045045045043e-07, "loss": 0.6209, "step": 116070 }, { "epoch": 19.7314295427503, "grad_norm": 11.878283500671387, "learning_rate": 4.476174287495042e-07, "loss": 0.5098, "step": 116080 }, { "epoch": 19.733129355770867, "grad_norm": 10.76452350616455, "learning_rate": 4.44784407048558e-07, "loss": 0.5521, "step": 116090 }, { "epoch": 19.734829168791432, "grad_norm": 14.968302726745605, "learning_rate": 4.4195138534761174e-07, "loss": 0.6368, "step": 116100 }, { "epoch": 19.736528981812, "grad_norm": 10.493738174438477, "learning_rate": 4.391183636466655e-07, "loss": 0.5733, "step": 116110 }, { "epoch": 19.73822879483257, "grad_norm": 8.106233596801758, "learning_rate": 4.362853419457193e-07, "loss": 0.5987, "step": 116120 }, { "epoch": 19.739928607853138, "grad_norm": 18.193496704101562, "learning_rate": 4.334523202447731e-07, "loss": 0.5847, "step": 116130 }, { "epoch": 19.741628420873703, "grad_norm": 8.725821495056152, "learning_rate": 4.306192985438269e-07, "loss": 0.5103, "step": 116140 }, { "epoch": 19.74332823389427, "grad_norm": 15.183518409729004, "learning_rate": 4.277862768428806e-07, "loss": 0.5771, "step": 116150 }, { "epoch": 19.74502804691484, "grad_norm": 15.040628433227539, "learning_rate": 4.249532551419344e-07, "loss": 0.5317, "step": 116160 }, { "epoch": 19.746727859935408, "grad_norm": 15.488381385803223, "learning_rate": 4.221202334409882e-07, "loss": 0.4876, "step": 116170 }, { "epoch": 19.748427672955973, "grad_norm": 19.56632423400879, "learning_rate": 4.192872117400419e-07, "loss": 0.7055, "step": 116180 }, { "epoch": 19.75012748597654, "grad_norm": 14.053078651428223, "learning_rate": 4.164541900390957e-07, "loss": 0.6119, "step": 116190 }, { "epoch": 19.75182729899711, "grad_norm": 13.074031829833984, "learning_rate": 4.136211683381495e-07, "loss": 0.6424, "step": 116200 }, { "epoch": 19.75352711201768, "grad_norm": 11.238129615783691, "learning_rate": 4.1078814663720323e-07, "loss": 0.5807, "step": 116210 }, { "epoch": 19.755226925038247, "grad_norm": 15.081506729125977, "learning_rate": 4.07955124936257e-07, "loss": 0.565, "step": 116220 }, { "epoch": 19.756926738058812, "grad_norm": 12.793720245361328, "learning_rate": 4.051221032353108e-07, "loss": 0.6369, "step": 116230 }, { "epoch": 19.75862655107938, "grad_norm": 11.455244064331055, "learning_rate": 4.0228908153436454e-07, "loss": 0.7066, "step": 116240 }, { "epoch": 19.76032636409995, "grad_norm": 16.94839859008789, "learning_rate": 3.994560598334183e-07, "loss": 0.6255, "step": 116250 }, { "epoch": 19.762026177120518, "grad_norm": 9.755508422851562, "learning_rate": 3.966230381324721e-07, "loss": 0.6081, "step": 116260 }, { "epoch": 19.763725990141083, "grad_norm": 16.023603439331055, "learning_rate": 3.937900164315259e-07, "loss": 0.6958, "step": 116270 }, { "epoch": 19.76542580316165, "grad_norm": 10.953290939331055, "learning_rate": 3.909569947305797e-07, "loss": 0.7021, "step": 116280 }, { "epoch": 19.76712561618222, "grad_norm": 11.143585205078125, "learning_rate": 3.881239730296334e-07, "loss": 0.6346, "step": 116290 }, { "epoch": 19.76882542920279, "grad_norm": 20.796411514282227, "learning_rate": 3.852909513286872e-07, "loss": 0.5235, "step": 116300 }, { "epoch": 19.770525242223357, "grad_norm": 10.859975814819336, "learning_rate": 3.82457929627741e-07, "loss": 0.6978, "step": 116310 }, { "epoch": 19.772225055243922, "grad_norm": 13.834797859191895, "learning_rate": 3.796249079267947e-07, "loss": 0.532, "step": 116320 }, { "epoch": 19.77392486826449, "grad_norm": 13.473756790161133, "learning_rate": 3.767918862258485e-07, "loss": 0.4835, "step": 116330 }, { "epoch": 19.77562468128506, "grad_norm": 12.485865592956543, "learning_rate": 3.7395886452490224e-07, "loss": 0.5174, "step": 116340 }, { "epoch": 19.777324494305628, "grad_norm": 20.13069725036621, "learning_rate": 3.7112584282395603e-07, "loss": 0.6145, "step": 116350 }, { "epoch": 19.779024307326193, "grad_norm": 12.743202209472656, "learning_rate": 3.682928211230098e-07, "loss": 0.533, "step": 116360 }, { "epoch": 19.78072412034676, "grad_norm": 11.719700813293457, "learning_rate": 3.6545979942206355e-07, "loss": 0.5872, "step": 116370 }, { "epoch": 19.78242393336733, "grad_norm": 8.793728828430176, "learning_rate": 3.6262677772111734e-07, "loss": 0.5649, "step": 116380 }, { "epoch": 19.7841237463879, "grad_norm": 8.137155532836914, "learning_rate": 3.597937560201711e-07, "loss": 0.6585, "step": 116390 }, { "epoch": 19.785823559408463, "grad_norm": 11.725790023803711, "learning_rate": 3.569607343192249e-07, "loss": 0.6258, "step": 116400 }, { "epoch": 19.787523372429032, "grad_norm": 11.262950897216797, "learning_rate": 3.541277126182787e-07, "loss": 0.5283, "step": 116410 }, { "epoch": 19.7892231854496, "grad_norm": 11.203479766845703, "learning_rate": 3.5129469091733243e-07, "loss": 0.7035, "step": 116420 }, { "epoch": 19.79092299847017, "grad_norm": 14.302757263183594, "learning_rate": 3.484616692163862e-07, "loss": 0.6048, "step": 116430 }, { "epoch": 19.792622811490737, "grad_norm": 13.516587257385254, "learning_rate": 3.4562864751544e-07, "loss": 0.6803, "step": 116440 }, { "epoch": 19.794322624511302, "grad_norm": 17.10633659362793, "learning_rate": 3.4279562581449374e-07, "loss": 0.6824, "step": 116450 }, { "epoch": 19.79602243753187, "grad_norm": 14.313952445983887, "learning_rate": 3.399626041135475e-07, "loss": 0.5994, "step": 116460 }, { "epoch": 19.79772225055244, "grad_norm": 11.524977684020996, "learning_rate": 3.371295824126013e-07, "loss": 0.6062, "step": 116470 }, { "epoch": 19.799422063573008, "grad_norm": 15.086190223693848, "learning_rate": 3.3429656071165504e-07, "loss": 0.675, "step": 116480 }, { "epoch": 19.801121876593573, "grad_norm": 11.614287376403809, "learning_rate": 3.3146353901070883e-07, "loss": 0.6556, "step": 116490 }, { "epoch": 19.80282168961414, "grad_norm": 13.181023597717285, "learning_rate": 3.2863051730976256e-07, "loss": 0.6448, "step": 116500 }, { "epoch": 19.80452150263471, "grad_norm": 12.145759582519531, "learning_rate": 3.2579749560881635e-07, "loss": 0.7482, "step": 116510 }, { "epoch": 19.80622131565528, "grad_norm": 15.006765365600586, "learning_rate": 3.2296447390787014e-07, "loss": 0.7174, "step": 116520 }, { "epoch": 19.807921128675847, "grad_norm": 9.70891284942627, "learning_rate": 3.201314522069239e-07, "loss": 0.7433, "step": 116530 }, { "epoch": 19.809620941696412, "grad_norm": 10.961007118225098, "learning_rate": 3.172984305059777e-07, "loss": 0.7151, "step": 116540 }, { "epoch": 19.81132075471698, "grad_norm": 17.390117645263672, "learning_rate": 3.144654088050315e-07, "loss": 0.5596, "step": 116550 }, { "epoch": 19.81302056773755, "grad_norm": 27.8223876953125, "learning_rate": 3.1163238710408523e-07, "loss": 0.7336, "step": 116560 }, { "epoch": 19.814720380758118, "grad_norm": 9.245030403137207, "learning_rate": 3.08799365403139e-07, "loss": 0.6021, "step": 116570 }, { "epoch": 19.816420193778683, "grad_norm": 9.429797172546387, "learning_rate": 3.0596634370219275e-07, "loss": 0.595, "step": 116580 }, { "epoch": 19.81812000679925, "grad_norm": 14.649182319641113, "learning_rate": 3.0313332200124654e-07, "loss": 0.5736, "step": 116590 }, { "epoch": 19.81981981981982, "grad_norm": 12.541906356811523, "learning_rate": 3.003003003003003e-07, "loss": 0.5377, "step": 116600 }, { "epoch": 19.82151963284039, "grad_norm": 12.291504859924316, "learning_rate": 2.9746727859935406e-07, "loss": 0.8761, "step": 116610 }, { "epoch": 19.823219445860957, "grad_norm": 13.229620933532715, "learning_rate": 2.9463425689840784e-07, "loss": 0.5664, "step": 116620 }, { "epoch": 19.824919258881522, "grad_norm": 14.796963691711426, "learning_rate": 2.9180123519746163e-07, "loss": 0.7827, "step": 116630 }, { "epoch": 19.82661907190209, "grad_norm": 12.367390632629395, "learning_rate": 2.8896821349651536e-07, "loss": 0.7706, "step": 116640 }, { "epoch": 19.82831888492266, "grad_norm": 23.139291763305664, "learning_rate": 2.8613519179556915e-07, "loss": 0.5995, "step": 116650 }, { "epoch": 19.830018697943228, "grad_norm": 12.510575294494629, "learning_rate": 2.8330217009462294e-07, "loss": 0.466, "step": 116660 }, { "epoch": 19.831718510963793, "grad_norm": 9.670228958129883, "learning_rate": 2.804691483936767e-07, "loss": 0.5371, "step": 116670 }, { "epoch": 19.83341832398436, "grad_norm": 11.272738456726074, "learning_rate": 2.776361266927305e-07, "loss": 0.5477, "step": 116680 }, { "epoch": 19.83511813700493, "grad_norm": 12.800834655761719, "learning_rate": 2.7480310499178424e-07, "loss": 0.6221, "step": 116690 }, { "epoch": 19.836817950025498, "grad_norm": 14.592101097106934, "learning_rate": 2.7197008329083803e-07, "loss": 0.6536, "step": 116700 }, { "epoch": 19.838517763046063, "grad_norm": 14.969858169555664, "learning_rate": 2.691370615898918e-07, "loss": 0.6489, "step": 116710 }, { "epoch": 19.84021757606663, "grad_norm": 12.245625495910645, "learning_rate": 2.6630403988894555e-07, "loss": 0.7101, "step": 116720 }, { "epoch": 19.8419173890872, "grad_norm": 13.81213665008545, "learning_rate": 2.6347101818799934e-07, "loss": 0.5968, "step": 116730 }, { "epoch": 19.84361720210777, "grad_norm": 18.42179298400879, "learning_rate": 2.6063799648705307e-07, "loss": 0.646, "step": 116740 }, { "epoch": 19.845317015128337, "grad_norm": 10.98261547088623, "learning_rate": 2.5780497478610686e-07, "loss": 0.5787, "step": 116750 }, { "epoch": 19.847016828148902, "grad_norm": 17.353384017944336, "learning_rate": 2.5497195308516064e-07, "loss": 0.6906, "step": 116760 }, { "epoch": 19.84871664116947, "grad_norm": 12.427804946899414, "learning_rate": 2.521389313842144e-07, "loss": 0.7777, "step": 116770 }, { "epoch": 19.85041645419004, "grad_norm": 12.515690803527832, "learning_rate": 2.4930590968326816e-07, "loss": 0.7402, "step": 116780 }, { "epoch": 19.852116267210608, "grad_norm": 10.724082946777344, "learning_rate": 2.4647288798232195e-07, "loss": 0.7627, "step": 116790 }, { "epoch": 19.853816080231173, "grad_norm": 11.300004005432129, "learning_rate": 2.4363986628137574e-07, "loss": 0.6251, "step": 116800 }, { "epoch": 19.85551589325174, "grad_norm": 11.234041213989258, "learning_rate": 2.408068445804295e-07, "loss": 0.7654, "step": 116810 }, { "epoch": 19.85721570627231, "grad_norm": 10.667282104492188, "learning_rate": 2.3797382287948328e-07, "loss": 0.588, "step": 116820 }, { "epoch": 19.85891551929288, "grad_norm": 17.500656127929688, "learning_rate": 2.3514080117853704e-07, "loss": 0.7576, "step": 116830 }, { "epoch": 19.860615332313447, "grad_norm": 14.032679557800293, "learning_rate": 2.323077794775908e-07, "loss": 0.7409, "step": 116840 }, { "epoch": 19.862315145334012, "grad_norm": 15.014411926269531, "learning_rate": 2.294747577766446e-07, "loss": 0.5499, "step": 116850 }, { "epoch": 19.86401495835458, "grad_norm": 11.95638656616211, "learning_rate": 2.2664173607569835e-07, "loss": 0.5616, "step": 116860 }, { "epoch": 19.86571477137515, "grad_norm": 9.7181396484375, "learning_rate": 2.238087143747521e-07, "loss": 0.6118, "step": 116870 }, { "epoch": 19.867414584395718, "grad_norm": 12.884450912475586, "learning_rate": 2.2097569267380587e-07, "loss": 0.5995, "step": 116880 }, { "epoch": 19.869114397416283, "grad_norm": 13.640349388122559, "learning_rate": 2.1814267097285966e-07, "loss": 0.7407, "step": 116890 }, { "epoch": 19.87081421043685, "grad_norm": 12.741305351257324, "learning_rate": 2.1530964927191344e-07, "loss": 0.6313, "step": 116900 }, { "epoch": 19.87251402345742, "grad_norm": 11.036375999450684, "learning_rate": 2.124766275709672e-07, "loss": 0.7375, "step": 116910 }, { "epoch": 19.87421383647799, "grad_norm": 15.744893074035645, "learning_rate": 2.0964360587002096e-07, "loss": 0.7171, "step": 116920 }, { "epoch": 19.875913649498557, "grad_norm": 13.044231414794922, "learning_rate": 2.0681058416907475e-07, "loss": 0.558, "step": 116930 }, { "epoch": 19.877613462519122, "grad_norm": 12.132162094116211, "learning_rate": 2.039775624681285e-07, "loss": 0.7246, "step": 116940 }, { "epoch": 19.87931327553969, "grad_norm": 12.727339744567871, "learning_rate": 2.0114454076718227e-07, "loss": 0.6703, "step": 116950 }, { "epoch": 19.88101308856026, "grad_norm": 13.07107925415039, "learning_rate": 1.9831151906623606e-07, "loss": 0.5841, "step": 116960 }, { "epoch": 19.882712901580827, "grad_norm": 13.574267387390137, "learning_rate": 1.9547849736528984e-07, "loss": 0.6803, "step": 116970 }, { "epoch": 19.884412714601392, "grad_norm": 12.260666847229004, "learning_rate": 1.926454756643436e-07, "loss": 0.7379, "step": 116980 }, { "epoch": 19.88611252762196, "grad_norm": 11.704825401306152, "learning_rate": 1.8981245396339736e-07, "loss": 0.6257, "step": 116990 }, { "epoch": 19.88781234064253, "grad_norm": 15.27352523803711, "learning_rate": 1.8697943226245112e-07, "loss": 0.6283, "step": 117000 }, { "epoch": 19.889512153663098, "grad_norm": 14.72170639038086, "learning_rate": 1.841464105615049e-07, "loss": 0.7455, "step": 117010 }, { "epoch": 19.891211966683663, "grad_norm": 12.75976276397705, "learning_rate": 1.8131338886055867e-07, "loss": 0.7236, "step": 117020 }, { "epoch": 19.89291177970423, "grad_norm": 12.269734382629395, "learning_rate": 1.7848036715961246e-07, "loss": 0.5819, "step": 117030 }, { "epoch": 19.8946115927248, "grad_norm": 10.693853378295898, "learning_rate": 1.7564734545866622e-07, "loss": 0.7727, "step": 117040 }, { "epoch": 19.89631140574537, "grad_norm": 13.495809555053711, "learning_rate": 1.7281432375772e-07, "loss": 0.6435, "step": 117050 }, { "epoch": 19.898011218765937, "grad_norm": 11.358928680419922, "learning_rate": 1.6998130205677376e-07, "loss": 0.6328, "step": 117060 }, { "epoch": 19.899711031786502, "grad_norm": 19.25068473815918, "learning_rate": 1.6714828035582752e-07, "loss": 0.5648, "step": 117070 }, { "epoch": 19.90141084480707, "grad_norm": 9.682066917419434, "learning_rate": 1.6431525865488128e-07, "loss": 0.5922, "step": 117080 }, { "epoch": 19.90311065782764, "grad_norm": 11.820561408996582, "learning_rate": 1.6148223695393507e-07, "loss": 0.6248, "step": 117090 }, { "epoch": 19.904810470848208, "grad_norm": 10.966790199279785, "learning_rate": 1.5864921525298886e-07, "loss": 0.6493, "step": 117100 }, { "epoch": 19.906510283868773, "grad_norm": 24.548063278198242, "learning_rate": 1.5581619355204262e-07, "loss": 0.692, "step": 117110 }, { "epoch": 19.90821009688934, "grad_norm": 18.712038040161133, "learning_rate": 1.5298317185109638e-07, "loss": 0.6291, "step": 117120 }, { "epoch": 19.90990990990991, "grad_norm": 18.268293380737305, "learning_rate": 1.5015015015015016e-07, "loss": 0.5931, "step": 117130 }, { "epoch": 19.91160972293048, "grad_norm": 13.932934761047363, "learning_rate": 1.4731712844920392e-07, "loss": 0.8462, "step": 117140 }, { "epoch": 19.913309535951047, "grad_norm": 17.098581314086914, "learning_rate": 1.4448410674825768e-07, "loss": 0.6147, "step": 117150 }, { "epoch": 19.915009348971612, "grad_norm": 14.770748138427734, "learning_rate": 1.4165108504731147e-07, "loss": 0.6216, "step": 117160 }, { "epoch": 19.91670916199218, "grad_norm": 17.048105239868164, "learning_rate": 1.3881806334636525e-07, "loss": 0.6051, "step": 117170 }, { "epoch": 19.91840897501275, "grad_norm": 13.962204933166504, "learning_rate": 1.3598504164541901e-07, "loss": 0.6209, "step": 117180 }, { "epoch": 19.920108788033318, "grad_norm": 31.75360679626465, "learning_rate": 1.3315201994447277e-07, "loss": 0.534, "step": 117190 }, { "epoch": 19.921808601053883, "grad_norm": 9.942351341247559, "learning_rate": 1.3031899824352653e-07, "loss": 0.5817, "step": 117200 }, { "epoch": 19.92350841407445, "grad_norm": 17.341238021850586, "learning_rate": 1.2748597654258032e-07, "loss": 0.774, "step": 117210 }, { "epoch": 19.92520822709502, "grad_norm": 8.292819023132324, "learning_rate": 1.2465295484163408e-07, "loss": 0.5779, "step": 117220 }, { "epoch": 19.92690804011559, "grad_norm": 10.769972801208496, "learning_rate": 1.2181993314068787e-07, "loss": 0.664, "step": 117230 }, { "epoch": 19.928607853136157, "grad_norm": 25.371047973632812, "learning_rate": 1.1898691143974164e-07, "loss": 0.7154, "step": 117240 }, { "epoch": 19.930307666156722, "grad_norm": 12.015654563903809, "learning_rate": 1.161538897387954e-07, "loss": 0.7294, "step": 117250 }, { "epoch": 19.93200747917729, "grad_norm": 8.183801651000977, "learning_rate": 1.1332086803784917e-07, "loss": 0.6848, "step": 117260 }, { "epoch": 19.93370729219786, "grad_norm": 18.97544288635254, "learning_rate": 1.1048784633690293e-07, "loss": 0.7747, "step": 117270 }, { "epoch": 19.935407105218427, "grad_norm": 11.099807739257812, "learning_rate": 1.0765482463595672e-07, "loss": 0.4856, "step": 117280 }, { "epoch": 19.937106918238992, "grad_norm": 11.574996948242188, "learning_rate": 1.0482180293501048e-07, "loss": 0.5772, "step": 117290 }, { "epoch": 19.93880673125956, "grad_norm": 13.712858200073242, "learning_rate": 1.0198878123406425e-07, "loss": 0.5364, "step": 117300 }, { "epoch": 19.94050654428013, "grad_norm": 23.5440673828125, "learning_rate": 9.915575953311803e-08, "loss": 0.6096, "step": 117310 }, { "epoch": 19.942206357300698, "grad_norm": 13.281144142150879, "learning_rate": 9.63227378321718e-08, "loss": 0.6366, "step": 117320 }, { "epoch": 19.943906170321263, "grad_norm": 13.103743553161621, "learning_rate": 9.348971613122556e-08, "loss": 0.7736, "step": 117330 }, { "epoch": 19.94560598334183, "grad_norm": 13.77402400970459, "learning_rate": 9.065669443027933e-08, "loss": 0.5868, "step": 117340 }, { "epoch": 19.9473057963624, "grad_norm": 14.04423713684082, "learning_rate": 8.782367272933311e-08, "loss": 0.7991, "step": 117350 }, { "epoch": 19.94900560938297, "grad_norm": 10.073491096496582, "learning_rate": 8.499065102838688e-08, "loss": 0.7033, "step": 117360 }, { "epoch": 19.950705422403537, "grad_norm": 11.85392951965332, "learning_rate": 8.215762932744064e-08, "loss": 0.7194, "step": 117370 }, { "epoch": 19.952405235424102, "grad_norm": 7.739538669586182, "learning_rate": 7.932460762649443e-08, "loss": 0.7299, "step": 117380 }, { "epoch": 19.95410504844467, "grad_norm": 10.752803802490234, "learning_rate": 7.649158592554819e-08, "loss": 0.5705, "step": 117390 }, { "epoch": 19.95580486146524, "grad_norm": 17.661691665649414, "learning_rate": 7.365856422460196e-08, "loss": 0.8343, "step": 117400 }, { "epoch": 19.957504674485808, "grad_norm": 14.970563888549805, "learning_rate": 7.082554252365573e-08, "loss": 0.4533, "step": 117410 }, { "epoch": 19.959204487506373, "grad_norm": 11.032374382019043, "learning_rate": 6.799252082270951e-08, "loss": 0.5566, "step": 117420 }, { "epoch": 19.96090430052694, "grad_norm": 13.578103065490723, "learning_rate": 6.515949912176327e-08, "loss": 0.8537, "step": 117430 }, { "epoch": 19.96260411354751, "grad_norm": 11.8951997756958, "learning_rate": 6.232647742081704e-08, "loss": 0.5703, "step": 117440 }, { "epoch": 19.96430392656808, "grad_norm": 13.350385665893555, "learning_rate": 5.949345571987082e-08, "loss": 0.6213, "step": 117450 }, { "epoch": 19.966003739588647, "grad_norm": 12.875322341918945, "learning_rate": 5.666043401892459e-08, "loss": 0.5869, "step": 117460 }, { "epoch": 19.967703552609212, "grad_norm": 12.465872764587402, "learning_rate": 5.382741231797836e-08, "loss": 0.4955, "step": 117470 }, { "epoch": 19.96940336562978, "grad_norm": 14.412276268005371, "learning_rate": 5.099439061703213e-08, "loss": 0.6624, "step": 117480 }, { "epoch": 19.97110317865035, "grad_norm": 12.223875045776367, "learning_rate": 4.81613689160859e-08, "loss": 0.613, "step": 117490 }, { "epoch": 19.972802991670918, "grad_norm": 13.86021900177002, "learning_rate": 4.532834721513967e-08, "loss": 0.5961, "step": 117500 }, { "epoch": 19.974502804691483, "grad_norm": 15.112532615661621, "learning_rate": 4.249532551419344e-08, "loss": 0.5115, "step": 117510 }, { "epoch": 19.97620261771205, "grad_norm": 13.632711410522461, "learning_rate": 3.9662303813247214e-08, "loss": 0.5896, "step": 117520 }, { "epoch": 19.97790243073262, "grad_norm": 14.382295608520508, "learning_rate": 3.682928211230098e-08, "loss": 0.4585, "step": 117530 }, { "epoch": 19.979602243753188, "grad_norm": 10.066935539245605, "learning_rate": 3.3996260411354754e-08, "loss": 0.7776, "step": 117540 }, { "epoch": 19.981302056773757, "grad_norm": 11.458023071289062, "learning_rate": 3.116323871040852e-08, "loss": 0.6502, "step": 117550 }, { "epoch": 19.98300186979432, "grad_norm": 11.034173011779785, "learning_rate": 2.8330217009462294e-08, "loss": 0.5677, "step": 117560 }, { "epoch": 19.98470168281489, "grad_norm": 12.86415958404541, "learning_rate": 2.5497195308516064e-08, "loss": 0.6118, "step": 117570 }, { "epoch": 19.98640149583546, "grad_norm": 10.399276733398438, "learning_rate": 2.2664173607569834e-08, "loss": 0.5567, "step": 117580 }, { "epoch": 19.988101308856027, "grad_norm": 11.247305870056152, "learning_rate": 1.9831151906623607e-08, "loss": 0.7186, "step": 117590 }, { "epoch": 19.989801121876592, "grad_norm": 11.301703453063965, "learning_rate": 1.6998130205677377e-08, "loss": 0.7018, "step": 117600 }, { "epoch": 19.99150093489716, "grad_norm": 14.88711929321289, "learning_rate": 1.4165108504731147e-08, "loss": 0.6015, "step": 117610 }, { "epoch": 19.99320074791773, "grad_norm": 12.800294876098633, "learning_rate": 1.1332086803784917e-08, "loss": 0.5882, "step": 117620 }, { "epoch": 19.994900560938298, "grad_norm": 11.100491523742676, "learning_rate": 8.499065102838688e-09, "loss": 0.474, "step": 117630 }, { "epoch": 19.996600373958863, "grad_norm": 12.040958404541016, "learning_rate": 5.666043401892458e-09, "loss": 0.6462, "step": 117640 }, { "epoch": 19.99830018697943, "grad_norm": 12.47590160369873, "learning_rate": 2.833021700946229e-09, "loss": 0.6876, "step": 117650 }, { "epoch": 20.0, "grad_norm": 29.25600814819336, "learning_rate": 0.0, "loss": 0.5415, "step": 117660 }, { "epoch": 20.0, "eval_cer": 1.0, "eval_loss": 2.5755105018615723, "eval_runtime": 1975.3886, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.238, "step": 117660 } ], "logging_steps": 10, "max_steps": 117660, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.873978296028889e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }