{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5476701200310347, "eval_steps": 500, "global_step": 24000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022819588334626443, "grad_norm": 38.25, "learning_rate": 1.5209125475285173e-07, "loss": 1.723, "step": 10 }, { "epoch": 0.00045639176669252886, "grad_norm": 26.125, "learning_rate": 3.0418250950570346e-07, "loss": 1.7514, "step": 20 }, { "epoch": 0.0006845876500387933, "grad_norm": 45.75, "learning_rate": 4.562737642585552e-07, "loss": 1.8018, "step": 30 }, { "epoch": 0.0009127835333850577, "grad_norm": 73.0, "learning_rate": 6.083650190114069e-07, "loss": 1.6866, "step": 40 }, { "epoch": 0.0011409794167313223, "grad_norm": 33.0, "learning_rate": 7.604562737642586e-07, "loss": 1.7271, "step": 50 }, { "epoch": 0.0013691753000775865, "grad_norm": 55.5, "learning_rate": 9.125475285171104e-07, "loss": 1.7047, "step": 60 }, { "epoch": 0.001597371183423851, "grad_norm": 20.5, "learning_rate": 1.064638783269962e-06, "loss": 1.6827, "step": 70 }, { "epoch": 0.0018255670667701154, "grad_norm": 33.0, "learning_rate": 1.2167300380228138e-06, "loss": 1.7259, "step": 80 }, { "epoch": 0.00205376295011638, "grad_norm": 32.75, "learning_rate": 1.3688212927756656e-06, "loss": 1.7137, "step": 90 }, { "epoch": 0.0022819588334626446, "grad_norm": 4.65625, "learning_rate": 1.5209125475285172e-06, "loss": 1.6195, "step": 100 }, { "epoch": 0.002510154716808909, "grad_norm": 4.4375, "learning_rate": 1.6730038022813688e-06, "loss": 1.668, "step": 110 }, { "epoch": 0.002738350600155173, "grad_norm": 3.953125, "learning_rate": 1.8250950570342208e-06, "loss": 1.6049, "step": 120 }, { "epoch": 0.0029665464835014377, "grad_norm": 3.84375, "learning_rate": 1.9771863117870722e-06, "loss": 1.6246, "step": 130 }, { "epoch": 0.003194742366847702, "grad_norm": 4.0625, "learning_rate": 2.129277566539924e-06, "loss": 1.5938, "step": 140 }, { "epoch": 0.0034229382501939666, "grad_norm": 3.75, "learning_rate": 2.281368821292776e-06, "loss": 1.572, "step": 150 }, { "epoch": 0.003651134133540231, "grad_norm": 3.890625, "learning_rate": 2.4334600760456276e-06, "loss": 1.5447, "step": 160 }, { "epoch": 0.0038793300168864955, "grad_norm": 3.671875, "learning_rate": 2.585551330798479e-06, "loss": 1.5597, "step": 170 }, { "epoch": 0.00410752590023276, "grad_norm": 3.484375, "learning_rate": 2.7376425855513313e-06, "loss": 1.5742, "step": 180 }, { "epoch": 0.004335721783579024, "grad_norm": 3.4375, "learning_rate": 2.8897338403041826e-06, "loss": 1.5406, "step": 190 }, { "epoch": 0.004563917666925289, "grad_norm": 5.0, "learning_rate": 3.0418250950570345e-06, "loss": 1.5553, "step": 200 }, { "epoch": 0.004792113550271553, "grad_norm": 3.625, "learning_rate": 3.1939163498098863e-06, "loss": 1.577, "step": 210 }, { "epoch": 0.005020309433617818, "grad_norm": 3.984375, "learning_rate": 3.3460076045627376e-06, "loss": 1.5313, "step": 220 }, { "epoch": 0.005248505316964082, "grad_norm": 4.4375, "learning_rate": 3.4980988593155894e-06, "loss": 1.5889, "step": 230 }, { "epoch": 0.005476701200310346, "grad_norm": 3.78125, "learning_rate": 3.6501901140684417e-06, "loss": 1.5283, "step": 240 }, { "epoch": 0.005704897083656611, "grad_norm": 3.9375, "learning_rate": 3.8022813688212926e-06, "loss": 1.5268, "step": 250 }, { "epoch": 0.005933092967002875, "grad_norm": 3.6875, "learning_rate": 3.9543726235741444e-06, "loss": 1.5106, "step": 260 }, { "epoch": 0.00616128885034914, "grad_norm": 3.96875, "learning_rate": 4.106463878326996e-06, "loss": 1.5173, "step": 270 }, { "epoch": 0.006389484733695404, "grad_norm": 4.3125, "learning_rate": 4.258555133079848e-06, "loss": 1.5211, "step": 280 }, { "epoch": 0.0066176806170416685, "grad_norm": 4.0, "learning_rate": 4.4106463878327e-06, "loss": 1.5162, "step": 290 }, { "epoch": 0.006845876500387933, "grad_norm": 3.796875, "learning_rate": 4.562737642585552e-06, "loss": 1.516, "step": 300 }, { "epoch": 0.007074072383734197, "grad_norm": 4.15625, "learning_rate": 4.7148288973384035e-06, "loss": 1.5391, "step": 310 }, { "epoch": 0.007302268267080462, "grad_norm": 3.34375, "learning_rate": 4.866920152091255e-06, "loss": 1.5261, "step": 320 }, { "epoch": 0.007530464150426726, "grad_norm": 4.0625, "learning_rate": 5.019011406844106e-06, "loss": 1.5017, "step": 330 }, { "epoch": 0.007758660033772991, "grad_norm": 3.59375, "learning_rate": 5.171102661596958e-06, "loss": 1.5055, "step": 340 }, { "epoch": 0.007986855917119255, "grad_norm": 3.6875, "learning_rate": 5.323193916349811e-06, "loss": 1.5308, "step": 350 }, { "epoch": 0.00821505180046552, "grad_norm": 3.890625, "learning_rate": 5.4752851711026625e-06, "loss": 1.5245, "step": 360 }, { "epoch": 0.008443247683811784, "grad_norm": 3.875, "learning_rate": 5.627376425855514e-06, "loss": 1.458, "step": 370 }, { "epoch": 0.008671443567158049, "grad_norm": 3.921875, "learning_rate": 5.779467680608365e-06, "loss": 1.4163, "step": 380 }, { "epoch": 0.008899639450504314, "grad_norm": 3.734375, "learning_rate": 5.931558935361217e-06, "loss": 1.4617, "step": 390 }, { "epoch": 0.009127835333850578, "grad_norm": 3.578125, "learning_rate": 6.083650190114069e-06, "loss": 1.478, "step": 400 }, { "epoch": 0.009356031217196841, "grad_norm": 4.25, "learning_rate": 6.235741444866921e-06, "loss": 1.4673, "step": 410 }, { "epoch": 0.009584227100543106, "grad_norm": 3.609375, "learning_rate": 6.3878326996197725e-06, "loss": 1.4664, "step": 420 }, { "epoch": 0.00981242298388937, "grad_norm": 3.90625, "learning_rate": 6.539923954372624e-06, "loss": 1.4617, "step": 430 }, { "epoch": 0.010040618867235635, "grad_norm": 3.953125, "learning_rate": 6.692015209125475e-06, "loss": 1.5364, "step": 440 }, { "epoch": 0.0102688147505819, "grad_norm": 3.890625, "learning_rate": 6.844106463878327e-06, "loss": 1.4874, "step": 450 }, { "epoch": 0.010497010633928164, "grad_norm": 3.59375, "learning_rate": 6.996197718631179e-06, "loss": 1.4275, "step": 460 }, { "epoch": 0.01072520651727443, "grad_norm": 3.703125, "learning_rate": 7.148288973384031e-06, "loss": 1.3942, "step": 470 }, { "epoch": 0.010953402400620692, "grad_norm": 3.859375, "learning_rate": 7.300380228136883e-06, "loss": 1.462, "step": 480 }, { "epoch": 0.011181598283966957, "grad_norm": 3.796875, "learning_rate": 7.452471482889735e-06, "loss": 1.4269, "step": 490 }, { "epoch": 0.011409794167313221, "grad_norm": 3.75, "learning_rate": 7.604562737642585e-06, "loss": 1.4625, "step": 500 }, { "epoch": 0.011637990050659486, "grad_norm": 4.28125, "learning_rate": 7.756653992395437e-06, "loss": 1.4103, "step": 510 }, { "epoch": 0.01186618593400575, "grad_norm": 4.125, "learning_rate": 7.908745247148289e-06, "loss": 1.4796, "step": 520 }, { "epoch": 0.012094381817352015, "grad_norm": 3.796875, "learning_rate": 8.06083650190114e-06, "loss": 1.4429, "step": 530 }, { "epoch": 0.01232257770069828, "grad_norm": 3.8125, "learning_rate": 8.212927756653993e-06, "loss": 1.3771, "step": 540 }, { "epoch": 0.012550773584044543, "grad_norm": 3.84375, "learning_rate": 8.365019011406846e-06, "loss": 1.4054, "step": 550 }, { "epoch": 0.012778969467390808, "grad_norm": 3.59375, "learning_rate": 8.517110266159696e-06, "loss": 1.4619, "step": 560 }, { "epoch": 0.013007165350737072, "grad_norm": 4.0, "learning_rate": 8.669201520912548e-06, "loss": 1.4362, "step": 570 }, { "epoch": 0.013235361234083337, "grad_norm": 3.765625, "learning_rate": 8.8212927756654e-06, "loss": 1.3942, "step": 580 }, { "epoch": 0.013463557117429602, "grad_norm": 3.59375, "learning_rate": 8.973384030418252e-06, "loss": 1.3715, "step": 590 }, { "epoch": 0.013691753000775866, "grad_norm": 3.59375, "learning_rate": 9.125475285171103e-06, "loss": 1.3841, "step": 600 }, { "epoch": 0.013919948884122131, "grad_norm": 3.5625, "learning_rate": 9.277566539923955e-06, "loss": 1.4372, "step": 610 }, { "epoch": 0.014148144767468394, "grad_norm": 3.4375, "learning_rate": 9.429657794676807e-06, "loss": 1.4196, "step": 620 }, { "epoch": 0.014376340650814659, "grad_norm": 3.578125, "learning_rate": 9.581749049429659e-06, "loss": 1.4417, "step": 630 }, { "epoch": 0.014604536534160923, "grad_norm": 3.6875, "learning_rate": 9.73384030418251e-06, "loss": 1.4401, "step": 640 }, { "epoch": 0.014832732417507188, "grad_norm": 3.640625, "learning_rate": 9.885931558935362e-06, "loss": 1.4314, "step": 650 }, { "epoch": 0.015060928300853453, "grad_norm": 3.53125, "learning_rate": 1.0038022813688212e-05, "loss": 1.3939, "step": 660 }, { "epoch": 0.015289124184199717, "grad_norm": 3.53125, "learning_rate": 1.0190114068441066e-05, "loss": 1.3851, "step": 670 }, { "epoch": 0.015517320067545982, "grad_norm": 3.390625, "learning_rate": 1.0342205323193916e-05, "loss": 1.4583, "step": 680 }, { "epoch": 0.015745515950892245, "grad_norm": 3.828125, "learning_rate": 1.049429657794677e-05, "loss": 1.3869, "step": 690 }, { "epoch": 0.01597371183423851, "grad_norm": 4.09375, "learning_rate": 1.0646387832699621e-05, "loss": 1.3684, "step": 700 }, { "epoch": 0.016201907717584774, "grad_norm": 4.21875, "learning_rate": 1.0798479087452472e-05, "loss": 1.3874, "step": 710 }, { "epoch": 0.01643010360093104, "grad_norm": 3.828125, "learning_rate": 1.0950570342205325e-05, "loss": 1.3773, "step": 720 }, { "epoch": 0.016658299484277304, "grad_norm": 3.5, "learning_rate": 1.1102661596958175e-05, "loss": 1.3338, "step": 730 }, { "epoch": 0.01688649536762357, "grad_norm": 3.875, "learning_rate": 1.1254752851711029e-05, "loss": 1.4083, "step": 740 }, { "epoch": 0.017114691250969833, "grad_norm": 3.703125, "learning_rate": 1.1406844106463879e-05, "loss": 1.4171, "step": 750 }, { "epoch": 0.017342887134316098, "grad_norm": 3.765625, "learning_rate": 1.155893536121673e-05, "loss": 1.4492, "step": 760 }, { "epoch": 0.017571083017662362, "grad_norm": 3.6875, "learning_rate": 1.1711026615969582e-05, "loss": 1.3698, "step": 770 }, { "epoch": 0.017799278901008627, "grad_norm": 4.875, "learning_rate": 1.1863117870722434e-05, "loss": 1.4096, "step": 780 }, { "epoch": 0.01802747478435489, "grad_norm": 3.90625, "learning_rate": 1.2015209125475286e-05, "loss": 1.4024, "step": 790 }, { "epoch": 0.018255670667701156, "grad_norm": 3.359375, "learning_rate": 1.2167300380228138e-05, "loss": 1.4165, "step": 800 }, { "epoch": 0.018483866551047418, "grad_norm": 3.40625, "learning_rate": 1.2319391634980991e-05, "loss": 1.3782, "step": 810 }, { "epoch": 0.018712062434393682, "grad_norm": 3.703125, "learning_rate": 1.2471482889733841e-05, "loss": 1.4021, "step": 820 }, { "epoch": 0.018940258317739947, "grad_norm": 3.546875, "learning_rate": 1.2623574144486692e-05, "loss": 1.3975, "step": 830 }, { "epoch": 0.01916845420108621, "grad_norm": 3.265625, "learning_rate": 1.2775665399239545e-05, "loss": 1.3528, "step": 840 }, { "epoch": 0.019396650084432476, "grad_norm": 3.546875, "learning_rate": 1.2927756653992395e-05, "loss": 1.3684, "step": 850 }, { "epoch": 0.01962484596777874, "grad_norm": 3.578125, "learning_rate": 1.3079847908745249e-05, "loss": 1.3472, "step": 860 }, { "epoch": 0.019853041851125006, "grad_norm": 4.0, "learning_rate": 1.32319391634981e-05, "loss": 1.4346, "step": 870 }, { "epoch": 0.02008123773447127, "grad_norm": 3.8125, "learning_rate": 1.338403041825095e-05, "loss": 1.4504, "step": 880 }, { "epoch": 0.020309433617817535, "grad_norm": 3.859375, "learning_rate": 1.3536121673003804e-05, "loss": 1.3516, "step": 890 }, { "epoch": 0.0205376295011638, "grad_norm": 3.640625, "learning_rate": 1.3688212927756654e-05, "loss": 1.37, "step": 900 }, { "epoch": 0.020765825384510064, "grad_norm": 3.46875, "learning_rate": 1.3840304182509508e-05, "loss": 1.4166, "step": 910 }, { "epoch": 0.02099402126785633, "grad_norm": 3.765625, "learning_rate": 1.3992395437262358e-05, "loss": 1.4551, "step": 920 }, { "epoch": 0.021222217151202594, "grad_norm": 3.75, "learning_rate": 1.4144486692015211e-05, "loss": 1.3976, "step": 930 }, { "epoch": 0.02145041303454886, "grad_norm": 3.90625, "learning_rate": 1.4296577946768061e-05, "loss": 1.3726, "step": 940 }, { "epoch": 0.02167860891789512, "grad_norm": 3.4375, "learning_rate": 1.4448669201520913e-05, "loss": 1.412, "step": 950 }, { "epoch": 0.021906804801241384, "grad_norm": 3.375, "learning_rate": 1.4600760456273767e-05, "loss": 1.3502, "step": 960 }, { "epoch": 0.02213500068458765, "grad_norm": 3.4375, "learning_rate": 1.4752851711026617e-05, "loss": 1.4424, "step": 970 }, { "epoch": 0.022363196567933914, "grad_norm": 3.375, "learning_rate": 1.490494296577947e-05, "loss": 1.3747, "step": 980 }, { "epoch": 0.022591392451280178, "grad_norm": 3.46875, "learning_rate": 1.505703422053232e-05, "loss": 1.3558, "step": 990 }, { "epoch": 0.022819588334626443, "grad_norm": 3.203125, "learning_rate": 1.520912547528517e-05, "loss": 1.3847, "step": 1000 }, { "epoch": 0.023047784217972708, "grad_norm": 3.40625, "learning_rate": 1.5361216730038024e-05, "loss": 1.3874, "step": 1010 }, { "epoch": 0.023275980101318972, "grad_norm": 3.890625, "learning_rate": 1.5513307984790874e-05, "loss": 1.3789, "step": 1020 }, { "epoch": 0.023504175984665237, "grad_norm": 3.40625, "learning_rate": 1.5665399239543728e-05, "loss": 1.3891, "step": 1030 }, { "epoch": 0.0237323718680115, "grad_norm": 3.75, "learning_rate": 1.5817490494296578e-05, "loss": 1.3984, "step": 1040 }, { "epoch": 0.023960567751357766, "grad_norm": 3.8125, "learning_rate": 1.596958174904943e-05, "loss": 1.3308, "step": 1050 }, { "epoch": 0.02418876363470403, "grad_norm": 3.625, "learning_rate": 1.612167300380228e-05, "loss": 1.3975, "step": 1060 }, { "epoch": 0.024416959518050296, "grad_norm": 3.46875, "learning_rate": 1.6273764258555135e-05, "loss": 1.427, "step": 1070 }, { "epoch": 0.02464515540139656, "grad_norm": 4.46875, "learning_rate": 1.6425855513307985e-05, "loss": 1.3605, "step": 1080 }, { "epoch": 0.024873351284742825, "grad_norm": 3.1875, "learning_rate": 1.657794676806084e-05, "loss": 1.4108, "step": 1090 }, { "epoch": 0.025101547168089086, "grad_norm": 3.671875, "learning_rate": 1.6730038022813692e-05, "loss": 1.3061, "step": 1100 }, { "epoch": 0.02532974305143535, "grad_norm": 3.625, "learning_rate": 1.6882129277566542e-05, "loss": 1.3819, "step": 1110 }, { "epoch": 0.025557938934781615, "grad_norm": 3.484375, "learning_rate": 1.7034220532319392e-05, "loss": 1.3756, "step": 1120 }, { "epoch": 0.02578613481812788, "grad_norm": 3.59375, "learning_rate": 1.7186311787072246e-05, "loss": 1.3613, "step": 1130 }, { "epoch": 0.026014330701474145, "grad_norm": 3.46875, "learning_rate": 1.7338403041825096e-05, "loss": 1.3646, "step": 1140 }, { "epoch": 0.02624252658482041, "grad_norm": 3.609375, "learning_rate": 1.749049429657795e-05, "loss": 1.3491, "step": 1150 }, { "epoch": 0.026470722468166674, "grad_norm": 3.28125, "learning_rate": 1.76425855513308e-05, "loss": 1.3289, "step": 1160 }, { "epoch": 0.02669891835151294, "grad_norm": 3.703125, "learning_rate": 1.779467680608365e-05, "loss": 1.4166, "step": 1170 }, { "epoch": 0.026927114234859204, "grad_norm": 3.28125, "learning_rate": 1.7946768060836503e-05, "loss": 1.3441, "step": 1180 }, { "epoch": 0.027155310118205468, "grad_norm": 3.46875, "learning_rate": 1.8098859315589353e-05, "loss": 1.3552, "step": 1190 }, { "epoch": 0.027383506001551733, "grad_norm": 3.28125, "learning_rate": 1.8250950570342207e-05, "loss": 1.4087, "step": 1200 }, { "epoch": 0.027611701884897998, "grad_norm": 3.609375, "learning_rate": 1.8403041825095057e-05, "loss": 1.358, "step": 1210 }, { "epoch": 0.027839897768244262, "grad_norm": 3.34375, "learning_rate": 1.855513307984791e-05, "loss": 1.4267, "step": 1220 }, { "epoch": 0.028068093651590527, "grad_norm": 3.359375, "learning_rate": 1.870722433460076e-05, "loss": 1.4027, "step": 1230 }, { "epoch": 0.028296289534936788, "grad_norm": 3.484375, "learning_rate": 1.8859315589353614e-05, "loss": 1.4192, "step": 1240 }, { "epoch": 0.028524485418283053, "grad_norm": 3.40625, "learning_rate": 1.9011406844106467e-05, "loss": 1.348, "step": 1250 }, { "epoch": 0.028752681301629317, "grad_norm": 3.34375, "learning_rate": 1.9163498098859318e-05, "loss": 1.3163, "step": 1260 }, { "epoch": 0.028980877184975582, "grad_norm": 3.390625, "learning_rate": 1.931558935361217e-05, "loss": 1.3834, "step": 1270 }, { "epoch": 0.029209073068321847, "grad_norm": 3.390625, "learning_rate": 1.946768060836502e-05, "loss": 1.4104, "step": 1280 }, { "epoch": 0.02943726895166811, "grad_norm": 3.390625, "learning_rate": 1.961977186311787e-05, "loss": 1.3416, "step": 1290 }, { "epoch": 0.029665464835014376, "grad_norm": 3.296875, "learning_rate": 1.9771863117870725e-05, "loss": 1.3557, "step": 1300 }, { "epoch": 0.02989366071836064, "grad_norm": 3.5, "learning_rate": 1.9923954372623575e-05, "loss": 1.4189, "step": 1310 }, { "epoch": 0.030121856601706905, "grad_norm": 3.21875, "learning_rate": 1.9997647446302963e-05, "loss": 1.384, "step": 1320 }, { "epoch": 0.03035005248505317, "grad_norm": 3.265625, "learning_rate": 1.9992942338908888e-05, "loss": 1.3405, "step": 1330 }, { "epoch": 0.030578248368399435, "grad_norm": 3.375, "learning_rate": 1.998823723151481e-05, "loss": 1.345, "step": 1340 }, { "epoch": 0.0308064442517457, "grad_norm": 3.15625, "learning_rate": 1.9983532124120735e-05, "loss": 1.3545, "step": 1350 }, { "epoch": 0.031034640135091964, "grad_norm": 3.484375, "learning_rate": 1.9978827016726657e-05, "loss": 1.4016, "step": 1360 }, { "epoch": 0.031262836018438225, "grad_norm": 3.0625, "learning_rate": 1.9974121909332583e-05, "loss": 1.3485, "step": 1370 }, { "epoch": 0.03149103190178449, "grad_norm": 3.203125, "learning_rate": 1.9969416801938505e-05, "loss": 1.3508, "step": 1380 }, { "epoch": 0.031719227785130755, "grad_norm": 3.296875, "learning_rate": 1.996471169454443e-05, "loss": 1.3914, "step": 1390 }, { "epoch": 0.03194742366847702, "grad_norm": 3.5, "learning_rate": 1.9960006587150352e-05, "loss": 1.3361, "step": 1400 }, { "epoch": 0.032175619551823284, "grad_norm": 3.078125, "learning_rate": 1.9955301479756278e-05, "loss": 1.3417, "step": 1410 }, { "epoch": 0.03240381543516955, "grad_norm": 3.0625, "learning_rate": 1.99505963723622e-05, "loss": 1.345, "step": 1420 }, { "epoch": 0.03263201131851581, "grad_norm": 2.984375, "learning_rate": 1.9945891264968125e-05, "loss": 1.3148, "step": 1430 }, { "epoch": 0.03286020720186208, "grad_norm": 3.09375, "learning_rate": 1.9941186157574047e-05, "loss": 1.3093, "step": 1440 }, { "epoch": 0.03308840308520834, "grad_norm": 3.0625, "learning_rate": 1.9936481050179973e-05, "loss": 1.343, "step": 1450 }, { "epoch": 0.03331659896855461, "grad_norm": 3.15625, "learning_rate": 1.9931775942785895e-05, "loss": 1.3747, "step": 1460 }, { "epoch": 0.03354479485190087, "grad_norm": 3.09375, "learning_rate": 1.992707083539182e-05, "loss": 1.3472, "step": 1470 }, { "epoch": 0.03377299073524714, "grad_norm": 3.296875, "learning_rate": 1.9922365727997742e-05, "loss": 1.3915, "step": 1480 }, { "epoch": 0.0340011866185934, "grad_norm": 3.265625, "learning_rate": 1.9917660620603667e-05, "loss": 1.3595, "step": 1490 }, { "epoch": 0.034229382501939666, "grad_norm": 3.484375, "learning_rate": 1.991295551320959e-05, "loss": 1.3305, "step": 1500 }, { "epoch": 0.03445757838528593, "grad_norm": 3.296875, "learning_rate": 1.9908250405815515e-05, "loss": 1.3188, "step": 1510 }, { "epoch": 0.034685774268632195, "grad_norm": 3.375, "learning_rate": 1.9903545298421437e-05, "loss": 1.3128, "step": 1520 }, { "epoch": 0.03491397015197846, "grad_norm": 3.078125, "learning_rate": 1.9898840191027362e-05, "loss": 1.3442, "step": 1530 }, { "epoch": 0.035142166035324725, "grad_norm": 3.109375, "learning_rate": 1.9894135083633284e-05, "loss": 1.3796, "step": 1540 }, { "epoch": 0.03537036191867099, "grad_norm": 3.171875, "learning_rate": 1.988942997623921e-05, "loss": 1.3631, "step": 1550 }, { "epoch": 0.035598557802017254, "grad_norm": 3.28125, "learning_rate": 1.9884724868845132e-05, "loss": 1.3231, "step": 1560 }, { "epoch": 0.03582675368536352, "grad_norm": 3.203125, "learning_rate": 1.9880019761451057e-05, "loss": 1.3384, "step": 1570 }, { "epoch": 0.03605494956870978, "grad_norm": 3.25, "learning_rate": 1.987531465405698e-05, "loss": 1.3778, "step": 1580 }, { "epoch": 0.03628314545205605, "grad_norm": 3.34375, "learning_rate": 1.9870609546662905e-05, "loss": 1.2998, "step": 1590 }, { "epoch": 0.03651134133540231, "grad_norm": 3.234375, "learning_rate": 1.9865904439268827e-05, "loss": 1.3991, "step": 1600 }, { "epoch": 0.03673953721874857, "grad_norm": 3.546875, "learning_rate": 1.9861199331874752e-05, "loss": 1.2882, "step": 1610 }, { "epoch": 0.036967733102094835, "grad_norm": 3.578125, "learning_rate": 1.9856494224480677e-05, "loss": 1.3104, "step": 1620 }, { "epoch": 0.0371959289854411, "grad_norm": 3.21875, "learning_rate": 1.98517891170866e-05, "loss": 1.3528, "step": 1630 }, { "epoch": 0.037424124868787365, "grad_norm": 3.328125, "learning_rate": 1.9847084009692525e-05, "loss": 1.3201, "step": 1640 }, { "epoch": 0.03765232075213363, "grad_norm": 3.15625, "learning_rate": 1.9842378902298447e-05, "loss": 1.3611, "step": 1650 }, { "epoch": 0.037880516635479894, "grad_norm": 3.296875, "learning_rate": 1.9837673794904372e-05, "loss": 1.3765, "step": 1660 }, { "epoch": 0.03810871251882616, "grad_norm": 3.34375, "learning_rate": 1.9832968687510294e-05, "loss": 1.3333, "step": 1670 }, { "epoch": 0.03833690840217242, "grad_norm": 3.609375, "learning_rate": 1.982826358011622e-05, "loss": 1.4382, "step": 1680 }, { "epoch": 0.03856510428551869, "grad_norm": 3.1875, "learning_rate": 1.9823558472722142e-05, "loss": 1.2967, "step": 1690 }, { "epoch": 0.03879330016886495, "grad_norm": 3.109375, "learning_rate": 1.9818853365328067e-05, "loss": 1.3432, "step": 1700 }, { "epoch": 0.03902149605221122, "grad_norm": 2.921875, "learning_rate": 1.981414825793399e-05, "loss": 1.3983, "step": 1710 }, { "epoch": 0.03924969193555748, "grad_norm": 3.28125, "learning_rate": 1.9809443150539915e-05, "loss": 1.3312, "step": 1720 }, { "epoch": 0.03947788781890375, "grad_norm": 3.140625, "learning_rate": 1.9804738043145837e-05, "loss": 1.3478, "step": 1730 }, { "epoch": 0.03970608370225001, "grad_norm": 3.03125, "learning_rate": 1.9800032935751762e-05, "loss": 1.3477, "step": 1740 }, { "epoch": 0.039934279585596276, "grad_norm": 3.125, "learning_rate": 1.9795327828357684e-05, "loss": 1.3439, "step": 1750 }, { "epoch": 0.04016247546894254, "grad_norm": 3.046875, "learning_rate": 1.979062272096361e-05, "loss": 1.3563, "step": 1760 }, { "epoch": 0.040390671352288805, "grad_norm": 3.375, "learning_rate": 1.978591761356953e-05, "loss": 1.4, "step": 1770 }, { "epoch": 0.04061886723563507, "grad_norm": 3.25, "learning_rate": 1.9781212506175457e-05, "loss": 1.3364, "step": 1780 }, { "epoch": 0.040847063118981335, "grad_norm": 3.203125, "learning_rate": 1.977650739878138e-05, "loss": 1.3672, "step": 1790 }, { "epoch": 0.0410752590023276, "grad_norm": 3.03125, "learning_rate": 1.9771802291387304e-05, "loss": 1.3825, "step": 1800 }, { "epoch": 0.041303454885673864, "grad_norm": 3.203125, "learning_rate": 1.9767097183993226e-05, "loss": 1.3331, "step": 1810 }, { "epoch": 0.04153165076902013, "grad_norm": 2.921875, "learning_rate": 1.976239207659915e-05, "loss": 1.3049, "step": 1820 }, { "epoch": 0.04175984665236639, "grad_norm": 2.875, "learning_rate": 1.9757686969205074e-05, "loss": 1.2942, "step": 1830 }, { "epoch": 0.04198804253571266, "grad_norm": 3.140625, "learning_rate": 1.9752981861811e-05, "loss": 1.3519, "step": 1840 }, { "epoch": 0.04221623841905892, "grad_norm": 3.28125, "learning_rate": 1.974827675441692e-05, "loss": 1.3123, "step": 1850 }, { "epoch": 0.04244443430240519, "grad_norm": 3.15625, "learning_rate": 1.9743571647022847e-05, "loss": 1.3317, "step": 1860 }, { "epoch": 0.04267263018575145, "grad_norm": 3.296875, "learning_rate": 1.973886653962877e-05, "loss": 1.3286, "step": 1870 }, { "epoch": 0.04290082606909772, "grad_norm": 3.1875, "learning_rate": 1.9734161432234694e-05, "loss": 1.2829, "step": 1880 }, { "epoch": 0.04312902195244398, "grad_norm": 3.234375, "learning_rate": 1.9729456324840616e-05, "loss": 1.3229, "step": 1890 }, { "epoch": 0.04335721783579024, "grad_norm": 3.0, "learning_rate": 1.972475121744654e-05, "loss": 1.3511, "step": 1900 }, { "epoch": 0.043585413719136504, "grad_norm": 3.234375, "learning_rate": 1.9720046110052463e-05, "loss": 1.3444, "step": 1910 }, { "epoch": 0.04381360960248277, "grad_norm": 3.109375, "learning_rate": 1.971534100265839e-05, "loss": 1.3569, "step": 1920 }, { "epoch": 0.04404180548582903, "grad_norm": 3.3125, "learning_rate": 1.971063589526431e-05, "loss": 1.3211, "step": 1930 }, { "epoch": 0.0442700013691753, "grad_norm": 3.46875, "learning_rate": 1.9705930787870236e-05, "loss": 1.3437, "step": 1940 }, { "epoch": 0.04449819725252156, "grad_norm": 2.78125, "learning_rate": 1.9701225680476158e-05, "loss": 1.313, "step": 1950 }, { "epoch": 0.04472639313586783, "grad_norm": 2.859375, "learning_rate": 1.9696520573082084e-05, "loss": 1.3624, "step": 1960 }, { "epoch": 0.04495458901921409, "grad_norm": 2.9375, "learning_rate": 1.9691815465688006e-05, "loss": 1.3802, "step": 1970 }, { "epoch": 0.045182784902560356, "grad_norm": 3.015625, "learning_rate": 1.968711035829393e-05, "loss": 1.3185, "step": 1980 }, { "epoch": 0.04541098078590662, "grad_norm": 3.0625, "learning_rate": 1.9682405250899853e-05, "loss": 1.3172, "step": 1990 }, { "epoch": 0.045639176669252886, "grad_norm": 3.21875, "learning_rate": 1.967770014350578e-05, "loss": 1.3595, "step": 2000 }, { "epoch": 0.04586737255259915, "grad_norm": 3.1875, "learning_rate": 1.96729950361117e-05, "loss": 1.3756, "step": 2010 }, { "epoch": 0.046095568435945415, "grad_norm": 3.015625, "learning_rate": 1.9668289928717626e-05, "loss": 1.3744, "step": 2020 }, { "epoch": 0.04632376431929168, "grad_norm": 3.4375, "learning_rate": 1.9663584821323548e-05, "loss": 1.3532, "step": 2030 }, { "epoch": 0.046551960202637944, "grad_norm": 3.078125, "learning_rate": 1.9658879713929473e-05, "loss": 1.3375, "step": 2040 }, { "epoch": 0.04678015608598421, "grad_norm": 3.15625, "learning_rate": 1.9654174606535395e-05, "loss": 1.3547, "step": 2050 }, { "epoch": 0.047008351969330474, "grad_norm": 3.0625, "learning_rate": 1.964946949914132e-05, "loss": 1.3437, "step": 2060 }, { "epoch": 0.04723654785267674, "grad_norm": 2.90625, "learning_rate": 1.9644764391747243e-05, "loss": 1.3712, "step": 2070 }, { "epoch": 0.047464743736023, "grad_norm": 2.734375, "learning_rate": 1.9640059284353168e-05, "loss": 1.3295, "step": 2080 }, { "epoch": 0.04769293961936927, "grad_norm": 3.09375, "learning_rate": 1.963535417695909e-05, "loss": 1.2965, "step": 2090 }, { "epoch": 0.04792113550271553, "grad_norm": 2.78125, "learning_rate": 1.9630649069565016e-05, "loss": 1.3355, "step": 2100 }, { "epoch": 0.0481493313860618, "grad_norm": 3.03125, "learning_rate": 1.9625943962170938e-05, "loss": 1.2739, "step": 2110 }, { "epoch": 0.04837752726940806, "grad_norm": 2.921875, "learning_rate": 1.9621238854776863e-05, "loss": 1.3527, "step": 2120 }, { "epoch": 0.04860572315275433, "grad_norm": 3.1875, "learning_rate": 1.9616533747382785e-05, "loss": 1.3844, "step": 2130 }, { "epoch": 0.04883391903610059, "grad_norm": 3.390625, "learning_rate": 1.961182863998871e-05, "loss": 1.266, "step": 2140 }, { "epoch": 0.049062114919446856, "grad_norm": 3.015625, "learning_rate": 1.9607123532594632e-05, "loss": 1.289, "step": 2150 }, { "epoch": 0.04929031080279312, "grad_norm": 2.921875, "learning_rate": 1.9602418425200558e-05, "loss": 1.3261, "step": 2160 }, { "epoch": 0.049518506686139385, "grad_norm": 3.09375, "learning_rate": 1.959771331780648e-05, "loss": 1.3243, "step": 2170 }, { "epoch": 0.04974670256948565, "grad_norm": 3.109375, "learning_rate": 1.9593008210412405e-05, "loss": 1.3062, "step": 2180 }, { "epoch": 0.04997489845283191, "grad_norm": 3.015625, "learning_rate": 1.958830310301833e-05, "loss": 1.3586, "step": 2190 }, { "epoch": 0.05020309433617817, "grad_norm": 3.15625, "learning_rate": 1.9583597995624253e-05, "loss": 1.2829, "step": 2200 }, { "epoch": 0.05043129021952444, "grad_norm": 3.046875, "learning_rate": 1.9578892888230178e-05, "loss": 1.3036, "step": 2210 }, { "epoch": 0.0506594861028707, "grad_norm": 3.03125, "learning_rate": 1.95741877808361e-05, "loss": 1.2972, "step": 2220 }, { "epoch": 0.050887681986216966, "grad_norm": 3.0625, "learning_rate": 1.9569482673442026e-05, "loss": 1.3014, "step": 2230 }, { "epoch": 0.05111587786956323, "grad_norm": 3.109375, "learning_rate": 1.9564777566047944e-05, "loss": 1.2874, "step": 2240 }, { "epoch": 0.051344073752909496, "grad_norm": 2.96875, "learning_rate": 1.956007245865387e-05, "loss": 1.2832, "step": 2250 }, { "epoch": 0.05157226963625576, "grad_norm": 3.09375, "learning_rate": 1.955536735125979e-05, "loss": 1.2873, "step": 2260 }, { "epoch": 0.051800465519602025, "grad_norm": 3.375, "learning_rate": 1.9550662243865717e-05, "loss": 1.3528, "step": 2270 }, { "epoch": 0.05202866140294829, "grad_norm": 3.015625, "learning_rate": 1.954595713647164e-05, "loss": 1.3258, "step": 2280 }, { "epoch": 0.052256857286294554, "grad_norm": 3.484375, "learning_rate": 1.9541252029077564e-05, "loss": 1.333, "step": 2290 }, { "epoch": 0.05248505316964082, "grad_norm": 3.0, "learning_rate": 1.9536546921683486e-05, "loss": 1.3283, "step": 2300 }, { "epoch": 0.052713249052987084, "grad_norm": 2.90625, "learning_rate": 1.9531841814289412e-05, "loss": 1.3391, "step": 2310 }, { "epoch": 0.05294144493633335, "grad_norm": 3.453125, "learning_rate": 1.9527136706895334e-05, "loss": 1.3023, "step": 2320 }, { "epoch": 0.05316964081967961, "grad_norm": 2.828125, "learning_rate": 1.952243159950126e-05, "loss": 1.2578, "step": 2330 }, { "epoch": 0.05339783670302588, "grad_norm": 3.359375, "learning_rate": 1.951772649210718e-05, "loss": 1.3337, "step": 2340 }, { "epoch": 0.05362603258637214, "grad_norm": 3.015625, "learning_rate": 1.9513021384713107e-05, "loss": 1.3553, "step": 2350 }, { "epoch": 0.05385422846971841, "grad_norm": 3.046875, "learning_rate": 1.9508316277319032e-05, "loss": 1.327, "step": 2360 }, { "epoch": 0.05408242435306467, "grad_norm": 3.140625, "learning_rate": 1.9503611169924954e-05, "loss": 1.335, "step": 2370 }, { "epoch": 0.054310620236410936, "grad_norm": 3.09375, "learning_rate": 1.949890606253088e-05, "loss": 1.3125, "step": 2380 }, { "epoch": 0.0545388161197572, "grad_norm": 2.828125, "learning_rate": 1.94942009551368e-05, "loss": 1.3499, "step": 2390 }, { "epoch": 0.054767012003103466, "grad_norm": 2.90625, "learning_rate": 1.9489495847742727e-05, "loss": 1.3259, "step": 2400 }, { "epoch": 0.05499520788644973, "grad_norm": 2.921875, "learning_rate": 1.948479074034865e-05, "loss": 1.2998, "step": 2410 }, { "epoch": 0.055223403769795995, "grad_norm": 2.96875, "learning_rate": 1.9480085632954574e-05, "loss": 1.3369, "step": 2420 }, { "epoch": 0.05545159965314226, "grad_norm": 2.78125, "learning_rate": 1.9475380525560496e-05, "loss": 1.3212, "step": 2430 }, { "epoch": 0.055679795536488524, "grad_norm": 3.046875, "learning_rate": 1.9470675418166422e-05, "loss": 1.3363, "step": 2440 }, { "epoch": 0.05590799141983479, "grad_norm": 2.90625, "learning_rate": 1.9465970310772344e-05, "loss": 1.2463, "step": 2450 }, { "epoch": 0.056136187303181054, "grad_norm": 2.984375, "learning_rate": 1.946126520337827e-05, "loss": 1.262, "step": 2460 }, { "epoch": 0.05636438318652732, "grad_norm": 2.78125, "learning_rate": 1.945656009598419e-05, "loss": 1.2973, "step": 2470 }, { "epoch": 0.056592579069873576, "grad_norm": 3.25, "learning_rate": 1.9451854988590117e-05, "loss": 1.2681, "step": 2480 }, { "epoch": 0.05682077495321984, "grad_norm": 2.984375, "learning_rate": 1.944714988119604e-05, "loss": 1.348, "step": 2490 }, { "epoch": 0.057048970836566106, "grad_norm": 2.921875, "learning_rate": 1.9442444773801964e-05, "loss": 1.3223, "step": 2500 }, { "epoch": 0.05727716671991237, "grad_norm": 2.875, "learning_rate": 1.9437739666407886e-05, "loss": 1.3331, "step": 2510 }, { "epoch": 0.057505362603258635, "grad_norm": 3.0625, "learning_rate": 1.943303455901381e-05, "loss": 1.3448, "step": 2520 }, { "epoch": 0.0577335584866049, "grad_norm": 2.828125, "learning_rate": 1.9428329451619734e-05, "loss": 1.2804, "step": 2530 }, { "epoch": 0.057961754369951164, "grad_norm": 2.96875, "learning_rate": 1.942362434422566e-05, "loss": 1.3091, "step": 2540 }, { "epoch": 0.05818995025329743, "grad_norm": 3.015625, "learning_rate": 1.941891923683158e-05, "loss": 1.298, "step": 2550 }, { "epoch": 0.058418146136643694, "grad_norm": 3.0, "learning_rate": 1.9414214129437506e-05, "loss": 1.3311, "step": 2560 }, { "epoch": 0.05864634201998996, "grad_norm": 2.9375, "learning_rate": 1.940950902204343e-05, "loss": 1.3316, "step": 2570 }, { "epoch": 0.05887453790333622, "grad_norm": 2.921875, "learning_rate": 1.9404803914649354e-05, "loss": 1.2494, "step": 2580 }, { "epoch": 0.05910273378668249, "grad_norm": 2.96875, "learning_rate": 1.9400098807255276e-05, "loss": 1.3575, "step": 2590 }, { "epoch": 0.05933092967002875, "grad_norm": 2.90625, "learning_rate": 1.93953936998612e-05, "loss": 1.3136, "step": 2600 }, { "epoch": 0.05955912555337502, "grad_norm": 2.96875, "learning_rate": 1.9390688592467123e-05, "loss": 1.2939, "step": 2610 }, { "epoch": 0.05978732143672128, "grad_norm": 3.3125, "learning_rate": 1.938598348507305e-05, "loss": 1.2837, "step": 2620 }, { "epoch": 0.060015517320067546, "grad_norm": 2.796875, "learning_rate": 1.938127837767897e-05, "loss": 1.3491, "step": 2630 }, { "epoch": 0.06024371320341381, "grad_norm": 3.0, "learning_rate": 1.9376573270284896e-05, "loss": 1.3313, "step": 2640 }, { "epoch": 0.060471909086760076, "grad_norm": 3.046875, "learning_rate": 1.9371868162890818e-05, "loss": 1.2966, "step": 2650 }, { "epoch": 0.06070010497010634, "grad_norm": 2.96875, "learning_rate": 1.9367163055496743e-05, "loss": 1.3495, "step": 2660 }, { "epoch": 0.060928300853452605, "grad_norm": 3.0625, "learning_rate": 1.9362457948102666e-05, "loss": 1.3175, "step": 2670 }, { "epoch": 0.06115649673679887, "grad_norm": 2.90625, "learning_rate": 1.935775284070859e-05, "loss": 1.3328, "step": 2680 }, { "epoch": 0.061384692620145134, "grad_norm": 3.140625, "learning_rate": 1.9353047733314513e-05, "loss": 1.3052, "step": 2690 }, { "epoch": 0.0616128885034914, "grad_norm": 3.171875, "learning_rate": 1.934834262592044e-05, "loss": 1.3565, "step": 2700 }, { "epoch": 0.061841084386837664, "grad_norm": 2.921875, "learning_rate": 1.934363751852636e-05, "loss": 1.2375, "step": 2710 }, { "epoch": 0.06206928027018393, "grad_norm": 3.1875, "learning_rate": 1.9338932411132286e-05, "loss": 1.3403, "step": 2720 }, { "epoch": 0.06229747615353019, "grad_norm": 3.0, "learning_rate": 1.9334227303738208e-05, "loss": 1.2978, "step": 2730 }, { "epoch": 0.06252567203687645, "grad_norm": 2.796875, "learning_rate": 1.9329522196344133e-05, "loss": 1.2961, "step": 2740 }, { "epoch": 0.06275386792022272, "grad_norm": 3.0, "learning_rate": 1.9324817088950055e-05, "loss": 1.2535, "step": 2750 }, { "epoch": 0.06298206380356898, "grad_norm": 3.0625, "learning_rate": 1.932011198155598e-05, "loss": 1.3331, "step": 2760 }, { "epoch": 0.06321025968691525, "grad_norm": 2.875, "learning_rate": 1.9315406874161903e-05, "loss": 1.3123, "step": 2770 }, { "epoch": 0.06343845557026151, "grad_norm": 3.109375, "learning_rate": 1.9310701766767828e-05, "loss": 1.3033, "step": 2780 }, { "epoch": 0.06366665145360778, "grad_norm": 3.09375, "learning_rate": 1.930599665937375e-05, "loss": 1.2968, "step": 2790 }, { "epoch": 0.06389484733695404, "grad_norm": 3.015625, "learning_rate": 1.9301291551979675e-05, "loss": 1.2564, "step": 2800 }, { "epoch": 0.06412304322030031, "grad_norm": 2.890625, "learning_rate": 1.9296586444585597e-05, "loss": 1.3422, "step": 2810 }, { "epoch": 0.06435123910364657, "grad_norm": 3.28125, "learning_rate": 1.9291881337191523e-05, "loss": 1.3109, "step": 2820 }, { "epoch": 0.06457943498699284, "grad_norm": 3.03125, "learning_rate": 1.9287176229797445e-05, "loss": 1.3506, "step": 2830 }, { "epoch": 0.0648076308703391, "grad_norm": 2.984375, "learning_rate": 1.928247112240337e-05, "loss": 1.3651, "step": 2840 }, { "epoch": 0.06503582675368537, "grad_norm": 3.03125, "learning_rate": 1.9277766015009292e-05, "loss": 1.2158, "step": 2850 }, { "epoch": 0.06526402263703163, "grad_norm": 3.125, "learning_rate": 1.9273060907615218e-05, "loss": 1.3632, "step": 2860 }, { "epoch": 0.0654922185203779, "grad_norm": 3.09375, "learning_rate": 1.926835580022114e-05, "loss": 1.2916, "step": 2870 }, { "epoch": 0.06572041440372416, "grad_norm": 3.15625, "learning_rate": 1.9263650692827065e-05, "loss": 1.2579, "step": 2880 }, { "epoch": 0.06594861028707043, "grad_norm": 3.0625, "learning_rate": 1.9258945585432987e-05, "loss": 1.3358, "step": 2890 }, { "epoch": 0.06617680617041669, "grad_norm": 2.875, "learning_rate": 1.9254240478038913e-05, "loss": 1.296, "step": 2900 }, { "epoch": 0.06640500205376294, "grad_norm": 3.109375, "learning_rate": 1.9249535370644835e-05, "loss": 1.3481, "step": 2910 }, { "epoch": 0.06663319793710921, "grad_norm": 3.046875, "learning_rate": 1.924483026325076e-05, "loss": 1.3228, "step": 2920 }, { "epoch": 0.06686139382045547, "grad_norm": 2.78125, "learning_rate": 1.9240125155856682e-05, "loss": 1.315, "step": 2930 }, { "epoch": 0.06708958970380174, "grad_norm": 3.140625, "learning_rate": 1.9235420048462607e-05, "loss": 1.2657, "step": 2940 }, { "epoch": 0.067317785587148, "grad_norm": 2.9375, "learning_rate": 1.9230714941068533e-05, "loss": 1.2938, "step": 2950 }, { "epoch": 0.06754598147049427, "grad_norm": 2.859375, "learning_rate": 1.9226009833674455e-05, "loss": 1.2649, "step": 2960 }, { "epoch": 0.06777417735384053, "grad_norm": 3.078125, "learning_rate": 1.922130472628038e-05, "loss": 1.3182, "step": 2970 }, { "epoch": 0.0680023732371868, "grad_norm": 3.015625, "learning_rate": 1.9216599618886302e-05, "loss": 1.2931, "step": 2980 }, { "epoch": 0.06823056912053306, "grad_norm": 2.875, "learning_rate": 1.9211894511492228e-05, "loss": 1.2929, "step": 2990 }, { "epoch": 0.06845876500387933, "grad_norm": 2.90625, "learning_rate": 1.920718940409815e-05, "loss": 1.3254, "step": 3000 }, { "epoch": 0.06868696088722559, "grad_norm": 3.09375, "learning_rate": 1.9202484296704075e-05, "loss": 1.295, "step": 3010 }, { "epoch": 0.06891515677057186, "grad_norm": 2.96875, "learning_rate": 1.9197779189309997e-05, "loss": 1.2974, "step": 3020 }, { "epoch": 0.06914335265391812, "grad_norm": 2.765625, "learning_rate": 1.9193074081915923e-05, "loss": 1.3092, "step": 3030 }, { "epoch": 0.06937154853726439, "grad_norm": 2.828125, "learning_rate": 1.9188368974521845e-05, "loss": 1.2749, "step": 3040 }, { "epoch": 0.06959974442061065, "grad_norm": 2.984375, "learning_rate": 1.918366386712777e-05, "loss": 1.3363, "step": 3050 }, { "epoch": 0.06982794030395692, "grad_norm": 3.375, "learning_rate": 1.9178958759733692e-05, "loss": 1.2407, "step": 3060 }, { "epoch": 0.07005613618730318, "grad_norm": 3.3125, "learning_rate": 1.9174253652339617e-05, "loss": 1.2827, "step": 3070 }, { "epoch": 0.07028433207064945, "grad_norm": 3.265625, "learning_rate": 1.916954854494554e-05, "loss": 1.3172, "step": 3080 }, { "epoch": 0.07051252795399571, "grad_norm": 3.0, "learning_rate": 1.9164843437551465e-05, "loss": 1.3089, "step": 3090 }, { "epoch": 0.07074072383734198, "grad_norm": 3.34375, "learning_rate": 1.9160138330157387e-05, "loss": 1.2981, "step": 3100 }, { "epoch": 0.07096891972068824, "grad_norm": 2.703125, "learning_rate": 1.9155433222763312e-05, "loss": 1.2745, "step": 3110 }, { "epoch": 0.07119711560403451, "grad_norm": 2.8125, "learning_rate": 1.9150728115369234e-05, "loss": 1.2705, "step": 3120 }, { "epoch": 0.07142531148738077, "grad_norm": 3.125, "learning_rate": 1.914602300797516e-05, "loss": 1.2858, "step": 3130 }, { "epoch": 0.07165350737072704, "grad_norm": 2.984375, "learning_rate": 1.914131790058108e-05, "loss": 1.2985, "step": 3140 }, { "epoch": 0.0718817032540733, "grad_norm": 3.265625, "learning_rate": 1.9136612793187007e-05, "loss": 1.3154, "step": 3150 }, { "epoch": 0.07210989913741957, "grad_norm": 3.578125, "learning_rate": 1.913190768579293e-05, "loss": 1.2897, "step": 3160 }, { "epoch": 0.07233809502076582, "grad_norm": 3.015625, "learning_rate": 1.9127202578398855e-05, "loss": 1.2612, "step": 3170 }, { "epoch": 0.0725662909041121, "grad_norm": 2.90625, "learning_rate": 1.9122497471004777e-05, "loss": 1.2448, "step": 3180 }, { "epoch": 0.07279448678745835, "grad_norm": 2.96875, "learning_rate": 1.9117792363610702e-05, "loss": 1.3043, "step": 3190 }, { "epoch": 0.07302268267080463, "grad_norm": 2.875, "learning_rate": 1.9113087256216624e-05, "loss": 1.3078, "step": 3200 }, { "epoch": 0.07325087855415088, "grad_norm": 3.078125, "learning_rate": 1.910838214882255e-05, "loss": 1.2646, "step": 3210 }, { "epoch": 0.07347907443749714, "grad_norm": 3.015625, "learning_rate": 1.910367704142847e-05, "loss": 1.2309, "step": 3220 }, { "epoch": 0.07370727032084341, "grad_norm": 3.109375, "learning_rate": 1.9098971934034397e-05, "loss": 1.2709, "step": 3230 }, { "epoch": 0.07393546620418967, "grad_norm": 3.1875, "learning_rate": 1.909426682664032e-05, "loss": 1.2789, "step": 3240 }, { "epoch": 0.07416366208753594, "grad_norm": 2.765625, "learning_rate": 1.9089561719246244e-05, "loss": 1.2751, "step": 3250 }, { "epoch": 0.0743918579708822, "grad_norm": 3.71875, "learning_rate": 1.9084856611852166e-05, "loss": 1.2787, "step": 3260 }, { "epoch": 0.07462005385422847, "grad_norm": 3.015625, "learning_rate": 1.908015150445809e-05, "loss": 1.2515, "step": 3270 }, { "epoch": 0.07484824973757473, "grad_norm": 2.859375, "learning_rate": 1.9075446397064014e-05, "loss": 1.2512, "step": 3280 }, { "epoch": 0.075076445620921, "grad_norm": 2.921875, "learning_rate": 1.907074128966994e-05, "loss": 1.3457, "step": 3290 }, { "epoch": 0.07530464150426726, "grad_norm": 3.25, "learning_rate": 1.906603618227586e-05, "loss": 1.3079, "step": 3300 }, { "epoch": 0.07553283738761353, "grad_norm": 3.0625, "learning_rate": 1.9061331074881786e-05, "loss": 1.2903, "step": 3310 }, { "epoch": 0.07576103327095979, "grad_norm": 2.71875, "learning_rate": 1.905662596748771e-05, "loss": 1.2931, "step": 3320 }, { "epoch": 0.07598922915430606, "grad_norm": 3.015625, "learning_rate": 1.9051920860093634e-05, "loss": 1.3441, "step": 3330 }, { "epoch": 0.07621742503765232, "grad_norm": 2.5625, "learning_rate": 1.9047215752699556e-05, "loss": 1.266, "step": 3340 }, { "epoch": 0.07644562092099859, "grad_norm": 3.0625, "learning_rate": 1.904251064530548e-05, "loss": 1.3063, "step": 3350 }, { "epoch": 0.07667381680434485, "grad_norm": 3.015625, "learning_rate": 1.9037805537911403e-05, "loss": 1.3123, "step": 3360 }, { "epoch": 0.07690201268769112, "grad_norm": 3.15625, "learning_rate": 1.903310043051733e-05, "loss": 1.2629, "step": 3370 }, { "epoch": 0.07713020857103738, "grad_norm": 2.765625, "learning_rate": 1.902839532312325e-05, "loss": 1.3116, "step": 3380 }, { "epoch": 0.07735840445438365, "grad_norm": 2.9375, "learning_rate": 1.9023690215729176e-05, "loss": 1.2507, "step": 3390 }, { "epoch": 0.0775866003377299, "grad_norm": 2.890625, "learning_rate": 1.9018985108335098e-05, "loss": 1.2674, "step": 3400 }, { "epoch": 0.07781479622107618, "grad_norm": 3.578125, "learning_rate": 1.9014280000941024e-05, "loss": 1.288, "step": 3410 }, { "epoch": 0.07804299210442243, "grad_norm": 3.015625, "learning_rate": 1.9009574893546946e-05, "loss": 1.3085, "step": 3420 }, { "epoch": 0.0782711879877687, "grad_norm": 3.109375, "learning_rate": 1.900486978615287e-05, "loss": 1.271, "step": 3430 }, { "epoch": 0.07849938387111496, "grad_norm": 2.75, "learning_rate": 1.9000164678758793e-05, "loss": 1.2267, "step": 3440 }, { "epoch": 0.07872757975446124, "grad_norm": 2.640625, "learning_rate": 1.899545957136472e-05, "loss": 1.2884, "step": 3450 }, { "epoch": 0.0789557756378075, "grad_norm": 3.125, "learning_rate": 1.899075446397064e-05, "loss": 1.2701, "step": 3460 }, { "epoch": 0.07918397152115376, "grad_norm": 2.953125, "learning_rate": 1.8986049356576566e-05, "loss": 1.3125, "step": 3470 }, { "epoch": 0.07941216740450002, "grad_norm": 2.9375, "learning_rate": 1.8981344249182488e-05, "loss": 1.2674, "step": 3480 }, { "epoch": 0.0796403632878463, "grad_norm": 2.96875, "learning_rate": 1.8976639141788413e-05, "loss": 1.3193, "step": 3490 }, { "epoch": 0.07986855917119255, "grad_norm": 3.109375, "learning_rate": 1.8971934034394335e-05, "loss": 1.3263, "step": 3500 }, { "epoch": 0.08009675505453881, "grad_norm": 2.953125, "learning_rate": 1.896722892700026e-05, "loss": 1.3223, "step": 3510 }, { "epoch": 0.08032495093788508, "grad_norm": 3.140625, "learning_rate": 1.8962523819606186e-05, "loss": 1.2505, "step": 3520 }, { "epoch": 0.08055314682123134, "grad_norm": 2.890625, "learning_rate": 1.8957818712212108e-05, "loss": 1.2797, "step": 3530 }, { "epoch": 0.08078134270457761, "grad_norm": 2.984375, "learning_rate": 1.8953113604818034e-05, "loss": 1.2591, "step": 3540 }, { "epoch": 0.08100953858792387, "grad_norm": 2.90625, "learning_rate": 1.8948408497423956e-05, "loss": 1.259, "step": 3550 }, { "epoch": 0.08123773447127014, "grad_norm": 2.921875, "learning_rate": 1.894370339002988e-05, "loss": 1.2834, "step": 3560 }, { "epoch": 0.0814659303546164, "grad_norm": 2.8125, "learning_rate": 1.8938998282635803e-05, "loss": 1.2744, "step": 3570 }, { "epoch": 0.08169412623796267, "grad_norm": 2.578125, "learning_rate": 1.893429317524173e-05, "loss": 1.2332, "step": 3580 }, { "epoch": 0.08192232212130893, "grad_norm": 2.984375, "learning_rate": 1.892958806784765e-05, "loss": 1.2801, "step": 3590 }, { "epoch": 0.0821505180046552, "grad_norm": 3.234375, "learning_rate": 1.8924882960453576e-05, "loss": 1.2917, "step": 3600 }, { "epoch": 0.08237871388800146, "grad_norm": 3.0, "learning_rate": 1.8920177853059498e-05, "loss": 1.33, "step": 3610 }, { "epoch": 0.08260690977134773, "grad_norm": 3.359375, "learning_rate": 1.8915472745665423e-05, "loss": 1.315, "step": 3620 }, { "epoch": 0.08283510565469399, "grad_norm": 2.90625, "learning_rate": 1.8910767638271345e-05, "loss": 1.2019, "step": 3630 }, { "epoch": 0.08306330153804026, "grad_norm": 2.96875, "learning_rate": 1.890606253087727e-05, "loss": 1.3372, "step": 3640 }, { "epoch": 0.08329149742138652, "grad_norm": 3.046875, "learning_rate": 1.8901357423483193e-05, "loss": 1.2758, "step": 3650 }, { "epoch": 0.08351969330473279, "grad_norm": 3.015625, "learning_rate": 1.8896652316089118e-05, "loss": 1.2893, "step": 3660 }, { "epoch": 0.08374788918807904, "grad_norm": 2.875, "learning_rate": 1.889194720869504e-05, "loss": 1.2878, "step": 3670 }, { "epoch": 0.08397608507142532, "grad_norm": 2.609375, "learning_rate": 1.8887242101300966e-05, "loss": 1.2628, "step": 3680 }, { "epoch": 0.08420428095477157, "grad_norm": 3.046875, "learning_rate": 1.8882536993906888e-05, "loss": 1.3282, "step": 3690 }, { "epoch": 0.08443247683811785, "grad_norm": 2.890625, "learning_rate": 1.8877831886512813e-05, "loss": 1.2558, "step": 3700 }, { "epoch": 0.0846606727214641, "grad_norm": 3.6875, "learning_rate": 1.8873126779118735e-05, "loss": 1.2594, "step": 3710 }, { "epoch": 0.08488886860481037, "grad_norm": 2.84375, "learning_rate": 1.886842167172466e-05, "loss": 1.2525, "step": 3720 }, { "epoch": 0.08511706448815663, "grad_norm": 2.90625, "learning_rate": 1.8863716564330582e-05, "loss": 1.2717, "step": 3730 }, { "epoch": 0.0853452603715029, "grad_norm": 3.09375, "learning_rate": 1.8859011456936508e-05, "loss": 1.2786, "step": 3740 }, { "epoch": 0.08557345625484916, "grad_norm": 2.859375, "learning_rate": 1.885430634954243e-05, "loss": 1.2976, "step": 3750 }, { "epoch": 0.08580165213819543, "grad_norm": 2.65625, "learning_rate": 1.8849601242148355e-05, "loss": 1.2451, "step": 3760 }, { "epoch": 0.08602984802154169, "grad_norm": 2.6875, "learning_rate": 1.8844896134754277e-05, "loss": 1.2518, "step": 3770 }, { "epoch": 0.08625804390488796, "grad_norm": 2.84375, "learning_rate": 1.8840191027360203e-05, "loss": 1.2399, "step": 3780 }, { "epoch": 0.08648623978823422, "grad_norm": 3.09375, "learning_rate": 1.8835485919966125e-05, "loss": 1.2926, "step": 3790 }, { "epoch": 0.08671443567158048, "grad_norm": 3.078125, "learning_rate": 1.883078081257205e-05, "loss": 1.3284, "step": 3800 }, { "epoch": 0.08694263155492675, "grad_norm": 2.890625, "learning_rate": 1.8826075705177972e-05, "loss": 1.3465, "step": 3810 }, { "epoch": 0.08717082743827301, "grad_norm": 2.8125, "learning_rate": 1.8821370597783898e-05, "loss": 1.2742, "step": 3820 }, { "epoch": 0.08739902332161928, "grad_norm": 2.640625, "learning_rate": 1.881666549038982e-05, "loss": 1.2418, "step": 3830 }, { "epoch": 0.08762721920496554, "grad_norm": 2.890625, "learning_rate": 1.8811960382995745e-05, "loss": 1.2656, "step": 3840 }, { "epoch": 0.08785541508831181, "grad_norm": 2.96875, "learning_rate": 1.8807255275601667e-05, "loss": 1.3345, "step": 3850 }, { "epoch": 0.08808361097165807, "grad_norm": 2.796875, "learning_rate": 1.8802550168207592e-05, "loss": 1.3108, "step": 3860 }, { "epoch": 0.08831180685500434, "grad_norm": 3.0, "learning_rate": 1.8797845060813514e-05, "loss": 1.3534, "step": 3870 }, { "epoch": 0.0885400027383506, "grad_norm": 2.890625, "learning_rate": 1.879313995341944e-05, "loss": 1.2493, "step": 3880 }, { "epoch": 0.08876819862169687, "grad_norm": 3.1875, "learning_rate": 1.8788434846025362e-05, "loss": 1.3026, "step": 3890 }, { "epoch": 0.08899639450504312, "grad_norm": 2.84375, "learning_rate": 1.8783729738631287e-05, "loss": 1.2977, "step": 3900 }, { "epoch": 0.0892245903883894, "grad_norm": 2.890625, "learning_rate": 1.877902463123721e-05, "loss": 1.2378, "step": 3910 }, { "epoch": 0.08945278627173565, "grad_norm": 3.109375, "learning_rate": 1.8774319523843135e-05, "loss": 1.2688, "step": 3920 }, { "epoch": 0.08968098215508193, "grad_norm": 2.96875, "learning_rate": 1.8769614416449057e-05, "loss": 1.2846, "step": 3930 }, { "epoch": 0.08990917803842818, "grad_norm": 2.890625, "learning_rate": 1.8764909309054982e-05, "loss": 1.2867, "step": 3940 }, { "epoch": 0.09013737392177446, "grad_norm": 2.890625, "learning_rate": 1.8760204201660904e-05, "loss": 1.2363, "step": 3950 }, { "epoch": 0.09036556980512071, "grad_norm": 2.921875, "learning_rate": 1.875549909426683e-05, "loss": 1.3008, "step": 3960 }, { "epoch": 0.09059376568846698, "grad_norm": 2.796875, "learning_rate": 1.875079398687275e-05, "loss": 1.2138, "step": 3970 }, { "epoch": 0.09082196157181324, "grad_norm": 3.1875, "learning_rate": 1.8746088879478677e-05, "loss": 1.2049, "step": 3980 }, { "epoch": 0.09105015745515951, "grad_norm": 3.15625, "learning_rate": 1.87413837720846e-05, "loss": 1.2463, "step": 3990 }, { "epoch": 0.09127835333850577, "grad_norm": 2.84375, "learning_rate": 1.8736678664690524e-05, "loss": 1.2972, "step": 4000 }, { "epoch": 0.09150654922185204, "grad_norm": 3.03125, "learning_rate": 1.8731973557296446e-05, "loss": 1.2633, "step": 4010 }, { "epoch": 0.0917347451051983, "grad_norm": 3.03125, "learning_rate": 1.8727268449902372e-05, "loss": 1.247, "step": 4020 }, { "epoch": 0.09196294098854457, "grad_norm": 3.09375, "learning_rate": 1.8722563342508294e-05, "loss": 1.2925, "step": 4030 }, { "epoch": 0.09219113687189083, "grad_norm": 3.078125, "learning_rate": 1.871785823511422e-05, "loss": 1.2233, "step": 4040 }, { "epoch": 0.0924193327552371, "grad_norm": 3.0625, "learning_rate": 1.871315312772014e-05, "loss": 1.3218, "step": 4050 }, { "epoch": 0.09264752863858336, "grad_norm": 3.28125, "learning_rate": 1.8708448020326067e-05, "loss": 1.3449, "step": 4060 }, { "epoch": 0.09287572452192963, "grad_norm": 3.109375, "learning_rate": 1.870374291293199e-05, "loss": 1.2999, "step": 4070 }, { "epoch": 0.09310392040527589, "grad_norm": 2.75, "learning_rate": 1.8699037805537914e-05, "loss": 1.2917, "step": 4080 }, { "epoch": 0.09333211628862215, "grad_norm": 3.015625, "learning_rate": 1.8694332698143836e-05, "loss": 1.2619, "step": 4090 }, { "epoch": 0.09356031217196842, "grad_norm": 3.03125, "learning_rate": 1.8689627590749758e-05, "loss": 1.2881, "step": 4100 }, { "epoch": 0.09378850805531468, "grad_norm": 3.046875, "learning_rate": 1.8684922483355683e-05, "loss": 1.2401, "step": 4110 }, { "epoch": 0.09401670393866095, "grad_norm": 2.890625, "learning_rate": 1.8680217375961606e-05, "loss": 1.2744, "step": 4120 }, { "epoch": 0.0942448998220072, "grad_norm": 2.8125, "learning_rate": 1.867551226856753e-05, "loss": 1.256, "step": 4130 }, { "epoch": 0.09447309570535348, "grad_norm": 2.890625, "learning_rate": 1.8670807161173453e-05, "loss": 1.2605, "step": 4140 }, { "epoch": 0.09470129158869973, "grad_norm": 2.9375, "learning_rate": 1.866610205377938e-05, "loss": 1.255, "step": 4150 }, { "epoch": 0.094929487472046, "grad_norm": 2.78125, "learning_rate": 1.86613969463853e-05, "loss": 1.2723, "step": 4160 }, { "epoch": 0.09515768335539226, "grad_norm": 3.21875, "learning_rate": 1.8656691838991226e-05, "loss": 1.2614, "step": 4170 }, { "epoch": 0.09538587923873854, "grad_norm": 3.046875, "learning_rate": 1.8651986731597148e-05, "loss": 1.2515, "step": 4180 }, { "epoch": 0.0956140751220848, "grad_norm": 3.25, "learning_rate": 1.8647281624203073e-05, "loss": 1.2351, "step": 4190 }, { "epoch": 0.09584227100543107, "grad_norm": 2.953125, "learning_rate": 1.8642576516808995e-05, "loss": 1.2404, "step": 4200 }, { "epoch": 0.09607046688877732, "grad_norm": 2.765625, "learning_rate": 1.863787140941492e-05, "loss": 1.3401, "step": 4210 }, { "epoch": 0.0962986627721236, "grad_norm": 3.03125, "learning_rate": 1.8633166302020843e-05, "loss": 1.2879, "step": 4220 }, { "epoch": 0.09652685865546985, "grad_norm": 3.21875, "learning_rate": 1.8628461194626768e-05, "loss": 1.2982, "step": 4230 }, { "epoch": 0.09675505453881612, "grad_norm": 2.9375, "learning_rate": 1.862375608723269e-05, "loss": 1.2635, "step": 4240 }, { "epoch": 0.09698325042216238, "grad_norm": 2.875, "learning_rate": 1.8619050979838615e-05, "loss": 1.2737, "step": 4250 }, { "epoch": 0.09721144630550865, "grad_norm": 3.0, "learning_rate": 1.8614345872444537e-05, "loss": 1.2966, "step": 4260 }, { "epoch": 0.09743964218885491, "grad_norm": 2.890625, "learning_rate": 1.8609640765050463e-05, "loss": 1.2325, "step": 4270 }, { "epoch": 0.09766783807220118, "grad_norm": 2.71875, "learning_rate": 1.8604935657656388e-05, "loss": 1.2849, "step": 4280 }, { "epoch": 0.09789603395554744, "grad_norm": 3.125, "learning_rate": 1.860023055026231e-05, "loss": 1.284, "step": 4290 }, { "epoch": 0.09812422983889371, "grad_norm": 2.859375, "learning_rate": 1.8595525442868236e-05, "loss": 1.2073, "step": 4300 }, { "epoch": 0.09835242572223997, "grad_norm": 3.140625, "learning_rate": 1.8590820335474158e-05, "loss": 1.2362, "step": 4310 }, { "epoch": 0.09858062160558624, "grad_norm": 2.640625, "learning_rate": 1.8586115228080083e-05, "loss": 1.2705, "step": 4320 }, { "epoch": 0.0988088174889325, "grad_norm": 2.875, "learning_rate": 1.8581410120686005e-05, "loss": 1.3003, "step": 4330 }, { "epoch": 0.09903701337227877, "grad_norm": 2.953125, "learning_rate": 1.857670501329193e-05, "loss": 1.2598, "step": 4340 }, { "epoch": 0.09926520925562503, "grad_norm": 2.96875, "learning_rate": 1.8571999905897853e-05, "loss": 1.2231, "step": 4350 }, { "epoch": 0.0994934051389713, "grad_norm": 2.921875, "learning_rate": 1.8567294798503778e-05, "loss": 1.2559, "step": 4360 }, { "epoch": 0.09972160102231756, "grad_norm": 2.765625, "learning_rate": 1.85625896911097e-05, "loss": 1.2536, "step": 4370 }, { "epoch": 0.09994979690566382, "grad_norm": 2.875, "learning_rate": 1.8557884583715625e-05, "loss": 1.2588, "step": 4380 }, { "epoch": 0.10017799278901009, "grad_norm": 3.046875, "learning_rate": 1.8553179476321547e-05, "loss": 1.2592, "step": 4390 }, { "epoch": 0.10040618867235634, "grad_norm": 2.9375, "learning_rate": 1.8548474368927473e-05, "loss": 1.269, "step": 4400 }, { "epoch": 0.10063438455570262, "grad_norm": 2.953125, "learning_rate": 1.8543769261533395e-05, "loss": 1.2416, "step": 4410 }, { "epoch": 0.10086258043904887, "grad_norm": 2.8125, "learning_rate": 1.853906415413932e-05, "loss": 1.3027, "step": 4420 }, { "epoch": 0.10109077632239515, "grad_norm": 3.296875, "learning_rate": 1.8534359046745242e-05, "loss": 1.2604, "step": 4430 }, { "epoch": 0.1013189722057414, "grad_norm": 2.84375, "learning_rate": 1.8529653939351168e-05, "loss": 1.2807, "step": 4440 }, { "epoch": 0.10154716808908767, "grad_norm": 3.3125, "learning_rate": 1.852494883195709e-05, "loss": 1.2839, "step": 4450 }, { "epoch": 0.10177536397243393, "grad_norm": 3.046875, "learning_rate": 1.8520243724563015e-05, "loss": 1.2289, "step": 4460 }, { "epoch": 0.1020035598557802, "grad_norm": 3.03125, "learning_rate": 1.8515538617168937e-05, "loss": 1.2233, "step": 4470 }, { "epoch": 0.10223175573912646, "grad_norm": 2.859375, "learning_rate": 1.8510833509774863e-05, "loss": 1.2179, "step": 4480 }, { "epoch": 0.10245995162247273, "grad_norm": 2.796875, "learning_rate": 1.8506128402380785e-05, "loss": 1.3219, "step": 4490 }, { "epoch": 0.10268814750581899, "grad_norm": 2.78125, "learning_rate": 1.850142329498671e-05, "loss": 1.2578, "step": 4500 }, { "epoch": 0.10291634338916526, "grad_norm": 2.859375, "learning_rate": 1.8496718187592632e-05, "loss": 1.2995, "step": 4510 }, { "epoch": 0.10314453927251152, "grad_norm": 3.25, "learning_rate": 1.8492013080198557e-05, "loss": 1.2729, "step": 4520 }, { "epoch": 0.10337273515585779, "grad_norm": 2.875, "learning_rate": 1.848730797280448e-05, "loss": 1.2876, "step": 4530 }, { "epoch": 0.10360093103920405, "grad_norm": 2.84375, "learning_rate": 1.8482602865410405e-05, "loss": 1.2863, "step": 4540 }, { "epoch": 0.10382912692255032, "grad_norm": 3.015625, "learning_rate": 1.8477897758016327e-05, "loss": 1.2277, "step": 4550 }, { "epoch": 0.10405732280589658, "grad_norm": 2.96875, "learning_rate": 1.8473192650622252e-05, "loss": 1.2692, "step": 4560 }, { "epoch": 0.10428551868924285, "grad_norm": 2.890625, "learning_rate": 1.8468487543228174e-05, "loss": 1.2142, "step": 4570 }, { "epoch": 0.10451371457258911, "grad_norm": 3.03125, "learning_rate": 1.84637824358341e-05, "loss": 1.2877, "step": 4580 }, { "epoch": 0.10474191045593538, "grad_norm": 3.109375, "learning_rate": 1.845907732844002e-05, "loss": 1.2741, "step": 4590 }, { "epoch": 0.10497010633928164, "grad_norm": 2.9375, "learning_rate": 1.8454372221045947e-05, "loss": 1.2728, "step": 4600 }, { "epoch": 0.10519830222262791, "grad_norm": 2.828125, "learning_rate": 1.844966711365187e-05, "loss": 1.2688, "step": 4610 }, { "epoch": 0.10542649810597417, "grad_norm": 2.65625, "learning_rate": 1.8444962006257795e-05, "loss": 1.2641, "step": 4620 }, { "epoch": 0.10565469398932044, "grad_norm": 3.0, "learning_rate": 1.8440256898863717e-05, "loss": 1.2258, "step": 4630 }, { "epoch": 0.1058828898726667, "grad_norm": 3.15625, "learning_rate": 1.8435551791469642e-05, "loss": 1.2974, "step": 4640 }, { "epoch": 0.10611108575601297, "grad_norm": 2.953125, "learning_rate": 1.8430846684075564e-05, "loss": 1.2839, "step": 4650 }, { "epoch": 0.10633928163935923, "grad_norm": 3.078125, "learning_rate": 1.842614157668149e-05, "loss": 1.2838, "step": 4660 }, { "epoch": 0.10656747752270548, "grad_norm": 3.171875, "learning_rate": 1.842143646928741e-05, "loss": 1.2589, "step": 4670 }, { "epoch": 0.10679567340605176, "grad_norm": 2.796875, "learning_rate": 1.8416731361893337e-05, "loss": 1.2832, "step": 4680 }, { "epoch": 0.10702386928939801, "grad_norm": 3.078125, "learning_rate": 1.841202625449926e-05, "loss": 1.2472, "step": 4690 }, { "epoch": 0.10725206517274428, "grad_norm": 3.078125, "learning_rate": 1.8407321147105184e-05, "loss": 1.25, "step": 4700 }, { "epoch": 0.10748026105609054, "grad_norm": 2.90625, "learning_rate": 1.8402616039711106e-05, "loss": 1.292, "step": 4710 }, { "epoch": 0.10770845693943681, "grad_norm": 3.5, "learning_rate": 1.839791093231703e-05, "loss": 1.2658, "step": 4720 }, { "epoch": 0.10793665282278307, "grad_norm": 2.71875, "learning_rate": 1.8393205824922954e-05, "loss": 1.2651, "step": 4730 }, { "epoch": 0.10816484870612934, "grad_norm": 3.140625, "learning_rate": 1.838850071752888e-05, "loss": 1.3102, "step": 4740 }, { "epoch": 0.1083930445894756, "grad_norm": 3.0, "learning_rate": 1.83837956101348e-05, "loss": 1.2472, "step": 4750 }, { "epoch": 0.10862124047282187, "grad_norm": 2.84375, "learning_rate": 1.8379090502740726e-05, "loss": 1.2705, "step": 4760 }, { "epoch": 0.10884943635616813, "grad_norm": 3.15625, "learning_rate": 1.837438539534665e-05, "loss": 1.2625, "step": 4770 }, { "epoch": 0.1090776322395144, "grad_norm": 3.234375, "learning_rate": 1.8369680287952574e-05, "loss": 1.2495, "step": 4780 }, { "epoch": 0.10930582812286066, "grad_norm": 2.96875, "learning_rate": 1.8364975180558496e-05, "loss": 1.3189, "step": 4790 }, { "epoch": 0.10953402400620693, "grad_norm": 2.8125, "learning_rate": 1.836027007316442e-05, "loss": 1.2462, "step": 4800 }, { "epoch": 0.10976221988955319, "grad_norm": 2.9375, "learning_rate": 1.8355564965770343e-05, "loss": 1.2437, "step": 4810 }, { "epoch": 0.10999041577289946, "grad_norm": 3.109375, "learning_rate": 1.835085985837627e-05, "loss": 1.2827, "step": 4820 }, { "epoch": 0.11021861165624572, "grad_norm": 2.765625, "learning_rate": 1.834615475098219e-05, "loss": 1.2788, "step": 4830 }, { "epoch": 0.11044680753959199, "grad_norm": 2.765625, "learning_rate": 1.8341449643588116e-05, "loss": 1.3199, "step": 4840 }, { "epoch": 0.11067500342293825, "grad_norm": 2.890625, "learning_rate": 1.833674453619404e-05, "loss": 1.247, "step": 4850 }, { "epoch": 0.11090319930628452, "grad_norm": 2.875, "learning_rate": 1.8332039428799964e-05, "loss": 1.3147, "step": 4860 }, { "epoch": 0.11113139518963078, "grad_norm": 2.96875, "learning_rate": 1.832733432140589e-05, "loss": 1.2362, "step": 4870 }, { "epoch": 0.11135959107297705, "grad_norm": 3.21875, "learning_rate": 1.832262921401181e-05, "loss": 1.2855, "step": 4880 }, { "epoch": 0.1115877869563233, "grad_norm": 2.734375, "learning_rate": 1.8317924106617736e-05, "loss": 1.2943, "step": 4890 }, { "epoch": 0.11181598283966958, "grad_norm": 3.140625, "learning_rate": 1.831321899922366e-05, "loss": 1.2613, "step": 4900 }, { "epoch": 0.11204417872301584, "grad_norm": 2.703125, "learning_rate": 1.8308513891829584e-05, "loss": 1.2518, "step": 4910 }, { "epoch": 0.11227237460636211, "grad_norm": 2.953125, "learning_rate": 1.8303808784435506e-05, "loss": 1.236, "step": 4920 }, { "epoch": 0.11250057048970837, "grad_norm": 2.953125, "learning_rate": 1.829910367704143e-05, "loss": 1.246, "step": 4930 }, { "epoch": 0.11272876637305464, "grad_norm": 2.984375, "learning_rate": 1.8294398569647353e-05, "loss": 1.3012, "step": 4940 }, { "epoch": 0.1129569622564009, "grad_norm": 2.84375, "learning_rate": 1.828969346225328e-05, "loss": 1.2405, "step": 4950 }, { "epoch": 0.11318515813974715, "grad_norm": 2.828125, "learning_rate": 1.82849883548592e-05, "loss": 1.3221, "step": 4960 }, { "epoch": 0.11341335402309342, "grad_norm": 2.9375, "learning_rate": 1.8280283247465126e-05, "loss": 1.2593, "step": 4970 }, { "epoch": 0.11364154990643968, "grad_norm": 3.34375, "learning_rate": 1.8275578140071048e-05, "loss": 1.2521, "step": 4980 }, { "epoch": 0.11386974578978595, "grad_norm": 3.125, "learning_rate": 1.8270873032676974e-05, "loss": 1.2798, "step": 4990 }, { "epoch": 0.11409794167313221, "grad_norm": 2.890625, "learning_rate": 1.8266167925282896e-05, "loss": 1.2496, "step": 5000 }, { "epoch": 0.11432613755647848, "grad_norm": 2.84375, "learning_rate": 1.826146281788882e-05, "loss": 1.2854, "step": 5010 }, { "epoch": 0.11455433343982474, "grad_norm": 2.890625, "learning_rate": 1.8256757710494743e-05, "loss": 1.2502, "step": 5020 }, { "epoch": 0.11478252932317101, "grad_norm": 2.796875, "learning_rate": 1.825205260310067e-05, "loss": 1.3004, "step": 5030 }, { "epoch": 0.11501072520651727, "grad_norm": 2.96875, "learning_rate": 1.824734749570659e-05, "loss": 1.2751, "step": 5040 }, { "epoch": 0.11523892108986354, "grad_norm": 2.75, "learning_rate": 1.8242642388312516e-05, "loss": 1.2622, "step": 5050 }, { "epoch": 0.1154671169732098, "grad_norm": 3.296875, "learning_rate": 1.8237937280918438e-05, "loss": 1.2805, "step": 5060 }, { "epoch": 0.11569531285655607, "grad_norm": 3.046875, "learning_rate": 1.8233232173524363e-05, "loss": 1.3034, "step": 5070 }, { "epoch": 0.11592350873990233, "grad_norm": 3.0625, "learning_rate": 1.8228527066130285e-05, "loss": 1.2822, "step": 5080 }, { "epoch": 0.1161517046232486, "grad_norm": 2.796875, "learning_rate": 1.822382195873621e-05, "loss": 1.2507, "step": 5090 }, { "epoch": 0.11637990050659486, "grad_norm": 2.921875, "learning_rate": 1.8219116851342133e-05, "loss": 1.2798, "step": 5100 }, { "epoch": 0.11660809638994113, "grad_norm": 3.046875, "learning_rate": 1.8214411743948058e-05, "loss": 1.2438, "step": 5110 }, { "epoch": 0.11683629227328739, "grad_norm": 2.84375, "learning_rate": 1.820970663655398e-05, "loss": 1.2499, "step": 5120 }, { "epoch": 0.11706448815663366, "grad_norm": 2.984375, "learning_rate": 1.8205001529159906e-05, "loss": 1.2277, "step": 5130 }, { "epoch": 0.11729268403997992, "grad_norm": 2.953125, "learning_rate": 1.8200296421765828e-05, "loss": 1.2105, "step": 5140 }, { "epoch": 0.11752087992332619, "grad_norm": 3.0625, "learning_rate": 1.8195591314371753e-05, "loss": 1.2571, "step": 5150 }, { "epoch": 0.11774907580667245, "grad_norm": 3.171875, "learning_rate": 1.8190886206977675e-05, "loss": 1.281, "step": 5160 }, { "epoch": 0.11797727169001872, "grad_norm": 3.078125, "learning_rate": 1.81861810995836e-05, "loss": 1.2809, "step": 5170 }, { "epoch": 0.11820546757336498, "grad_norm": 2.84375, "learning_rate": 1.8181475992189522e-05, "loss": 1.2533, "step": 5180 }, { "epoch": 0.11843366345671125, "grad_norm": 2.921875, "learning_rate": 1.8176770884795448e-05, "loss": 1.2672, "step": 5190 }, { "epoch": 0.1186618593400575, "grad_norm": 3.203125, "learning_rate": 1.817206577740137e-05, "loss": 1.301, "step": 5200 }, { "epoch": 0.11889005522340378, "grad_norm": 2.921875, "learning_rate": 1.8167360670007295e-05, "loss": 1.2164, "step": 5210 }, { "epoch": 0.11911825110675003, "grad_norm": 3.125, "learning_rate": 1.8162655562613217e-05, "loss": 1.2581, "step": 5220 }, { "epoch": 0.1193464469900963, "grad_norm": 3.125, "learning_rate": 1.8157950455219143e-05, "loss": 1.2218, "step": 5230 }, { "epoch": 0.11957464287344256, "grad_norm": 3.03125, "learning_rate": 1.8153245347825065e-05, "loss": 1.2251, "step": 5240 }, { "epoch": 0.11980283875678882, "grad_norm": 2.890625, "learning_rate": 1.814854024043099e-05, "loss": 1.2544, "step": 5250 }, { "epoch": 0.12003103464013509, "grad_norm": 3.15625, "learning_rate": 1.8143835133036912e-05, "loss": 1.3162, "step": 5260 }, { "epoch": 0.12025923052348135, "grad_norm": 2.921875, "learning_rate": 1.8139130025642838e-05, "loss": 1.1967, "step": 5270 }, { "epoch": 0.12048742640682762, "grad_norm": 2.890625, "learning_rate": 1.813442491824876e-05, "loss": 1.2682, "step": 5280 }, { "epoch": 0.12071562229017388, "grad_norm": 3.359375, "learning_rate": 1.8129719810854685e-05, "loss": 1.2578, "step": 5290 }, { "epoch": 0.12094381817352015, "grad_norm": 3.03125, "learning_rate": 1.8125014703460607e-05, "loss": 1.2763, "step": 5300 }, { "epoch": 0.12117201405686641, "grad_norm": 2.734375, "learning_rate": 1.8120309596066532e-05, "loss": 1.2392, "step": 5310 }, { "epoch": 0.12140020994021268, "grad_norm": 3.1875, "learning_rate": 1.8115604488672454e-05, "loss": 1.2178, "step": 5320 }, { "epoch": 0.12162840582355894, "grad_norm": 3.3125, "learning_rate": 1.811089938127838e-05, "loss": 1.2655, "step": 5330 }, { "epoch": 0.12185660170690521, "grad_norm": 3.078125, "learning_rate": 1.8106194273884302e-05, "loss": 1.2879, "step": 5340 }, { "epoch": 0.12208479759025147, "grad_norm": 2.859375, "learning_rate": 1.8101489166490227e-05, "loss": 1.2712, "step": 5350 }, { "epoch": 0.12231299347359774, "grad_norm": 2.765625, "learning_rate": 1.809678405909615e-05, "loss": 1.2287, "step": 5360 }, { "epoch": 0.122541189356944, "grad_norm": 3.015625, "learning_rate": 1.8092078951702075e-05, "loss": 1.2933, "step": 5370 }, { "epoch": 0.12276938524029027, "grad_norm": 3.125, "learning_rate": 1.8087373844307997e-05, "loss": 1.2734, "step": 5380 }, { "epoch": 0.12299758112363653, "grad_norm": 3.140625, "learning_rate": 1.8082668736913922e-05, "loss": 1.265, "step": 5390 }, { "epoch": 0.1232257770069828, "grad_norm": 2.671875, "learning_rate": 1.8077963629519844e-05, "loss": 1.2476, "step": 5400 }, { "epoch": 0.12345397289032906, "grad_norm": 3.140625, "learning_rate": 1.807325852212577e-05, "loss": 1.2881, "step": 5410 }, { "epoch": 0.12368216877367533, "grad_norm": 2.828125, "learning_rate": 1.8068553414731695e-05, "loss": 1.2796, "step": 5420 }, { "epoch": 0.12391036465702158, "grad_norm": 2.8125, "learning_rate": 1.8063848307337617e-05, "loss": 1.2454, "step": 5430 }, { "epoch": 0.12413856054036786, "grad_norm": 2.84375, "learning_rate": 1.8059143199943542e-05, "loss": 1.2776, "step": 5440 }, { "epoch": 0.12436675642371411, "grad_norm": 2.8125, "learning_rate": 1.8054438092549464e-05, "loss": 1.2627, "step": 5450 }, { "epoch": 0.12459495230706039, "grad_norm": 2.96875, "learning_rate": 1.804973298515539e-05, "loss": 1.2423, "step": 5460 }, { "epoch": 0.12482314819040664, "grad_norm": 2.890625, "learning_rate": 1.8045027877761312e-05, "loss": 1.2439, "step": 5470 }, { "epoch": 0.1250513440737529, "grad_norm": 2.96875, "learning_rate": 1.8040322770367237e-05, "loss": 1.217, "step": 5480 }, { "epoch": 0.12527953995709917, "grad_norm": 2.796875, "learning_rate": 1.803561766297316e-05, "loss": 1.2653, "step": 5490 }, { "epoch": 0.12550773584044544, "grad_norm": 3.015625, "learning_rate": 1.8030912555579085e-05, "loss": 1.2117, "step": 5500 }, { "epoch": 0.12573593172379172, "grad_norm": 2.9375, "learning_rate": 1.8026207448185007e-05, "loss": 1.2218, "step": 5510 }, { "epoch": 0.12596412760713796, "grad_norm": 3.171875, "learning_rate": 1.8021502340790932e-05, "loss": 1.2856, "step": 5520 }, { "epoch": 0.12619232349048423, "grad_norm": 3.140625, "learning_rate": 1.8016797233396854e-05, "loss": 1.2522, "step": 5530 }, { "epoch": 0.1264205193738305, "grad_norm": 2.984375, "learning_rate": 1.801209212600278e-05, "loss": 1.2559, "step": 5540 }, { "epoch": 0.12664871525717675, "grad_norm": 2.796875, "learning_rate": 1.80073870186087e-05, "loss": 1.2996, "step": 5550 }, { "epoch": 0.12687691114052302, "grad_norm": 3.015625, "learning_rate": 1.8002681911214627e-05, "loss": 1.2467, "step": 5560 }, { "epoch": 0.1271051070238693, "grad_norm": 2.828125, "learning_rate": 1.799797680382055e-05, "loss": 1.2308, "step": 5570 }, { "epoch": 0.12733330290721556, "grad_norm": 3.234375, "learning_rate": 1.7993271696426474e-05, "loss": 1.2991, "step": 5580 }, { "epoch": 0.1275614987905618, "grad_norm": 3.21875, "learning_rate": 1.7988566589032396e-05, "loss": 1.2194, "step": 5590 }, { "epoch": 0.12778969467390808, "grad_norm": 2.890625, "learning_rate": 1.7983861481638322e-05, "loss": 1.2444, "step": 5600 }, { "epoch": 0.12801789055725435, "grad_norm": 2.96875, "learning_rate": 1.7979156374244244e-05, "loss": 1.2831, "step": 5610 }, { "epoch": 0.12824608644060062, "grad_norm": 2.828125, "learning_rate": 1.797445126685017e-05, "loss": 1.2437, "step": 5620 }, { "epoch": 0.12847428232394686, "grad_norm": 3.03125, "learning_rate": 1.796974615945609e-05, "loss": 1.239, "step": 5630 }, { "epoch": 0.12870247820729314, "grad_norm": 2.921875, "learning_rate": 1.7965041052062017e-05, "loss": 1.2702, "step": 5640 }, { "epoch": 0.1289306740906394, "grad_norm": 2.921875, "learning_rate": 1.796033594466794e-05, "loss": 1.2606, "step": 5650 }, { "epoch": 0.12915886997398568, "grad_norm": 3.015625, "learning_rate": 1.7955630837273864e-05, "loss": 1.3044, "step": 5660 }, { "epoch": 0.12938706585733192, "grad_norm": 2.9375, "learning_rate": 1.7950925729879786e-05, "loss": 1.2438, "step": 5670 }, { "epoch": 0.1296152617406782, "grad_norm": 2.84375, "learning_rate": 1.794622062248571e-05, "loss": 1.1546, "step": 5680 }, { "epoch": 0.12984345762402447, "grad_norm": 2.78125, "learning_rate": 1.7941515515091633e-05, "loss": 1.286, "step": 5690 }, { "epoch": 0.13007165350737074, "grad_norm": 3.5, "learning_rate": 1.793681040769756e-05, "loss": 1.219, "step": 5700 }, { "epoch": 0.13029984939071698, "grad_norm": 3.140625, "learning_rate": 1.793210530030348e-05, "loss": 1.2426, "step": 5710 }, { "epoch": 0.13052804527406325, "grad_norm": 3.296875, "learning_rate": 1.7927400192909406e-05, "loss": 1.2158, "step": 5720 }, { "epoch": 0.13075624115740953, "grad_norm": 2.75, "learning_rate": 1.7922695085515328e-05, "loss": 1.277, "step": 5730 }, { "epoch": 0.1309844370407558, "grad_norm": 2.734375, "learning_rate": 1.7917989978121254e-05, "loss": 1.2435, "step": 5740 }, { "epoch": 0.13121263292410204, "grad_norm": 2.921875, "learning_rate": 1.7913284870727176e-05, "loss": 1.2143, "step": 5750 }, { "epoch": 0.1314408288074483, "grad_norm": 2.890625, "learning_rate": 1.79085797633331e-05, "loss": 1.258, "step": 5760 }, { "epoch": 0.13166902469079458, "grad_norm": 2.640625, "learning_rate": 1.7903874655939023e-05, "loss": 1.2309, "step": 5770 }, { "epoch": 0.13189722057414086, "grad_norm": 2.9375, "learning_rate": 1.789916954854495e-05, "loss": 1.23, "step": 5780 }, { "epoch": 0.1321254164574871, "grad_norm": 3.140625, "learning_rate": 1.789446444115087e-05, "loss": 1.2269, "step": 5790 }, { "epoch": 0.13235361234083337, "grad_norm": 2.671875, "learning_rate": 1.7889759333756796e-05, "loss": 1.2321, "step": 5800 }, { "epoch": 0.13258180822417964, "grad_norm": 3.109375, "learning_rate": 1.7885054226362718e-05, "loss": 1.2601, "step": 5810 }, { "epoch": 0.1328100041075259, "grad_norm": 2.75, "learning_rate": 1.7880349118968643e-05, "loss": 1.2247, "step": 5820 }, { "epoch": 0.13303819999087216, "grad_norm": 3.03125, "learning_rate": 1.7875644011574565e-05, "loss": 1.2723, "step": 5830 }, { "epoch": 0.13326639587421843, "grad_norm": 3.921875, "learning_rate": 1.787093890418049e-05, "loss": 1.2394, "step": 5840 }, { "epoch": 0.1334945917575647, "grad_norm": 2.9375, "learning_rate": 1.7866233796786413e-05, "loss": 1.2017, "step": 5850 }, { "epoch": 0.13372278764091095, "grad_norm": 3.0625, "learning_rate": 1.7861528689392338e-05, "loss": 1.2101, "step": 5860 }, { "epoch": 0.13395098352425722, "grad_norm": 2.78125, "learning_rate": 1.785682358199826e-05, "loss": 1.2719, "step": 5870 }, { "epoch": 0.1341791794076035, "grad_norm": 2.828125, "learning_rate": 1.7852118474604186e-05, "loss": 1.2616, "step": 5880 }, { "epoch": 0.13440737529094976, "grad_norm": 3.09375, "learning_rate": 1.7847413367210108e-05, "loss": 1.2283, "step": 5890 }, { "epoch": 0.134635571174296, "grad_norm": 3.140625, "learning_rate": 1.7842708259816033e-05, "loss": 1.2967, "step": 5900 }, { "epoch": 0.13486376705764228, "grad_norm": 2.96875, "learning_rate": 1.7838003152421955e-05, "loss": 1.2652, "step": 5910 }, { "epoch": 0.13509196294098855, "grad_norm": 3.0, "learning_rate": 1.783329804502788e-05, "loss": 1.2421, "step": 5920 }, { "epoch": 0.13532015882433482, "grad_norm": 2.984375, "learning_rate": 1.7828592937633803e-05, "loss": 1.2841, "step": 5930 }, { "epoch": 0.13554835470768106, "grad_norm": 3.140625, "learning_rate": 1.7823887830239725e-05, "loss": 1.2951, "step": 5940 }, { "epoch": 0.13577655059102733, "grad_norm": 3.1875, "learning_rate": 1.781918272284565e-05, "loss": 1.2944, "step": 5950 }, { "epoch": 0.1360047464743736, "grad_norm": 2.90625, "learning_rate": 1.7814477615451572e-05, "loss": 1.2784, "step": 5960 }, { "epoch": 0.13623294235771988, "grad_norm": 3.03125, "learning_rate": 1.7809772508057497e-05, "loss": 1.2043, "step": 5970 }, { "epoch": 0.13646113824106612, "grad_norm": 3.078125, "learning_rate": 1.780506740066342e-05, "loss": 1.2594, "step": 5980 }, { "epoch": 0.1366893341244124, "grad_norm": 2.921875, "learning_rate": 1.7800362293269345e-05, "loss": 1.3073, "step": 5990 }, { "epoch": 0.13691753000775866, "grad_norm": 2.8125, "learning_rate": 1.7795657185875267e-05, "loss": 1.2917, "step": 6000 }, { "epoch": 0.13714572589110494, "grad_norm": 2.953125, "learning_rate": 1.7790952078481192e-05, "loss": 1.2648, "step": 6010 }, { "epoch": 0.13737392177445118, "grad_norm": 3.03125, "learning_rate": 1.7786246971087114e-05, "loss": 1.2818, "step": 6020 }, { "epoch": 0.13760211765779745, "grad_norm": 3.078125, "learning_rate": 1.778154186369304e-05, "loss": 1.2916, "step": 6030 }, { "epoch": 0.13783031354114372, "grad_norm": 3.484375, "learning_rate": 1.777683675629896e-05, "loss": 1.2765, "step": 6040 }, { "epoch": 0.13805850942449, "grad_norm": 3.03125, "learning_rate": 1.7772131648904887e-05, "loss": 1.1827, "step": 6050 }, { "epoch": 0.13828670530783624, "grad_norm": 2.8125, "learning_rate": 1.776742654151081e-05, "loss": 1.2283, "step": 6060 }, { "epoch": 0.1385149011911825, "grad_norm": 2.828125, "learning_rate": 1.7762721434116735e-05, "loss": 1.2519, "step": 6070 }, { "epoch": 0.13874309707452878, "grad_norm": 3.328125, "learning_rate": 1.7758016326722657e-05, "loss": 1.246, "step": 6080 }, { "epoch": 0.13897129295787505, "grad_norm": 2.78125, "learning_rate": 1.7753311219328582e-05, "loss": 1.237, "step": 6090 }, { "epoch": 0.1391994888412213, "grad_norm": 2.96875, "learning_rate": 1.7748606111934504e-05, "loss": 1.2032, "step": 6100 }, { "epoch": 0.13942768472456757, "grad_norm": 2.828125, "learning_rate": 1.774390100454043e-05, "loss": 1.2937, "step": 6110 }, { "epoch": 0.13965588060791384, "grad_norm": 3.078125, "learning_rate": 1.773919589714635e-05, "loss": 1.279, "step": 6120 }, { "epoch": 0.13988407649126008, "grad_norm": 3.140625, "learning_rate": 1.7734490789752277e-05, "loss": 1.2794, "step": 6130 }, { "epoch": 0.14011227237460636, "grad_norm": 3.078125, "learning_rate": 1.77297856823582e-05, "loss": 1.2253, "step": 6140 }, { "epoch": 0.14034046825795263, "grad_norm": 2.9375, "learning_rate": 1.7725080574964124e-05, "loss": 1.252, "step": 6150 }, { "epoch": 0.1405686641412989, "grad_norm": 3.046875, "learning_rate": 1.7720375467570046e-05, "loss": 1.2493, "step": 6160 }, { "epoch": 0.14079686002464514, "grad_norm": 2.921875, "learning_rate": 1.771567036017597e-05, "loss": 1.2164, "step": 6170 }, { "epoch": 0.14102505590799141, "grad_norm": 2.953125, "learning_rate": 1.7710965252781897e-05, "loss": 1.2976, "step": 6180 }, { "epoch": 0.1412532517913377, "grad_norm": 3.03125, "learning_rate": 1.770626014538782e-05, "loss": 1.2307, "step": 6190 }, { "epoch": 0.14148144767468396, "grad_norm": 3.09375, "learning_rate": 1.7701555037993744e-05, "loss": 1.2403, "step": 6200 }, { "epoch": 0.1417096435580302, "grad_norm": 2.875, "learning_rate": 1.7696849930599666e-05, "loss": 1.2503, "step": 6210 }, { "epoch": 0.14193783944137647, "grad_norm": 3.046875, "learning_rate": 1.7692144823205592e-05, "loss": 1.2889, "step": 6220 }, { "epoch": 0.14216603532472274, "grad_norm": 3.25, "learning_rate": 1.7687439715811514e-05, "loss": 1.2622, "step": 6230 }, { "epoch": 0.14239423120806902, "grad_norm": 3.046875, "learning_rate": 1.768273460841744e-05, "loss": 1.3461, "step": 6240 }, { "epoch": 0.14262242709141526, "grad_norm": 3.21875, "learning_rate": 1.767802950102336e-05, "loss": 1.2733, "step": 6250 }, { "epoch": 0.14285062297476153, "grad_norm": 2.8125, "learning_rate": 1.7673324393629287e-05, "loss": 1.2585, "step": 6260 }, { "epoch": 0.1430788188581078, "grad_norm": 2.859375, "learning_rate": 1.766861928623521e-05, "loss": 1.2415, "step": 6270 }, { "epoch": 0.14330701474145408, "grad_norm": 2.921875, "learning_rate": 1.7663914178841134e-05, "loss": 1.2524, "step": 6280 }, { "epoch": 0.14353521062480032, "grad_norm": 2.921875, "learning_rate": 1.7659209071447056e-05, "loss": 1.2286, "step": 6290 }, { "epoch": 0.1437634065081466, "grad_norm": 3.0, "learning_rate": 1.765450396405298e-05, "loss": 1.2617, "step": 6300 }, { "epoch": 0.14399160239149286, "grad_norm": 2.828125, "learning_rate": 1.7649798856658904e-05, "loss": 1.2547, "step": 6310 }, { "epoch": 0.14421979827483913, "grad_norm": 2.65625, "learning_rate": 1.764509374926483e-05, "loss": 1.2538, "step": 6320 }, { "epoch": 0.14444799415818538, "grad_norm": 3.0625, "learning_rate": 1.764038864187075e-05, "loss": 1.2811, "step": 6330 }, { "epoch": 0.14467619004153165, "grad_norm": 3.078125, "learning_rate": 1.7635683534476676e-05, "loss": 1.2877, "step": 6340 }, { "epoch": 0.14490438592487792, "grad_norm": 2.703125, "learning_rate": 1.76309784270826e-05, "loss": 1.2913, "step": 6350 }, { "epoch": 0.1451325818082242, "grad_norm": 2.90625, "learning_rate": 1.7626273319688524e-05, "loss": 1.2183, "step": 6360 }, { "epoch": 0.14536077769157044, "grad_norm": 3.0, "learning_rate": 1.7621568212294446e-05, "loss": 1.18, "step": 6370 }, { "epoch": 0.1455889735749167, "grad_norm": 2.796875, "learning_rate": 1.761686310490037e-05, "loss": 1.2378, "step": 6380 }, { "epoch": 0.14581716945826298, "grad_norm": 2.90625, "learning_rate": 1.7612157997506293e-05, "loss": 1.2825, "step": 6390 }, { "epoch": 0.14604536534160925, "grad_norm": 3.015625, "learning_rate": 1.760745289011222e-05, "loss": 1.2144, "step": 6400 }, { "epoch": 0.1462735612249555, "grad_norm": 2.984375, "learning_rate": 1.760274778271814e-05, "loss": 1.2253, "step": 6410 }, { "epoch": 0.14650175710830177, "grad_norm": 3.171875, "learning_rate": 1.7598042675324066e-05, "loss": 1.2837, "step": 6420 }, { "epoch": 0.14672995299164804, "grad_norm": 3.0, "learning_rate": 1.7593337567929988e-05, "loss": 1.2847, "step": 6430 }, { "epoch": 0.14695814887499428, "grad_norm": 3.109375, "learning_rate": 1.7588632460535914e-05, "loss": 1.195, "step": 6440 }, { "epoch": 0.14718634475834055, "grad_norm": 3.09375, "learning_rate": 1.7583927353141836e-05, "loss": 1.2393, "step": 6450 }, { "epoch": 0.14741454064168683, "grad_norm": 2.828125, "learning_rate": 1.757922224574776e-05, "loss": 1.2869, "step": 6460 }, { "epoch": 0.1476427365250331, "grad_norm": 2.921875, "learning_rate": 1.7574517138353683e-05, "loss": 1.2533, "step": 6470 }, { "epoch": 0.14787093240837934, "grad_norm": 2.890625, "learning_rate": 1.756981203095961e-05, "loss": 1.1952, "step": 6480 }, { "epoch": 0.1480991282917256, "grad_norm": 2.90625, "learning_rate": 1.756510692356553e-05, "loss": 1.2557, "step": 6490 }, { "epoch": 0.14832732417507188, "grad_norm": 3.15625, "learning_rate": 1.7560401816171456e-05, "loss": 1.1968, "step": 6500 }, { "epoch": 0.14855552005841816, "grad_norm": 3.109375, "learning_rate": 1.7555696708777378e-05, "loss": 1.2667, "step": 6510 }, { "epoch": 0.1487837159417644, "grad_norm": 2.96875, "learning_rate": 1.7550991601383303e-05, "loss": 1.2411, "step": 6520 }, { "epoch": 0.14901191182511067, "grad_norm": 2.8125, "learning_rate": 1.7546286493989225e-05, "loss": 1.2893, "step": 6530 }, { "epoch": 0.14924010770845694, "grad_norm": 2.96875, "learning_rate": 1.754158138659515e-05, "loss": 1.2387, "step": 6540 }, { "epoch": 0.14946830359180321, "grad_norm": 2.96875, "learning_rate": 1.7536876279201073e-05, "loss": 1.1935, "step": 6550 }, { "epoch": 0.14969649947514946, "grad_norm": 3.1875, "learning_rate": 1.7532171171806998e-05, "loss": 1.2669, "step": 6560 }, { "epoch": 0.14992469535849573, "grad_norm": 2.90625, "learning_rate": 1.752746606441292e-05, "loss": 1.2812, "step": 6570 }, { "epoch": 0.150152891241842, "grad_norm": 2.859375, "learning_rate": 1.7522760957018846e-05, "loss": 1.206, "step": 6580 }, { "epoch": 0.15038108712518827, "grad_norm": 3.0625, "learning_rate": 1.7518055849624768e-05, "loss": 1.2076, "step": 6590 }, { "epoch": 0.15060928300853452, "grad_norm": 3.1875, "learning_rate": 1.7513350742230693e-05, "loss": 1.1793, "step": 6600 }, { "epoch": 0.1508374788918808, "grad_norm": 3.171875, "learning_rate": 1.7508645634836615e-05, "loss": 1.2306, "step": 6610 }, { "epoch": 0.15106567477522706, "grad_norm": 3.265625, "learning_rate": 1.750394052744254e-05, "loss": 1.2069, "step": 6620 }, { "epoch": 0.15129387065857333, "grad_norm": 2.96875, "learning_rate": 1.7499235420048462e-05, "loss": 1.2143, "step": 6630 }, { "epoch": 0.15152206654191958, "grad_norm": 3.09375, "learning_rate": 1.7494530312654388e-05, "loss": 1.3262, "step": 6640 }, { "epoch": 0.15175026242526585, "grad_norm": 2.734375, "learning_rate": 1.748982520526031e-05, "loss": 1.2349, "step": 6650 }, { "epoch": 0.15197845830861212, "grad_norm": 3.046875, "learning_rate": 1.7485120097866235e-05, "loss": 1.247, "step": 6660 }, { "epoch": 0.1522066541919584, "grad_norm": 2.953125, "learning_rate": 1.7480414990472157e-05, "loss": 1.2259, "step": 6670 }, { "epoch": 0.15243485007530463, "grad_norm": 2.984375, "learning_rate": 1.7475709883078083e-05, "loss": 1.2278, "step": 6680 }, { "epoch": 0.1526630459586509, "grad_norm": 3.078125, "learning_rate": 1.7471004775684005e-05, "loss": 1.2491, "step": 6690 }, { "epoch": 0.15289124184199718, "grad_norm": 2.703125, "learning_rate": 1.746629966828993e-05, "loss": 1.2678, "step": 6700 }, { "epoch": 0.15311943772534342, "grad_norm": 3.265625, "learning_rate": 1.7461594560895852e-05, "loss": 1.2059, "step": 6710 }, { "epoch": 0.1533476336086897, "grad_norm": 3.0625, "learning_rate": 1.7456889453501778e-05, "loss": 1.2429, "step": 6720 }, { "epoch": 0.15357582949203596, "grad_norm": 2.890625, "learning_rate": 1.74521843461077e-05, "loss": 1.3174, "step": 6730 }, { "epoch": 0.15380402537538224, "grad_norm": 2.84375, "learning_rate": 1.7447479238713625e-05, "loss": 1.1911, "step": 6740 }, { "epoch": 0.15403222125872848, "grad_norm": 2.875, "learning_rate": 1.744277413131955e-05, "loss": 1.2386, "step": 6750 }, { "epoch": 0.15426041714207475, "grad_norm": 2.984375, "learning_rate": 1.7438069023925472e-05, "loss": 1.2324, "step": 6760 }, { "epoch": 0.15448861302542102, "grad_norm": 2.921875, "learning_rate": 1.7433363916531398e-05, "loss": 1.2242, "step": 6770 }, { "epoch": 0.1547168089087673, "grad_norm": 2.78125, "learning_rate": 1.742865880913732e-05, "loss": 1.2817, "step": 6780 }, { "epoch": 0.15494500479211354, "grad_norm": 2.703125, "learning_rate": 1.7423953701743245e-05, "loss": 1.1993, "step": 6790 }, { "epoch": 0.1551732006754598, "grad_norm": 3.046875, "learning_rate": 1.7419248594349167e-05, "loss": 1.2244, "step": 6800 }, { "epoch": 0.15540139655880608, "grad_norm": 2.8125, "learning_rate": 1.7414543486955093e-05, "loss": 1.2154, "step": 6810 }, { "epoch": 0.15562959244215235, "grad_norm": 3.078125, "learning_rate": 1.7409838379561015e-05, "loss": 1.2563, "step": 6820 }, { "epoch": 0.1558577883254986, "grad_norm": 3.0625, "learning_rate": 1.740513327216694e-05, "loss": 1.2159, "step": 6830 }, { "epoch": 0.15608598420884487, "grad_norm": 2.90625, "learning_rate": 1.7400428164772862e-05, "loss": 1.262, "step": 6840 }, { "epoch": 0.15631418009219114, "grad_norm": 3.15625, "learning_rate": 1.7395723057378787e-05, "loss": 1.2336, "step": 6850 }, { "epoch": 0.1565423759755374, "grad_norm": 3.21875, "learning_rate": 1.739101794998471e-05, "loss": 1.2873, "step": 6860 }, { "epoch": 0.15677057185888366, "grad_norm": 2.796875, "learning_rate": 1.7386312842590635e-05, "loss": 1.2407, "step": 6870 }, { "epoch": 0.15699876774222993, "grad_norm": 3.171875, "learning_rate": 1.7381607735196557e-05, "loss": 1.2528, "step": 6880 }, { "epoch": 0.1572269636255762, "grad_norm": 3.0, "learning_rate": 1.7376902627802482e-05, "loss": 1.2919, "step": 6890 }, { "epoch": 0.15745515950892247, "grad_norm": 3.5, "learning_rate": 1.7372197520408404e-05, "loss": 1.2635, "step": 6900 }, { "epoch": 0.15768335539226871, "grad_norm": 3.0625, "learning_rate": 1.736749241301433e-05, "loss": 1.2135, "step": 6910 }, { "epoch": 0.157911551275615, "grad_norm": 3.0625, "learning_rate": 1.7362787305620252e-05, "loss": 1.2121, "step": 6920 }, { "epoch": 0.15813974715896126, "grad_norm": 3.171875, "learning_rate": 1.7358082198226177e-05, "loss": 1.2251, "step": 6930 }, { "epoch": 0.15836794304230753, "grad_norm": 2.859375, "learning_rate": 1.73533770908321e-05, "loss": 1.2839, "step": 6940 }, { "epoch": 0.15859613892565377, "grad_norm": 2.90625, "learning_rate": 1.7348671983438025e-05, "loss": 1.2039, "step": 6950 }, { "epoch": 0.15882433480900005, "grad_norm": 2.90625, "learning_rate": 1.7343966876043947e-05, "loss": 1.2413, "step": 6960 }, { "epoch": 0.15905253069234632, "grad_norm": 3.078125, "learning_rate": 1.7339261768649872e-05, "loss": 1.2439, "step": 6970 }, { "epoch": 0.1592807265756926, "grad_norm": 2.8125, "learning_rate": 1.7334556661255794e-05, "loss": 1.232, "step": 6980 }, { "epoch": 0.15950892245903883, "grad_norm": 3.171875, "learning_rate": 1.732985155386172e-05, "loss": 1.2376, "step": 6990 }, { "epoch": 0.1597371183423851, "grad_norm": 2.9375, "learning_rate": 1.732514644646764e-05, "loss": 1.1671, "step": 7000 }, { "epoch": 0.15996531422573138, "grad_norm": 3.203125, "learning_rate": 1.7320441339073567e-05, "loss": 1.298, "step": 7010 }, { "epoch": 0.16019351010907762, "grad_norm": 2.8125, "learning_rate": 1.731573623167949e-05, "loss": 1.2112, "step": 7020 }, { "epoch": 0.1604217059924239, "grad_norm": 3.078125, "learning_rate": 1.7311031124285414e-05, "loss": 1.2599, "step": 7030 }, { "epoch": 0.16064990187577016, "grad_norm": 2.90625, "learning_rate": 1.7306326016891336e-05, "loss": 1.274, "step": 7040 }, { "epoch": 0.16087809775911643, "grad_norm": 3.046875, "learning_rate": 1.7301620909497262e-05, "loss": 1.2567, "step": 7050 }, { "epoch": 0.16110629364246268, "grad_norm": 2.953125, "learning_rate": 1.7296915802103184e-05, "loss": 1.2401, "step": 7060 }, { "epoch": 0.16133448952580895, "grad_norm": 2.84375, "learning_rate": 1.729221069470911e-05, "loss": 1.2482, "step": 7070 }, { "epoch": 0.16156268540915522, "grad_norm": 3.171875, "learning_rate": 1.728750558731503e-05, "loss": 1.2447, "step": 7080 }, { "epoch": 0.1617908812925015, "grad_norm": 2.875, "learning_rate": 1.7282800479920957e-05, "loss": 1.3049, "step": 7090 }, { "epoch": 0.16201907717584774, "grad_norm": 2.828125, "learning_rate": 1.727809537252688e-05, "loss": 1.2663, "step": 7100 }, { "epoch": 0.162247273059194, "grad_norm": 2.75, "learning_rate": 1.7273390265132804e-05, "loss": 1.2596, "step": 7110 }, { "epoch": 0.16247546894254028, "grad_norm": 2.84375, "learning_rate": 1.7268685157738726e-05, "loss": 1.2227, "step": 7120 }, { "epoch": 0.16270366482588655, "grad_norm": 3.21875, "learning_rate": 1.726398005034465e-05, "loss": 1.2698, "step": 7130 }, { "epoch": 0.1629318607092328, "grad_norm": 2.84375, "learning_rate": 1.7259274942950573e-05, "loss": 1.2448, "step": 7140 }, { "epoch": 0.16316005659257907, "grad_norm": 3.296875, "learning_rate": 1.72545698355565e-05, "loss": 1.2286, "step": 7150 }, { "epoch": 0.16338825247592534, "grad_norm": 3.078125, "learning_rate": 1.724986472816242e-05, "loss": 1.194, "step": 7160 }, { "epoch": 0.1636164483592716, "grad_norm": 3.046875, "learning_rate": 1.7245159620768346e-05, "loss": 1.2211, "step": 7170 }, { "epoch": 0.16384464424261785, "grad_norm": 2.9375, "learning_rate": 1.7240454513374268e-05, "loss": 1.2029, "step": 7180 }, { "epoch": 0.16407284012596413, "grad_norm": 2.96875, "learning_rate": 1.7235749405980194e-05, "loss": 1.2151, "step": 7190 }, { "epoch": 0.1643010360093104, "grad_norm": 3.15625, "learning_rate": 1.7231044298586116e-05, "loss": 1.2413, "step": 7200 }, { "epoch": 0.16452923189265667, "grad_norm": 2.71875, "learning_rate": 1.722633919119204e-05, "loss": 1.213, "step": 7210 }, { "epoch": 0.1647574277760029, "grad_norm": 2.59375, "learning_rate": 1.7221634083797963e-05, "loss": 1.2342, "step": 7220 }, { "epoch": 0.16498562365934918, "grad_norm": 3.0625, "learning_rate": 1.721692897640389e-05, "loss": 1.1909, "step": 7230 }, { "epoch": 0.16521381954269546, "grad_norm": 2.625, "learning_rate": 1.721222386900981e-05, "loss": 1.2339, "step": 7240 }, { "epoch": 0.16544201542604173, "grad_norm": 2.640625, "learning_rate": 1.7207518761615736e-05, "loss": 1.2173, "step": 7250 }, { "epoch": 0.16567021130938797, "grad_norm": 3.328125, "learning_rate": 1.7202813654221658e-05, "loss": 1.2121, "step": 7260 }, { "epoch": 0.16589840719273424, "grad_norm": 3.015625, "learning_rate": 1.7198108546827583e-05, "loss": 1.256, "step": 7270 }, { "epoch": 0.16612660307608051, "grad_norm": 3.3125, "learning_rate": 1.7193403439433505e-05, "loss": 1.2925, "step": 7280 }, { "epoch": 0.16635479895942676, "grad_norm": 2.984375, "learning_rate": 1.718869833203943e-05, "loss": 1.2107, "step": 7290 }, { "epoch": 0.16658299484277303, "grad_norm": 2.515625, "learning_rate": 1.7183993224645353e-05, "loss": 1.2052, "step": 7300 }, { "epoch": 0.1668111907261193, "grad_norm": 3.125, "learning_rate": 1.7179288117251278e-05, "loss": 1.2695, "step": 7310 }, { "epoch": 0.16703938660946557, "grad_norm": 3.015625, "learning_rate": 1.7174583009857204e-05, "loss": 1.2359, "step": 7320 }, { "epoch": 0.16726758249281182, "grad_norm": 3.046875, "learning_rate": 1.7169877902463126e-05, "loss": 1.2219, "step": 7330 }, { "epoch": 0.1674957783761581, "grad_norm": 2.984375, "learning_rate": 1.716517279506905e-05, "loss": 1.2281, "step": 7340 }, { "epoch": 0.16772397425950436, "grad_norm": 3.359375, "learning_rate": 1.7160467687674973e-05, "loss": 1.289, "step": 7350 }, { "epoch": 0.16795217014285063, "grad_norm": 3.015625, "learning_rate": 1.71557625802809e-05, "loss": 1.2174, "step": 7360 }, { "epoch": 0.16818036602619688, "grad_norm": 3.234375, "learning_rate": 1.715105747288682e-05, "loss": 1.2504, "step": 7370 }, { "epoch": 0.16840856190954315, "grad_norm": 3.15625, "learning_rate": 1.7146352365492746e-05, "loss": 1.2153, "step": 7380 }, { "epoch": 0.16863675779288942, "grad_norm": 2.796875, "learning_rate": 1.7141647258098668e-05, "loss": 1.2925, "step": 7390 }, { "epoch": 0.1688649536762357, "grad_norm": 3.140625, "learning_rate": 1.7136942150704593e-05, "loss": 1.2358, "step": 7400 }, { "epoch": 0.16909314955958193, "grad_norm": 2.90625, "learning_rate": 1.7132237043310515e-05, "loss": 1.275, "step": 7410 }, { "epoch": 0.1693213454429282, "grad_norm": 2.96875, "learning_rate": 1.712753193591644e-05, "loss": 1.1721, "step": 7420 }, { "epoch": 0.16954954132627448, "grad_norm": 2.796875, "learning_rate": 1.7122826828522363e-05, "loss": 1.2111, "step": 7430 }, { "epoch": 0.16977773720962075, "grad_norm": 2.921875, "learning_rate": 1.7118121721128288e-05, "loss": 1.2425, "step": 7440 }, { "epoch": 0.170005933092967, "grad_norm": 2.703125, "learning_rate": 1.711341661373421e-05, "loss": 1.1983, "step": 7450 }, { "epoch": 0.17023412897631326, "grad_norm": 2.890625, "learning_rate": 1.7108711506340136e-05, "loss": 1.2396, "step": 7460 }, { "epoch": 0.17046232485965954, "grad_norm": 2.8125, "learning_rate": 1.7104006398946058e-05, "loss": 1.2773, "step": 7470 }, { "epoch": 0.1706905207430058, "grad_norm": 3.046875, "learning_rate": 1.7099301291551983e-05, "loss": 1.216, "step": 7480 }, { "epoch": 0.17091871662635205, "grad_norm": 2.953125, "learning_rate": 1.7094596184157905e-05, "loss": 1.2754, "step": 7490 }, { "epoch": 0.17114691250969832, "grad_norm": 3.046875, "learning_rate": 1.708989107676383e-05, "loss": 1.2823, "step": 7500 }, { "epoch": 0.1713751083930446, "grad_norm": 2.984375, "learning_rate": 1.7085185969369752e-05, "loss": 1.2593, "step": 7510 }, { "epoch": 0.17160330427639087, "grad_norm": 3.078125, "learning_rate": 1.7080480861975678e-05, "loss": 1.2618, "step": 7520 }, { "epoch": 0.1718315001597371, "grad_norm": 2.921875, "learning_rate": 1.70757757545816e-05, "loss": 1.1969, "step": 7530 }, { "epoch": 0.17205969604308338, "grad_norm": 3.25, "learning_rate": 1.7071070647187525e-05, "loss": 1.2147, "step": 7540 }, { "epoch": 0.17228789192642965, "grad_norm": 3.25, "learning_rate": 1.7066365539793447e-05, "loss": 1.2507, "step": 7550 }, { "epoch": 0.17251608780977593, "grad_norm": 3.03125, "learning_rate": 1.7061660432399373e-05, "loss": 1.2206, "step": 7560 }, { "epoch": 0.17274428369312217, "grad_norm": 2.875, "learning_rate": 1.7056955325005295e-05, "loss": 1.3046, "step": 7570 }, { "epoch": 0.17297247957646844, "grad_norm": 2.875, "learning_rate": 1.705225021761122e-05, "loss": 1.2473, "step": 7580 }, { "epoch": 0.1732006754598147, "grad_norm": 2.96875, "learning_rate": 1.7047545110217142e-05, "loss": 1.2497, "step": 7590 }, { "epoch": 0.17342887134316096, "grad_norm": 2.84375, "learning_rate": 1.7042840002823068e-05, "loss": 1.2577, "step": 7600 }, { "epoch": 0.17365706722650723, "grad_norm": 3.203125, "learning_rate": 1.703813489542899e-05, "loss": 1.2982, "step": 7610 }, { "epoch": 0.1738852631098535, "grad_norm": 2.875, "learning_rate": 1.7033429788034915e-05, "loss": 1.1961, "step": 7620 }, { "epoch": 0.17411345899319977, "grad_norm": 2.921875, "learning_rate": 1.7028724680640837e-05, "loss": 1.2168, "step": 7630 }, { "epoch": 0.17434165487654602, "grad_norm": 3.078125, "learning_rate": 1.7024019573246762e-05, "loss": 1.2051, "step": 7640 }, { "epoch": 0.1745698507598923, "grad_norm": 2.921875, "learning_rate": 1.7019314465852684e-05, "loss": 1.231, "step": 7650 }, { "epoch": 0.17479804664323856, "grad_norm": 3.03125, "learning_rate": 1.701460935845861e-05, "loss": 1.1906, "step": 7660 }, { "epoch": 0.17502624252658483, "grad_norm": 3.015625, "learning_rate": 1.7009904251064532e-05, "loss": 1.2849, "step": 7670 }, { "epoch": 0.17525443840993107, "grad_norm": 3.140625, "learning_rate": 1.7005199143670457e-05, "loss": 1.2748, "step": 7680 }, { "epoch": 0.17548263429327735, "grad_norm": 3.109375, "learning_rate": 1.700049403627638e-05, "loss": 1.2084, "step": 7690 }, { "epoch": 0.17571083017662362, "grad_norm": 2.890625, "learning_rate": 1.6995788928882305e-05, "loss": 1.2322, "step": 7700 }, { "epoch": 0.1759390260599699, "grad_norm": 3.21875, "learning_rate": 1.6991083821488227e-05, "loss": 1.226, "step": 7710 }, { "epoch": 0.17616722194331613, "grad_norm": 2.75, "learning_rate": 1.6986378714094152e-05, "loss": 1.2844, "step": 7720 }, { "epoch": 0.1763954178266624, "grad_norm": 2.953125, "learning_rate": 1.6981673606700074e-05, "loss": 1.2484, "step": 7730 }, { "epoch": 0.17662361371000868, "grad_norm": 3.109375, "learning_rate": 1.6976968499306e-05, "loss": 1.2107, "step": 7740 }, { "epoch": 0.17685180959335495, "grad_norm": 3.0, "learning_rate": 1.697226339191192e-05, "loss": 1.2365, "step": 7750 }, { "epoch": 0.1770800054767012, "grad_norm": 2.953125, "learning_rate": 1.6967558284517847e-05, "loss": 1.3016, "step": 7760 }, { "epoch": 0.17730820136004746, "grad_norm": 2.90625, "learning_rate": 1.696285317712377e-05, "loss": 1.2608, "step": 7770 }, { "epoch": 0.17753639724339373, "grad_norm": 2.796875, "learning_rate": 1.6958148069729694e-05, "loss": 1.231, "step": 7780 }, { "epoch": 0.17776459312674, "grad_norm": 2.90625, "learning_rate": 1.6953442962335616e-05, "loss": 1.2222, "step": 7790 }, { "epoch": 0.17799278901008625, "grad_norm": 3.046875, "learning_rate": 1.694873785494154e-05, "loss": 1.2736, "step": 7800 }, { "epoch": 0.17822098489343252, "grad_norm": 2.890625, "learning_rate": 1.6944032747547464e-05, "loss": 1.2609, "step": 7810 }, { "epoch": 0.1784491807767788, "grad_norm": 3.09375, "learning_rate": 1.6939327640153386e-05, "loss": 1.2165, "step": 7820 }, { "epoch": 0.17867737666012506, "grad_norm": 3.046875, "learning_rate": 1.693462253275931e-05, "loss": 1.2544, "step": 7830 }, { "epoch": 0.1789055725434713, "grad_norm": 3.125, "learning_rate": 1.6929917425365233e-05, "loss": 1.189, "step": 7840 }, { "epoch": 0.17913376842681758, "grad_norm": 3.015625, "learning_rate": 1.692521231797116e-05, "loss": 1.2573, "step": 7850 }, { "epoch": 0.17936196431016385, "grad_norm": 2.703125, "learning_rate": 1.692050721057708e-05, "loss": 1.178, "step": 7860 }, { "epoch": 0.1795901601935101, "grad_norm": 2.921875, "learning_rate": 1.6915802103183006e-05, "loss": 1.2282, "step": 7870 }, { "epoch": 0.17981835607685637, "grad_norm": 3.1875, "learning_rate": 1.6911096995788928e-05, "loss": 1.2442, "step": 7880 }, { "epoch": 0.18004655196020264, "grad_norm": 3.1875, "learning_rate": 1.6906391888394854e-05, "loss": 1.2972, "step": 7890 }, { "epoch": 0.1802747478435489, "grad_norm": 3.046875, "learning_rate": 1.6901686781000776e-05, "loss": 1.2147, "step": 7900 }, { "epoch": 0.18050294372689515, "grad_norm": 2.84375, "learning_rate": 1.68969816736067e-05, "loss": 1.2531, "step": 7910 }, { "epoch": 0.18073113961024143, "grad_norm": 3.015625, "learning_rate": 1.6892276566212623e-05, "loss": 1.2508, "step": 7920 }, { "epoch": 0.1809593354935877, "grad_norm": 2.765625, "learning_rate": 1.688757145881855e-05, "loss": 1.2334, "step": 7930 }, { "epoch": 0.18118753137693397, "grad_norm": 2.953125, "learning_rate": 1.688286635142447e-05, "loss": 1.2346, "step": 7940 }, { "epoch": 0.1814157272602802, "grad_norm": 2.921875, "learning_rate": 1.6878161244030396e-05, "loss": 1.2435, "step": 7950 }, { "epoch": 0.18164392314362648, "grad_norm": 2.9375, "learning_rate": 1.6873456136636318e-05, "loss": 1.2238, "step": 7960 }, { "epoch": 0.18187211902697276, "grad_norm": 3.125, "learning_rate": 1.6868751029242243e-05, "loss": 1.2472, "step": 7970 }, { "epoch": 0.18210031491031903, "grad_norm": 2.875, "learning_rate": 1.6864045921848165e-05, "loss": 1.265, "step": 7980 }, { "epoch": 0.18232851079366527, "grad_norm": 2.828125, "learning_rate": 1.685934081445409e-05, "loss": 1.2329, "step": 7990 }, { "epoch": 0.18255670667701154, "grad_norm": 3.0, "learning_rate": 1.6854635707060013e-05, "loss": 1.2732, "step": 8000 }, { "epoch": 0.18278490256035781, "grad_norm": 2.90625, "learning_rate": 1.6849930599665938e-05, "loss": 1.2611, "step": 8010 }, { "epoch": 0.1830130984437041, "grad_norm": 2.75, "learning_rate": 1.684522549227186e-05, "loss": 1.2042, "step": 8020 }, { "epoch": 0.18324129432705033, "grad_norm": 3.140625, "learning_rate": 1.6840520384877786e-05, "loss": 1.2802, "step": 8030 }, { "epoch": 0.1834694902103966, "grad_norm": 2.953125, "learning_rate": 1.6835815277483708e-05, "loss": 1.2261, "step": 8040 }, { "epoch": 0.18369768609374287, "grad_norm": 2.890625, "learning_rate": 1.6831110170089633e-05, "loss": 1.2182, "step": 8050 }, { "epoch": 0.18392588197708915, "grad_norm": 3.203125, "learning_rate": 1.6826405062695555e-05, "loss": 1.2519, "step": 8060 }, { "epoch": 0.1841540778604354, "grad_norm": 2.8125, "learning_rate": 1.682169995530148e-05, "loss": 1.1387, "step": 8070 }, { "epoch": 0.18438227374378166, "grad_norm": 2.796875, "learning_rate": 1.6816994847907406e-05, "loss": 1.2409, "step": 8080 }, { "epoch": 0.18461046962712793, "grad_norm": 3.046875, "learning_rate": 1.6812289740513328e-05, "loss": 1.2371, "step": 8090 }, { "epoch": 0.1848386655104742, "grad_norm": 2.796875, "learning_rate": 1.6807584633119253e-05, "loss": 1.2651, "step": 8100 }, { "epoch": 0.18506686139382045, "grad_norm": 2.859375, "learning_rate": 1.6802879525725175e-05, "loss": 1.2355, "step": 8110 }, { "epoch": 0.18529505727716672, "grad_norm": 3.3125, "learning_rate": 1.67981744183311e-05, "loss": 1.2193, "step": 8120 }, { "epoch": 0.185523253160513, "grad_norm": 2.828125, "learning_rate": 1.6793469310937023e-05, "loss": 1.2621, "step": 8130 }, { "epoch": 0.18575144904385926, "grad_norm": 2.953125, "learning_rate": 1.6788764203542948e-05, "loss": 1.2112, "step": 8140 }, { "epoch": 0.1859796449272055, "grad_norm": 3.203125, "learning_rate": 1.678405909614887e-05, "loss": 1.2499, "step": 8150 }, { "epoch": 0.18620784081055178, "grad_norm": 3.125, "learning_rate": 1.6779353988754795e-05, "loss": 1.2114, "step": 8160 }, { "epoch": 0.18643603669389805, "grad_norm": 2.875, "learning_rate": 1.6774648881360718e-05, "loss": 1.2144, "step": 8170 }, { "epoch": 0.1866642325772443, "grad_norm": 2.859375, "learning_rate": 1.6769943773966643e-05, "loss": 1.2474, "step": 8180 }, { "epoch": 0.18689242846059057, "grad_norm": 2.84375, "learning_rate": 1.6765238666572565e-05, "loss": 1.2681, "step": 8190 }, { "epoch": 0.18712062434393684, "grad_norm": 2.953125, "learning_rate": 1.676053355917849e-05, "loss": 1.2765, "step": 8200 }, { "epoch": 0.1873488202272831, "grad_norm": 3.3125, "learning_rate": 1.6755828451784412e-05, "loss": 1.235, "step": 8210 }, { "epoch": 0.18757701611062935, "grad_norm": 2.671875, "learning_rate": 1.6751123344390338e-05, "loss": 1.2618, "step": 8220 }, { "epoch": 0.18780521199397562, "grad_norm": 3.0, "learning_rate": 1.674641823699626e-05, "loss": 1.2591, "step": 8230 }, { "epoch": 0.1880334078773219, "grad_norm": 2.90625, "learning_rate": 1.6741713129602185e-05, "loss": 1.217, "step": 8240 }, { "epoch": 0.18826160376066817, "grad_norm": 3.15625, "learning_rate": 1.6737008022208107e-05, "loss": 1.2133, "step": 8250 }, { "epoch": 0.1884897996440144, "grad_norm": 3.046875, "learning_rate": 1.6732302914814033e-05, "loss": 1.2446, "step": 8260 }, { "epoch": 0.18871799552736068, "grad_norm": 3.09375, "learning_rate": 1.6727597807419955e-05, "loss": 1.2114, "step": 8270 }, { "epoch": 0.18894619141070695, "grad_norm": 3.0, "learning_rate": 1.672289270002588e-05, "loss": 1.2521, "step": 8280 }, { "epoch": 0.18917438729405323, "grad_norm": 2.921875, "learning_rate": 1.6718187592631802e-05, "loss": 1.2455, "step": 8290 }, { "epoch": 0.18940258317739947, "grad_norm": 3.34375, "learning_rate": 1.6713482485237727e-05, "loss": 1.2078, "step": 8300 }, { "epoch": 0.18963077906074574, "grad_norm": 3.0, "learning_rate": 1.670877737784365e-05, "loss": 1.1979, "step": 8310 }, { "epoch": 0.189858974944092, "grad_norm": 3.046875, "learning_rate": 1.6704072270449575e-05, "loss": 1.3055, "step": 8320 }, { "epoch": 0.19008717082743828, "grad_norm": 3.0, "learning_rate": 1.6699367163055497e-05, "loss": 1.2601, "step": 8330 }, { "epoch": 0.19031536671078453, "grad_norm": 2.796875, "learning_rate": 1.6694662055661422e-05, "loss": 1.2484, "step": 8340 }, { "epoch": 0.1905435625941308, "grad_norm": 2.96875, "learning_rate": 1.6689956948267344e-05, "loss": 1.2764, "step": 8350 }, { "epoch": 0.19077175847747707, "grad_norm": 2.859375, "learning_rate": 1.668525184087327e-05, "loss": 1.3503, "step": 8360 }, { "epoch": 0.19099995436082334, "grad_norm": 2.84375, "learning_rate": 1.6680546733479192e-05, "loss": 1.2904, "step": 8370 }, { "epoch": 0.1912281502441696, "grad_norm": 2.765625, "learning_rate": 1.6675841626085117e-05, "loss": 1.1493, "step": 8380 }, { "epoch": 0.19145634612751586, "grad_norm": 3.421875, "learning_rate": 1.667113651869104e-05, "loss": 1.2052, "step": 8390 }, { "epoch": 0.19168454201086213, "grad_norm": 2.796875, "learning_rate": 1.6666431411296965e-05, "loss": 1.2106, "step": 8400 }, { "epoch": 0.1919127378942084, "grad_norm": 3.296875, "learning_rate": 1.6661726303902887e-05, "loss": 1.2471, "step": 8410 }, { "epoch": 0.19214093377755465, "grad_norm": 2.84375, "learning_rate": 1.6657021196508812e-05, "loss": 1.2609, "step": 8420 }, { "epoch": 0.19236912966090092, "grad_norm": 3.234375, "learning_rate": 1.6652316089114734e-05, "loss": 1.2476, "step": 8430 }, { "epoch": 0.1925973255442472, "grad_norm": 2.765625, "learning_rate": 1.664761098172066e-05, "loss": 1.2086, "step": 8440 }, { "epoch": 0.19282552142759343, "grad_norm": 2.9375, "learning_rate": 1.664290587432658e-05, "loss": 1.2825, "step": 8450 }, { "epoch": 0.1930537173109397, "grad_norm": 3.109375, "learning_rate": 1.6638200766932507e-05, "loss": 1.2512, "step": 8460 }, { "epoch": 0.19328191319428598, "grad_norm": 3.640625, "learning_rate": 1.663349565953843e-05, "loss": 1.2766, "step": 8470 }, { "epoch": 0.19351010907763225, "grad_norm": 2.890625, "learning_rate": 1.6628790552144354e-05, "loss": 1.1933, "step": 8480 }, { "epoch": 0.1937383049609785, "grad_norm": 3.125, "learning_rate": 1.6624085444750276e-05, "loss": 1.2565, "step": 8490 }, { "epoch": 0.19396650084432476, "grad_norm": 3.09375, "learning_rate": 1.6619380337356202e-05, "loss": 1.2277, "step": 8500 }, { "epoch": 0.19419469672767103, "grad_norm": 3.25, "learning_rate": 1.6614675229962124e-05, "loss": 1.2493, "step": 8510 }, { "epoch": 0.1944228926110173, "grad_norm": 2.890625, "learning_rate": 1.660997012256805e-05, "loss": 1.24, "step": 8520 }, { "epoch": 0.19465108849436355, "grad_norm": 3.671875, "learning_rate": 1.660526501517397e-05, "loss": 1.2416, "step": 8530 }, { "epoch": 0.19487928437770982, "grad_norm": 2.8125, "learning_rate": 1.6600559907779897e-05, "loss": 1.202, "step": 8540 }, { "epoch": 0.1951074802610561, "grad_norm": 3.421875, "learning_rate": 1.659585480038582e-05, "loss": 1.1873, "step": 8550 }, { "epoch": 0.19533567614440236, "grad_norm": 3.046875, "learning_rate": 1.6591149692991744e-05, "loss": 1.2359, "step": 8560 }, { "epoch": 0.1955638720277486, "grad_norm": 2.921875, "learning_rate": 1.6586444585597666e-05, "loss": 1.2217, "step": 8570 }, { "epoch": 0.19579206791109488, "grad_norm": 2.859375, "learning_rate": 1.658173947820359e-05, "loss": 1.2365, "step": 8580 }, { "epoch": 0.19602026379444115, "grad_norm": 2.796875, "learning_rate": 1.6577034370809513e-05, "loss": 1.2042, "step": 8590 }, { "epoch": 0.19624845967778742, "grad_norm": 3.0625, "learning_rate": 1.657232926341544e-05, "loss": 1.1977, "step": 8600 }, { "epoch": 0.19647665556113367, "grad_norm": 3.0625, "learning_rate": 1.656762415602136e-05, "loss": 1.1981, "step": 8610 }, { "epoch": 0.19670485144447994, "grad_norm": 3.15625, "learning_rate": 1.6562919048627286e-05, "loss": 1.252, "step": 8620 }, { "epoch": 0.1969330473278262, "grad_norm": 2.703125, "learning_rate": 1.6558213941233208e-05, "loss": 1.2504, "step": 8630 }, { "epoch": 0.19716124321117248, "grad_norm": 3.03125, "learning_rate": 1.6553508833839134e-05, "loss": 1.2624, "step": 8640 }, { "epoch": 0.19738943909451873, "grad_norm": 2.875, "learning_rate": 1.654880372644506e-05, "loss": 1.226, "step": 8650 }, { "epoch": 0.197617634977865, "grad_norm": 2.6875, "learning_rate": 1.654409861905098e-05, "loss": 1.2627, "step": 8660 }, { "epoch": 0.19784583086121127, "grad_norm": 3.140625, "learning_rate": 1.6539393511656907e-05, "loss": 1.194, "step": 8670 }, { "epoch": 0.19807402674455754, "grad_norm": 3.03125, "learning_rate": 1.653468840426283e-05, "loss": 1.1666, "step": 8680 }, { "epoch": 0.19830222262790378, "grad_norm": 2.796875, "learning_rate": 1.6529983296868754e-05, "loss": 1.224, "step": 8690 }, { "epoch": 0.19853041851125006, "grad_norm": 2.859375, "learning_rate": 1.6525278189474676e-05, "loss": 1.2479, "step": 8700 }, { "epoch": 0.19875861439459633, "grad_norm": 3.296875, "learning_rate": 1.65205730820806e-05, "loss": 1.2069, "step": 8710 }, { "epoch": 0.1989868102779426, "grad_norm": 3.1875, "learning_rate": 1.6515867974686523e-05, "loss": 1.2209, "step": 8720 }, { "epoch": 0.19921500616128884, "grad_norm": 2.828125, "learning_rate": 1.651116286729245e-05, "loss": 1.2077, "step": 8730 }, { "epoch": 0.19944320204463512, "grad_norm": 2.828125, "learning_rate": 1.650645775989837e-05, "loss": 1.2508, "step": 8740 }, { "epoch": 0.1996713979279814, "grad_norm": 3.21875, "learning_rate": 1.6501752652504296e-05, "loss": 1.2285, "step": 8750 }, { "epoch": 0.19989959381132763, "grad_norm": 2.671875, "learning_rate": 1.6497047545110218e-05, "loss": 1.1902, "step": 8760 }, { "epoch": 0.2001277896946739, "grad_norm": 3.0625, "learning_rate": 1.6492342437716144e-05, "loss": 1.2066, "step": 8770 }, { "epoch": 0.20035598557802017, "grad_norm": 3.296875, "learning_rate": 1.6487637330322066e-05, "loss": 1.2115, "step": 8780 }, { "epoch": 0.20058418146136645, "grad_norm": 2.828125, "learning_rate": 1.648293222292799e-05, "loss": 1.1974, "step": 8790 }, { "epoch": 0.2008123773447127, "grad_norm": 2.796875, "learning_rate": 1.6478227115533913e-05, "loss": 1.2178, "step": 8800 }, { "epoch": 0.20104057322805896, "grad_norm": 3.265625, "learning_rate": 1.647352200813984e-05, "loss": 1.2097, "step": 8810 }, { "epoch": 0.20126876911140523, "grad_norm": 2.84375, "learning_rate": 1.646881690074576e-05, "loss": 1.2313, "step": 8820 }, { "epoch": 0.2014969649947515, "grad_norm": 2.890625, "learning_rate": 1.6464111793351686e-05, "loss": 1.2914, "step": 8830 }, { "epoch": 0.20172516087809775, "grad_norm": 3.046875, "learning_rate": 1.6459406685957608e-05, "loss": 1.2425, "step": 8840 }, { "epoch": 0.20195335676144402, "grad_norm": 3.109375, "learning_rate": 1.6454701578563533e-05, "loss": 1.2101, "step": 8850 }, { "epoch": 0.2021815526447903, "grad_norm": 3.140625, "learning_rate": 1.6449996471169455e-05, "loss": 1.2225, "step": 8860 }, { "epoch": 0.20240974852813656, "grad_norm": 2.8125, "learning_rate": 1.644529136377538e-05, "loss": 1.2123, "step": 8870 }, { "epoch": 0.2026379444114828, "grad_norm": 3.0, "learning_rate": 1.6440586256381303e-05, "loss": 1.2377, "step": 8880 }, { "epoch": 0.20286614029482908, "grad_norm": 3.1875, "learning_rate": 1.6435881148987228e-05, "loss": 1.2362, "step": 8890 }, { "epoch": 0.20309433617817535, "grad_norm": 3.0, "learning_rate": 1.643117604159315e-05, "loss": 1.2281, "step": 8900 }, { "epoch": 0.20332253206152162, "grad_norm": 3.359375, "learning_rate": 1.6426470934199076e-05, "loss": 1.2131, "step": 8910 }, { "epoch": 0.20355072794486787, "grad_norm": 3.265625, "learning_rate": 1.6421765826804998e-05, "loss": 1.2275, "step": 8920 }, { "epoch": 0.20377892382821414, "grad_norm": 2.984375, "learning_rate": 1.6417060719410923e-05, "loss": 1.2489, "step": 8930 }, { "epoch": 0.2040071197115604, "grad_norm": 3.296875, "learning_rate": 1.6412355612016845e-05, "loss": 1.2617, "step": 8940 }, { "epoch": 0.20423531559490668, "grad_norm": 3.390625, "learning_rate": 1.640765050462277e-05, "loss": 1.1925, "step": 8950 }, { "epoch": 0.20446351147825292, "grad_norm": 2.734375, "learning_rate": 1.6402945397228692e-05, "loss": 1.2315, "step": 8960 }, { "epoch": 0.2046917073615992, "grad_norm": 2.9375, "learning_rate": 1.6398240289834618e-05, "loss": 1.2203, "step": 8970 }, { "epoch": 0.20491990324494547, "grad_norm": 3.40625, "learning_rate": 1.639353518244054e-05, "loss": 1.2104, "step": 8980 }, { "epoch": 0.20514809912829174, "grad_norm": 2.9375, "learning_rate": 1.6388830075046465e-05, "loss": 1.2316, "step": 8990 }, { "epoch": 0.20537629501163798, "grad_norm": 3.203125, "learning_rate": 1.6384124967652387e-05, "loss": 1.2321, "step": 9000 }, { "epoch": 0.20560449089498425, "grad_norm": 2.984375, "learning_rate": 1.6379419860258313e-05, "loss": 1.2253, "step": 9010 }, { "epoch": 0.20583268677833053, "grad_norm": 2.96875, "learning_rate": 1.6374714752864235e-05, "loss": 1.2575, "step": 9020 }, { "epoch": 0.20606088266167677, "grad_norm": 3.015625, "learning_rate": 1.637000964547016e-05, "loss": 1.1955, "step": 9030 }, { "epoch": 0.20628907854502304, "grad_norm": 3.25, "learning_rate": 1.6365304538076082e-05, "loss": 1.2619, "step": 9040 }, { "epoch": 0.2065172744283693, "grad_norm": 2.9375, "learning_rate": 1.6360599430682008e-05, "loss": 1.1997, "step": 9050 }, { "epoch": 0.20674547031171558, "grad_norm": 3.0625, "learning_rate": 1.635589432328793e-05, "loss": 1.2155, "step": 9060 }, { "epoch": 0.20697366619506183, "grad_norm": 2.578125, "learning_rate": 1.6351189215893855e-05, "loss": 1.2284, "step": 9070 }, { "epoch": 0.2072018620784081, "grad_norm": 2.921875, "learning_rate": 1.6346484108499777e-05, "loss": 1.2549, "step": 9080 }, { "epoch": 0.20743005796175437, "grad_norm": 2.921875, "learning_rate": 1.6341779001105702e-05, "loss": 1.2642, "step": 9090 }, { "epoch": 0.20765825384510064, "grad_norm": 3.0625, "learning_rate": 1.6337073893711624e-05, "loss": 1.252, "step": 9100 }, { "epoch": 0.2078864497284469, "grad_norm": 3.0, "learning_rate": 1.633236878631755e-05, "loss": 1.2527, "step": 9110 }, { "epoch": 0.20811464561179316, "grad_norm": 2.796875, "learning_rate": 1.6327663678923472e-05, "loss": 1.2323, "step": 9120 }, { "epoch": 0.20834284149513943, "grad_norm": 3.171875, "learning_rate": 1.6322958571529397e-05, "loss": 1.2304, "step": 9130 }, { "epoch": 0.2085710373784857, "grad_norm": 3.453125, "learning_rate": 1.631825346413532e-05, "loss": 1.2907, "step": 9140 }, { "epoch": 0.20879923326183195, "grad_norm": 2.90625, "learning_rate": 1.6313548356741245e-05, "loss": 1.2535, "step": 9150 }, { "epoch": 0.20902742914517822, "grad_norm": 2.859375, "learning_rate": 1.6308843249347167e-05, "loss": 1.2341, "step": 9160 }, { "epoch": 0.2092556250285245, "grad_norm": 2.796875, "learning_rate": 1.6304138141953092e-05, "loss": 1.2072, "step": 9170 }, { "epoch": 0.20948382091187076, "grad_norm": 3.046875, "learning_rate": 1.6299433034559014e-05, "loss": 1.2074, "step": 9180 }, { "epoch": 0.209712016795217, "grad_norm": 2.84375, "learning_rate": 1.629472792716494e-05, "loss": 1.2382, "step": 9190 }, { "epoch": 0.20994021267856328, "grad_norm": 2.984375, "learning_rate": 1.629002281977086e-05, "loss": 1.2143, "step": 9200 }, { "epoch": 0.21016840856190955, "grad_norm": 2.953125, "learning_rate": 1.6285317712376787e-05, "loss": 1.2377, "step": 9210 }, { "epoch": 0.21039660444525582, "grad_norm": 3.09375, "learning_rate": 1.628061260498271e-05, "loss": 1.2578, "step": 9220 }, { "epoch": 0.21062480032860206, "grad_norm": 3.296875, "learning_rate": 1.6275907497588634e-05, "loss": 1.2393, "step": 9230 }, { "epoch": 0.21085299621194833, "grad_norm": 3.015625, "learning_rate": 1.627120239019456e-05, "loss": 1.2501, "step": 9240 }, { "epoch": 0.2110811920952946, "grad_norm": 3.015625, "learning_rate": 1.6266497282800482e-05, "loss": 1.2043, "step": 9250 }, { "epoch": 0.21130938797864088, "grad_norm": 2.734375, "learning_rate": 1.6261792175406407e-05, "loss": 1.2088, "step": 9260 }, { "epoch": 0.21153758386198712, "grad_norm": 2.9375, "learning_rate": 1.625708706801233e-05, "loss": 1.1972, "step": 9270 }, { "epoch": 0.2117657797453334, "grad_norm": 3.03125, "learning_rate": 1.6252381960618255e-05, "loss": 1.2615, "step": 9280 }, { "epoch": 0.21199397562867967, "grad_norm": 3.140625, "learning_rate": 1.6247676853224177e-05, "loss": 1.2385, "step": 9290 }, { "epoch": 0.21222217151202594, "grad_norm": 3.0, "learning_rate": 1.6242971745830102e-05, "loss": 1.1936, "step": 9300 }, { "epoch": 0.21245036739537218, "grad_norm": 2.921875, "learning_rate": 1.6238266638436024e-05, "loss": 1.2059, "step": 9310 }, { "epoch": 0.21267856327871845, "grad_norm": 2.84375, "learning_rate": 1.623356153104195e-05, "loss": 1.1747, "step": 9320 }, { "epoch": 0.21290675916206472, "grad_norm": 2.953125, "learning_rate": 1.622885642364787e-05, "loss": 1.2798, "step": 9330 }, { "epoch": 0.21313495504541097, "grad_norm": 3.046875, "learning_rate": 1.6224151316253797e-05, "loss": 1.1945, "step": 9340 }, { "epoch": 0.21336315092875724, "grad_norm": 2.78125, "learning_rate": 1.621944620885972e-05, "loss": 1.2655, "step": 9350 }, { "epoch": 0.2135913468121035, "grad_norm": 2.96875, "learning_rate": 1.6214741101465644e-05, "loss": 1.2627, "step": 9360 }, { "epoch": 0.21381954269544978, "grad_norm": 3.109375, "learning_rate": 1.6210035994071566e-05, "loss": 1.2118, "step": 9370 }, { "epoch": 0.21404773857879603, "grad_norm": 3.046875, "learning_rate": 1.6205330886677492e-05, "loss": 1.2575, "step": 9380 }, { "epoch": 0.2142759344621423, "grad_norm": 3.15625, "learning_rate": 1.6200625779283414e-05, "loss": 1.2487, "step": 9390 }, { "epoch": 0.21450413034548857, "grad_norm": 3.0625, "learning_rate": 1.619592067188934e-05, "loss": 1.2209, "step": 9400 }, { "epoch": 0.21473232622883484, "grad_norm": 2.875, "learning_rate": 1.619121556449526e-05, "loss": 1.2439, "step": 9410 }, { "epoch": 0.21496052211218108, "grad_norm": 2.765625, "learning_rate": 1.6186510457101187e-05, "loss": 1.2373, "step": 9420 }, { "epoch": 0.21518871799552736, "grad_norm": 2.828125, "learning_rate": 1.618180534970711e-05, "loss": 1.2356, "step": 9430 }, { "epoch": 0.21541691387887363, "grad_norm": 2.828125, "learning_rate": 1.6177100242313034e-05, "loss": 1.2488, "step": 9440 }, { "epoch": 0.2156451097622199, "grad_norm": 2.84375, "learning_rate": 1.6172395134918956e-05, "loss": 1.2302, "step": 9450 }, { "epoch": 0.21587330564556614, "grad_norm": 3.0625, "learning_rate": 1.616769002752488e-05, "loss": 1.2619, "step": 9460 }, { "epoch": 0.21610150152891242, "grad_norm": 3.0, "learning_rate": 1.6162984920130804e-05, "loss": 1.2002, "step": 9470 }, { "epoch": 0.2163296974122587, "grad_norm": 3.4375, "learning_rate": 1.615827981273673e-05, "loss": 1.2303, "step": 9480 }, { "epoch": 0.21655789329560496, "grad_norm": 3.0625, "learning_rate": 1.615357470534265e-05, "loss": 1.261, "step": 9490 }, { "epoch": 0.2167860891789512, "grad_norm": 2.75, "learning_rate": 1.6148869597948576e-05, "loss": 1.2069, "step": 9500 }, { "epoch": 0.21701428506229747, "grad_norm": 3.1875, "learning_rate": 1.61441644905545e-05, "loss": 1.2473, "step": 9510 }, { "epoch": 0.21724248094564375, "grad_norm": 3.046875, "learning_rate": 1.6139459383160424e-05, "loss": 1.2196, "step": 9520 }, { "epoch": 0.21747067682899002, "grad_norm": 2.953125, "learning_rate": 1.6134754275766346e-05, "loss": 1.2077, "step": 9530 }, { "epoch": 0.21769887271233626, "grad_norm": 3.046875, "learning_rate": 1.613004916837227e-05, "loss": 1.1796, "step": 9540 }, { "epoch": 0.21792706859568253, "grad_norm": 3.296875, "learning_rate": 1.6125344060978193e-05, "loss": 1.2555, "step": 9550 }, { "epoch": 0.2181552644790288, "grad_norm": 3.140625, "learning_rate": 1.612063895358412e-05, "loss": 1.2043, "step": 9560 }, { "epoch": 0.21838346036237508, "grad_norm": 3.09375, "learning_rate": 1.611593384619004e-05, "loss": 1.2371, "step": 9570 }, { "epoch": 0.21861165624572132, "grad_norm": 3.015625, "learning_rate": 1.6111228738795966e-05, "loss": 1.2308, "step": 9580 }, { "epoch": 0.2188398521290676, "grad_norm": 2.8125, "learning_rate": 1.6106523631401888e-05, "loss": 1.2808, "step": 9590 }, { "epoch": 0.21906804801241386, "grad_norm": 2.8125, "learning_rate": 1.6101818524007813e-05, "loss": 1.1971, "step": 9600 }, { "epoch": 0.2192962438957601, "grad_norm": 3.0625, "learning_rate": 1.6097113416613735e-05, "loss": 1.2058, "step": 9610 }, { "epoch": 0.21952443977910638, "grad_norm": 3.046875, "learning_rate": 1.609240830921966e-05, "loss": 1.2024, "step": 9620 }, { "epoch": 0.21975263566245265, "grad_norm": 3.015625, "learning_rate": 1.6087703201825583e-05, "loss": 1.1794, "step": 9630 }, { "epoch": 0.21998083154579892, "grad_norm": 3.375, "learning_rate": 1.6082998094431505e-05, "loss": 1.2134, "step": 9640 }, { "epoch": 0.22020902742914517, "grad_norm": 3.03125, "learning_rate": 1.607829298703743e-05, "loss": 1.1885, "step": 9650 }, { "epoch": 0.22043722331249144, "grad_norm": 3.109375, "learning_rate": 1.6073587879643352e-05, "loss": 1.2228, "step": 9660 }, { "epoch": 0.2206654191958377, "grad_norm": 2.859375, "learning_rate": 1.6068882772249278e-05, "loss": 1.202, "step": 9670 }, { "epoch": 0.22089361507918398, "grad_norm": 2.9375, "learning_rate": 1.60641776648552e-05, "loss": 1.2108, "step": 9680 }, { "epoch": 0.22112181096253022, "grad_norm": 3.03125, "learning_rate": 1.6059472557461125e-05, "loss": 1.2458, "step": 9690 }, { "epoch": 0.2213500068458765, "grad_norm": 3.0, "learning_rate": 1.6054767450067047e-05, "loss": 1.2029, "step": 9700 }, { "epoch": 0.22157820272922277, "grad_norm": 3.015625, "learning_rate": 1.6050062342672973e-05, "loss": 1.1826, "step": 9710 }, { "epoch": 0.22180639861256904, "grad_norm": 2.9375, "learning_rate": 1.6045357235278895e-05, "loss": 1.2282, "step": 9720 }, { "epoch": 0.22203459449591528, "grad_norm": 3.28125, "learning_rate": 1.604065212788482e-05, "loss": 1.2441, "step": 9730 }, { "epoch": 0.22226279037926155, "grad_norm": 3.1875, "learning_rate": 1.6035947020490742e-05, "loss": 1.2339, "step": 9740 }, { "epoch": 0.22249098626260783, "grad_norm": 3.296875, "learning_rate": 1.6031241913096667e-05, "loss": 1.2271, "step": 9750 }, { "epoch": 0.2227191821459541, "grad_norm": 3.078125, "learning_rate": 1.602653680570259e-05, "loss": 1.2271, "step": 9760 }, { "epoch": 0.22294737802930034, "grad_norm": 3.1875, "learning_rate": 1.6021831698308515e-05, "loss": 1.2855, "step": 9770 }, { "epoch": 0.2231755739126466, "grad_norm": 3.0, "learning_rate": 1.6017126590914437e-05, "loss": 1.2563, "step": 9780 }, { "epoch": 0.22340376979599288, "grad_norm": 2.875, "learning_rate": 1.6012421483520362e-05, "loss": 1.2667, "step": 9790 }, { "epoch": 0.22363196567933916, "grad_norm": 2.953125, "learning_rate": 1.6007716376126284e-05, "loss": 1.2325, "step": 9800 }, { "epoch": 0.2238601615626854, "grad_norm": 3.140625, "learning_rate": 1.600301126873221e-05, "loss": 1.2873, "step": 9810 }, { "epoch": 0.22408835744603167, "grad_norm": 2.96875, "learning_rate": 1.5998306161338132e-05, "loss": 1.2253, "step": 9820 }, { "epoch": 0.22431655332937794, "grad_norm": 2.9375, "learning_rate": 1.5993601053944057e-05, "loss": 1.219, "step": 9830 }, { "epoch": 0.22454474921272422, "grad_norm": 3.0, "learning_rate": 1.598889594654998e-05, "loss": 1.2209, "step": 9840 }, { "epoch": 0.22477294509607046, "grad_norm": 3.15625, "learning_rate": 1.5984190839155905e-05, "loss": 1.2125, "step": 9850 }, { "epoch": 0.22500114097941673, "grad_norm": 3.03125, "learning_rate": 1.5979485731761827e-05, "loss": 1.2451, "step": 9860 }, { "epoch": 0.225229336862763, "grad_norm": 3.03125, "learning_rate": 1.5974780624367752e-05, "loss": 1.2371, "step": 9870 }, { "epoch": 0.22545753274610927, "grad_norm": 3.078125, "learning_rate": 1.5970075516973674e-05, "loss": 1.2315, "step": 9880 }, { "epoch": 0.22568572862945552, "grad_norm": 3.21875, "learning_rate": 1.59653704095796e-05, "loss": 1.2656, "step": 9890 }, { "epoch": 0.2259139245128018, "grad_norm": 3.015625, "learning_rate": 1.596066530218552e-05, "loss": 1.1967, "step": 9900 }, { "epoch": 0.22614212039614806, "grad_norm": 3.0, "learning_rate": 1.5955960194791447e-05, "loss": 1.1916, "step": 9910 }, { "epoch": 0.2263703162794943, "grad_norm": 3.015625, "learning_rate": 1.595125508739737e-05, "loss": 1.2228, "step": 9920 }, { "epoch": 0.22659851216284058, "grad_norm": 3.046875, "learning_rate": 1.5946549980003294e-05, "loss": 1.2501, "step": 9930 }, { "epoch": 0.22682670804618685, "grad_norm": 3.15625, "learning_rate": 1.5941844872609216e-05, "loss": 1.2497, "step": 9940 }, { "epoch": 0.22705490392953312, "grad_norm": 3.3125, "learning_rate": 1.5937139765215142e-05, "loss": 1.1685, "step": 9950 }, { "epoch": 0.22728309981287936, "grad_norm": 3.09375, "learning_rate": 1.5932434657821064e-05, "loss": 1.2034, "step": 9960 }, { "epoch": 0.22751129569622563, "grad_norm": 3.078125, "learning_rate": 1.592772955042699e-05, "loss": 1.1938, "step": 9970 }, { "epoch": 0.2277394915795719, "grad_norm": 3.015625, "learning_rate": 1.592302444303291e-05, "loss": 1.2094, "step": 9980 }, { "epoch": 0.22796768746291818, "grad_norm": 3.109375, "learning_rate": 1.5918319335638837e-05, "loss": 1.2194, "step": 9990 }, { "epoch": 0.22819588334626442, "grad_norm": 2.984375, "learning_rate": 1.5913614228244762e-05, "loss": 1.2077, "step": 10000 }, { "epoch": 0.2284240792296107, "grad_norm": 3.046875, "learning_rate": 1.5908909120850684e-05, "loss": 1.2208, "step": 10010 }, { "epoch": 0.22865227511295697, "grad_norm": 3.109375, "learning_rate": 1.590420401345661e-05, "loss": 1.2242, "step": 10020 }, { "epoch": 0.22888047099630324, "grad_norm": 3.125, "learning_rate": 1.589949890606253e-05, "loss": 1.2118, "step": 10030 }, { "epoch": 0.22910866687964948, "grad_norm": 3.046875, "learning_rate": 1.5894793798668457e-05, "loss": 1.1964, "step": 10040 }, { "epoch": 0.22933686276299575, "grad_norm": 2.859375, "learning_rate": 1.589008869127438e-05, "loss": 1.2671, "step": 10050 }, { "epoch": 0.22956505864634202, "grad_norm": 3.359375, "learning_rate": 1.5885383583880304e-05, "loss": 1.2592, "step": 10060 }, { "epoch": 0.2297932545296883, "grad_norm": 2.859375, "learning_rate": 1.5880678476486226e-05, "loss": 1.2516, "step": 10070 }, { "epoch": 0.23002145041303454, "grad_norm": 3.0625, "learning_rate": 1.587597336909215e-05, "loss": 1.2485, "step": 10080 }, { "epoch": 0.2302496462963808, "grad_norm": 3.1875, "learning_rate": 1.5871268261698074e-05, "loss": 1.1679, "step": 10090 }, { "epoch": 0.23047784217972708, "grad_norm": 2.890625, "learning_rate": 1.5866563154304e-05, "loss": 1.2396, "step": 10100 }, { "epoch": 0.23070603806307335, "grad_norm": 2.921875, "learning_rate": 1.586185804690992e-05, "loss": 1.1536, "step": 10110 }, { "epoch": 0.2309342339464196, "grad_norm": 3.09375, "learning_rate": 1.5857152939515847e-05, "loss": 1.1746, "step": 10120 }, { "epoch": 0.23116242982976587, "grad_norm": 3.09375, "learning_rate": 1.585244783212177e-05, "loss": 1.2041, "step": 10130 }, { "epoch": 0.23139062571311214, "grad_norm": 2.609375, "learning_rate": 1.5847742724727694e-05, "loss": 1.1882, "step": 10140 }, { "epoch": 0.2316188215964584, "grad_norm": 2.703125, "learning_rate": 1.5843037617333616e-05, "loss": 1.2003, "step": 10150 }, { "epoch": 0.23184701747980466, "grad_norm": 3.0, "learning_rate": 1.583833250993954e-05, "loss": 1.2207, "step": 10160 }, { "epoch": 0.23207521336315093, "grad_norm": 2.8125, "learning_rate": 1.5833627402545463e-05, "loss": 1.2495, "step": 10170 }, { "epoch": 0.2323034092464972, "grad_norm": 3.296875, "learning_rate": 1.582892229515139e-05, "loss": 1.2113, "step": 10180 }, { "epoch": 0.23253160512984344, "grad_norm": 3.125, "learning_rate": 1.582421718775731e-05, "loss": 1.2979, "step": 10190 }, { "epoch": 0.23275980101318972, "grad_norm": 3.03125, "learning_rate": 1.5819512080363236e-05, "loss": 1.2664, "step": 10200 }, { "epoch": 0.232987996896536, "grad_norm": 3.1875, "learning_rate": 1.5814806972969158e-05, "loss": 1.2043, "step": 10210 }, { "epoch": 0.23321619277988226, "grad_norm": 2.75, "learning_rate": 1.5810101865575084e-05, "loss": 1.2391, "step": 10220 }, { "epoch": 0.2334443886632285, "grad_norm": 3.109375, "learning_rate": 1.5805396758181006e-05, "loss": 1.2001, "step": 10230 }, { "epoch": 0.23367258454657477, "grad_norm": 2.796875, "learning_rate": 1.580069165078693e-05, "loss": 1.1994, "step": 10240 }, { "epoch": 0.23390078042992105, "grad_norm": 2.890625, "learning_rate": 1.5795986543392853e-05, "loss": 1.1867, "step": 10250 }, { "epoch": 0.23412897631326732, "grad_norm": 2.734375, "learning_rate": 1.579128143599878e-05, "loss": 1.2427, "step": 10260 }, { "epoch": 0.23435717219661356, "grad_norm": 2.859375, "learning_rate": 1.57865763286047e-05, "loss": 1.2284, "step": 10270 }, { "epoch": 0.23458536807995983, "grad_norm": 2.890625, "learning_rate": 1.5781871221210626e-05, "loss": 1.2205, "step": 10280 }, { "epoch": 0.2348135639633061, "grad_norm": 3.046875, "learning_rate": 1.5777166113816548e-05, "loss": 1.234, "step": 10290 }, { "epoch": 0.23504175984665238, "grad_norm": 3.046875, "learning_rate": 1.5772461006422473e-05, "loss": 1.2435, "step": 10300 }, { "epoch": 0.23526995572999862, "grad_norm": 2.84375, "learning_rate": 1.5767755899028395e-05, "loss": 1.2304, "step": 10310 }, { "epoch": 0.2354981516133449, "grad_norm": 2.984375, "learning_rate": 1.576305079163432e-05, "loss": 1.2045, "step": 10320 }, { "epoch": 0.23572634749669116, "grad_norm": 2.8125, "learning_rate": 1.5758345684240243e-05, "loss": 1.2271, "step": 10330 }, { "epoch": 0.23595454338003743, "grad_norm": 3.171875, "learning_rate": 1.5753640576846168e-05, "loss": 1.1787, "step": 10340 }, { "epoch": 0.23618273926338368, "grad_norm": 2.921875, "learning_rate": 1.574893546945209e-05, "loss": 1.2273, "step": 10350 }, { "epoch": 0.23641093514672995, "grad_norm": 2.9375, "learning_rate": 1.5744230362058016e-05, "loss": 1.2261, "step": 10360 }, { "epoch": 0.23663913103007622, "grad_norm": 3.0, "learning_rate": 1.5739525254663938e-05, "loss": 1.2406, "step": 10370 }, { "epoch": 0.2368673269134225, "grad_norm": 3.109375, "learning_rate": 1.5734820147269863e-05, "loss": 1.2096, "step": 10380 }, { "epoch": 0.23709552279676874, "grad_norm": 3.015625, "learning_rate": 1.5730115039875785e-05, "loss": 1.1715, "step": 10390 }, { "epoch": 0.237323718680115, "grad_norm": 2.8125, "learning_rate": 1.572540993248171e-05, "loss": 1.1993, "step": 10400 }, { "epoch": 0.23755191456346128, "grad_norm": 3.078125, "learning_rate": 1.5720704825087632e-05, "loss": 1.2547, "step": 10410 }, { "epoch": 0.23778011044680755, "grad_norm": 3.109375, "learning_rate": 1.5715999717693558e-05, "loss": 1.2577, "step": 10420 }, { "epoch": 0.2380083063301538, "grad_norm": 3.09375, "learning_rate": 1.571129461029948e-05, "loss": 1.2618, "step": 10430 }, { "epoch": 0.23823650221350007, "grad_norm": 2.796875, "learning_rate": 1.5706589502905405e-05, "loss": 1.1878, "step": 10440 }, { "epoch": 0.23846469809684634, "grad_norm": 3.046875, "learning_rate": 1.5701884395511327e-05, "loss": 1.2325, "step": 10450 }, { "epoch": 0.2386928939801926, "grad_norm": 2.875, "learning_rate": 1.5697179288117253e-05, "loss": 1.1809, "step": 10460 }, { "epoch": 0.23892108986353885, "grad_norm": 2.953125, "learning_rate": 1.5692474180723175e-05, "loss": 1.3004, "step": 10470 }, { "epoch": 0.23914928574688513, "grad_norm": 3.0, "learning_rate": 1.56877690733291e-05, "loss": 1.2388, "step": 10480 }, { "epoch": 0.2393774816302314, "grad_norm": 2.984375, "learning_rate": 1.5683063965935022e-05, "loss": 1.2123, "step": 10490 }, { "epoch": 0.23960567751357764, "grad_norm": 2.859375, "learning_rate": 1.5678358858540948e-05, "loss": 1.1964, "step": 10500 }, { "epoch": 0.2398338733969239, "grad_norm": 3.390625, "learning_rate": 1.567365375114687e-05, "loss": 1.2141, "step": 10510 }, { "epoch": 0.24006206928027018, "grad_norm": 2.796875, "learning_rate": 1.5668948643752795e-05, "loss": 1.17, "step": 10520 }, { "epoch": 0.24029026516361646, "grad_norm": 2.9375, "learning_rate": 1.5664243536358717e-05, "loss": 1.2269, "step": 10530 }, { "epoch": 0.2405184610469627, "grad_norm": 3.0625, "learning_rate": 1.5659538428964642e-05, "loss": 1.2257, "step": 10540 }, { "epoch": 0.24074665693030897, "grad_norm": 3.265625, "learning_rate": 1.5654833321570564e-05, "loss": 1.263, "step": 10550 }, { "epoch": 0.24097485281365524, "grad_norm": 2.96875, "learning_rate": 1.565012821417649e-05, "loss": 1.2121, "step": 10560 }, { "epoch": 0.24120304869700152, "grad_norm": 2.703125, "learning_rate": 1.5645423106782415e-05, "loss": 1.1657, "step": 10570 }, { "epoch": 0.24143124458034776, "grad_norm": 3.0625, "learning_rate": 1.5640717999388337e-05, "loss": 1.2295, "step": 10580 }, { "epoch": 0.24165944046369403, "grad_norm": 3.171875, "learning_rate": 1.5636012891994263e-05, "loss": 1.2416, "step": 10590 }, { "epoch": 0.2418876363470403, "grad_norm": 3.125, "learning_rate": 1.5631307784600185e-05, "loss": 1.1164, "step": 10600 }, { "epoch": 0.24211583223038657, "grad_norm": 2.9375, "learning_rate": 1.562660267720611e-05, "loss": 1.2456, "step": 10610 }, { "epoch": 0.24234402811373282, "grad_norm": 3.03125, "learning_rate": 1.5621897569812032e-05, "loss": 1.2137, "step": 10620 }, { "epoch": 0.2425722239970791, "grad_norm": 3.125, "learning_rate": 1.5617192462417958e-05, "loss": 1.2055, "step": 10630 }, { "epoch": 0.24280041988042536, "grad_norm": 2.828125, "learning_rate": 1.561248735502388e-05, "loss": 1.2155, "step": 10640 }, { "epoch": 0.24302861576377163, "grad_norm": 3.109375, "learning_rate": 1.5607782247629805e-05, "loss": 1.2164, "step": 10650 }, { "epoch": 0.24325681164711788, "grad_norm": 3.015625, "learning_rate": 1.5603077140235727e-05, "loss": 1.2582, "step": 10660 }, { "epoch": 0.24348500753046415, "grad_norm": 3.1875, "learning_rate": 1.5598372032841652e-05, "loss": 1.2111, "step": 10670 }, { "epoch": 0.24371320341381042, "grad_norm": 2.9375, "learning_rate": 1.5593666925447574e-05, "loss": 1.2469, "step": 10680 }, { "epoch": 0.2439413992971567, "grad_norm": 2.75, "learning_rate": 1.55889618180535e-05, "loss": 1.1767, "step": 10690 }, { "epoch": 0.24416959518050294, "grad_norm": 4.03125, "learning_rate": 1.5584256710659422e-05, "loss": 1.2617, "step": 10700 }, { "epoch": 0.2443977910638492, "grad_norm": 3.015625, "learning_rate": 1.5579551603265347e-05, "loss": 1.2542, "step": 10710 }, { "epoch": 0.24462598694719548, "grad_norm": 2.984375, "learning_rate": 1.557484649587127e-05, "loss": 1.2068, "step": 10720 }, { "epoch": 0.24485418283054175, "grad_norm": 3.109375, "learning_rate": 1.5570141388477195e-05, "loss": 1.1685, "step": 10730 }, { "epoch": 0.245082378713888, "grad_norm": 3.0, "learning_rate": 1.5565436281083117e-05, "loss": 1.1642, "step": 10740 }, { "epoch": 0.24531057459723427, "grad_norm": 2.921875, "learning_rate": 1.5560731173689042e-05, "loss": 1.1956, "step": 10750 }, { "epoch": 0.24553877048058054, "grad_norm": 2.84375, "learning_rate": 1.5556026066294964e-05, "loss": 1.1974, "step": 10760 }, { "epoch": 0.24576696636392678, "grad_norm": 2.875, "learning_rate": 1.555132095890089e-05, "loss": 1.1999, "step": 10770 }, { "epoch": 0.24599516224727305, "grad_norm": 3.265625, "learning_rate": 1.554661585150681e-05, "loss": 1.2033, "step": 10780 }, { "epoch": 0.24622335813061932, "grad_norm": 3.140625, "learning_rate": 1.5541910744112737e-05, "loss": 1.2514, "step": 10790 }, { "epoch": 0.2464515540139656, "grad_norm": 3.28125, "learning_rate": 1.553720563671866e-05, "loss": 1.2211, "step": 10800 }, { "epoch": 0.24667974989731184, "grad_norm": 3.0, "learning_rate": 1.5532500529324584e-05, "loss": 1.2156, "step": 10810 }, { "epoch": 0.2469079457806581, "grad_norm": 3.0625, "learning_rate": 1.5527795421930506e-05, "loss": 1.2073, "step": 10820 }, { "epoch": 0.24713614166400438, "grad_norm": 3.140625, "learning_rate": 1.5523090314536432e-05, "loss": 1.2383, "step": 10830 }, { "epoch": 0.24736433754735065, "grad_norm": 3.109375, "learning_rate": 1.5518385207142354e-05, "loss": 1.187, "step": 10840 }, { "epoch": 0.2475925334306969, "grad_norm": 3.078125, "learning_rate": 1.551368009974828e-05, "loss": 1.2681, "step": 10850 }, { "epoch": 0.24782072931404317, "grad_norm": 2.953125, "learning_rate": 1.55089749923542e-05, "loss": 1.2341, "step": 10860 }, { "epoch": 0.24804892519738944, "grad_norm": 3.0, "learning_rate": 1.5504269884960127e-05, "loss": 1.2122, "step": 10870 }, { "epoch": 0.2482771210807357, "grad_norm": 2.828125, "learning_rate": 1.549956477756605e-05, "loss": 1.2273, "step": 10880 }, { "epoch": 0.24850531696408196, "grad_norm": 2.84375, "learning_rate": 1.5494859670171974e-05, "loss": 1.2278, "step": 10890 }, { "epoch": 0.24873351284742823, "grad_norm": 3.0, "learning_rate": 1.5490154562777896e-05, "loss": 1.2284, "step": 10900 }, { "epoch": 0.2489617087307745, "grad_norm": 3.0, "learning_rate": 1.548544945538382e-05, "loss": 1.2208, "step": 10910 }, { "epoch": 0.24918990461412077, "grad_norm": 3.015625, "learning_rate": 1.5480744347989743e-05, "loss": 1.2392, "step": 10920 }, { "epoch": 0.24941810049746702, "grad_norm": 2.9375, "learning_rate": 1.547603924059567e-05, "loss": 1.1777, "step": 10930 }, { "epoch": 0.2496462963808133, "grad_norm": 3.109375, "learning_rate": 1.547133413320159e-05, "loss": 1.2712, "step": 10940 }, { "epoch": 0.24987449226415956, "grad_norm": 3.03125, "learning_rate": 1.5466629025807516e-05, "loss": 1.2382, "step": 10950 }, { "epoch": 0.2501026881475058, "grad_norm": 2.890625, "learning_rate": 1.546192391841344e-05, "loss": 1.2482, "step": 10960 }, { "epoch": 0.2503308840308521, "grad_norm": 3.25, "learning_rate": 1.5457218811019364e-05, "loss": 1.2621, "step": 10970 }, { "epoch": 0.25055907991419835, "grad_norm": 3.0, "learning_rate": 1.5452513703625286e-05, "loss": 1.1997, "step": 10980 }, { "epoch": 0.2507872757975446, "grad_norm": 3.140625, "learning_rate": 1.544780859623121e-05, "loss": 1.193, "step": 10990 }, { "epoch": 0.2510154716808909, "grad_norm": 3.234375, "learning_rate": 1.5443103488837133e-05, "loss": 1.1715, "step": 11000 }, { "epoch": 0.25124366756423716, "grad_norm": 3.265625, "learning_rate": 1.543839838144306e-05, "loss": 1.1873, "step": 11010 }, { "epoch": 0.25147186344758343, "grad_norm": 2.875, "learning_rate": 1.543369327404898e-05, "loss": 1.1859, "step": 11020 }, { "epoch": 0.25170005933092965, "grad_norm": 2.734375, "learning_rate": 1.5428988166654906e-05, "loss": 1.2123, "step": 11030 }, { "epoch": 0.2519282552142759, "grad_norm": 3.171875, "learning_rate": 1.5424283059260828e-05, "loss": 1.1889, "step": 11040 }, { "epoch": 0.2521564510976222, "grad_norm": 3.03125, "learning_rate": 1.5419577951866753e-05, "loss": 1.2163, "step": 11050 }, { "epoch": 0.25238464698096846, "grad_norm": 3.75, "learning_rate": 1.5414872844472675e-05, "loss": 1.1763, "step": 11060 }, { "epoch": 0.25261284286431474, "grad_norm": 3.09375, "learning_rate": 1.54101677370786e-05, "loss": 1.222, "step": 11070 }, { "epoch": 0.252841038747661, "grad_norm": 3.171875, "learning_rate": 1.5405462629684523e-05, "loss": 1.2512, "step": 11080 }, { "epoch": 0.2530692346310073, "grad_norm": 3.046875, "learning_rate": 1.5400757522290448e-05, "loss": 1.2386, "step": 11090 }, { "epoch": 0.2532974305143535, "grad_norm": 3.234375, "learning_rate": 1.539605241489637e-05, "loss": 1.2165, "step": 11100 }, { "epoch": 0.25352562639769977, "grad_norm": 3.171875, "learning_rate": 1.5391347307502296e-05, "loss": 1.239, "step": 11110 }, { "epoch": 0.25375382228104604, "grad_norm": 3.265625, "learning_rate": 1.5386642200108218e-05, "loss": 1.2068, "step": 11120 }, { "epoch": 0.2539820181643923, "grad_norm": 3.28125, "learning_rate": 1.5381937092714143e-05, "loss": 1.2373, "step": 11130 }, { "epoch": 0.2542102140477386, "grad_norm": 3.0625, "learning_rate": 1.537723198532007e-05, "loss": 1.1943, "step": 11140 }, { "epoch": 0.25443840993108485, "grad_norm": 3.203125, "learning_rate": 1.537252687792599e-05, "loss": 1.2667, "step": 11150 }, { "epoch": 0.2546666058144311, "grad_norm": 3.046875, "learning_rate": 1.5367821770531916e-05, "loss": 1.2869, "step": 11160 }, { "epoch": 0.2548948016977774, "grad_norm": 2.78125, "learning_rate": 1.5363116663137838e-05, "loss": 1.1895, "step": 11170 }, { "epoch": 0.2551229975811236, "grad_norm": 2.875, "learning_rate": 1.5358411555743763e-05, "loss": 1.2264, "step": 11180 }, { "epoch": 0.2553511934644699, "grad_norm": 2.921875, "learning_rate": 1.5353706448349685e-05, "loss": 1.2246, "step": 11190 }, { "epoch": 0.25557938934781615, "grad_norm": 2.890625, "learning_rate": 1.534900134095561e-05, "loss": 1.1979, "step": 11200 }, { "epoch": 0.2558075852311624, "grad_norm": 2.90625, "learning_rate": 1.5344296233561533e-05, "loss": 1.1761, "step": 11210 }, { "epoch": 0.2560357811145087, "grad_norm": 2.90625, "learning_rate": 1.5339591126167458e-05, "loss": 1.2083, "step": 11220 }, { "epoch": 0.25626397699785497, "grad_norm": 3.078125, "learning_rate": 1.533488601877338e-05, "loss": 1.1816, "step": 11230 }, { "epoch": 0.25649217288120124, "grad_norm": 3.125, "learning_rate": 1.5330180911379306e-05, "loss": 1.2263, "step": 11240 }, { "epoch": 0.2567203687645475, "grad_norm": 2.953125, "learning_rate": 1.5325475803985228e-05, "loss": 1.2242, "step": 11250 }, { "epoch": 0.25694856464789373, "grad_norm": 3.265625, "learning_rate": 1.5320770696591153e-05, "loss": 1.202, "step": 11260 }, { "epoch": 0.25717676053124, "grad_norm": 2.75, "learning_rate": 1.5316065589197075e-05, "loss": 1.2326, "step": 11270 }, { "epoch": 0.2574049564145863, "grad_norm": 3.21875, "learning_rate": 1.5311360481803e-05, "loss": 1.2124, "step": 11280 }, { "epoch": 0.25763315229793254, "grad_norm": 3.234375, "learning_rate": 1.5306655374408923e-05, "loss": 1.2686, "step": 11290 }, { "epoch": 0.2578613481812788, "grad_norm": 3.375, "learning_rate": 1.5301950267014848e-05, "loss": 1.197, "step": 11300 }, { "epoch": 0.2580895440646251, "grad_norm": 3.25, "learning_rate": 1.529724515962077e-05, "loss": 1.2404, "step": 11310 }, { "epoch": 0.25831773994797136, "grad_norm": 2.96875, "learning_rate": 1.5292540052226695e-05, "loss": 1.2452, "step": 11320 }, { "epoch": 0.25854593583131763, "grad_norm": 3.203125, "learning_rate": 1.5287834944832617e-05, "loss": 1.213, "step": 11330 }, { "epoch": 0.25877413171466385, "grad_norm": 3.078125, "learning_rate": 1.5283129837438543e-05, "loss": 1.2022, "step": 11340 }, { "epoch": 0.2590023275980101, "grad_norm": 3.1875, "learning_rate": 1.5278424730044465e-05, "loss": 1.2319, "step": 11350 }, { "epoch": 0.2592305234813564, "grad_norm": 3.171875, "learning_rate": 1.527371962265039e-05, "loss": 1.2391, "step": 11360 }, { "epoch": 0.25945871936470266, "grad_norm": 3.0625, "learning_rate": 1.5269014515256312e-05, "loss": 1.2063, "step": 11370 }, { "epoch": 0.25968691524804893, "grad_norm": 3.109375, "learning_rate": 1.5264309407862238e-05, "loss": 1.1833, "step": 11380 }, { "epoch": 0.2599151111313952, "grad_norm": 2.875, "learning_rate": 1.525960430046816e-05, "loss": 1.2197, "step": 11390 }, { "epoch": 0.2601433070147415, "grad_norm": 3.140625, "learning_rate": 1.5254899193074083e-05, "loss": 1.253, "step": 11400 }, { "epoch": 0.2603715028980877, "grad_norm": 2.8125, "learning_rate": 1.5250194085680007e-05, "loss": 1.2512, "step": 11410 }, { "epoch": 0.26059969878143396, "grad_norm": 3.34375, "learning_rate": 1.5245488978285933e-05, "loss": 1.2069, "step": 11420 }, { "epoch": 0.26082789466478024, "grad_norm": 3.3125, "learning_rate": 1.5240783870891856e-05, "loss": 1.2273, "step": 11430 }, { "epoch": 0.2610560905481265, "grad_norm": 3.078125, "learning_rate": 1.523607876349778e-05, "loss": 1.2534, "step": 11440 }, { "epoch": 0.2612842864314728, "grad_norm": 3.1875, "learning_rate": 1.5231373656103704e-05, "loss": 1.219, "step": 11450 }, { "epoch": 0.26151248231481905, "grad_norm": 3.0625, "learning_rate": 1.5226668548709627e-05, "loss": 1.2044, "step": 11460 }, { "epoch": 0.2617406781981653, "grad_norm": 2.8125, "learning_rate": 1.5221963441315551e-05, "loss": 1.2161, "step": 11470 }, { "epoch": 0.2619688740815116, "grad_norm": 3.296875, "learning_rate": 1.5217258333921471e-05, "loss": 1.1763, "step": 11480 }, { "epoch": 0.2621970699648578, "grad_norm": 3.09375, "learning_rate": 1.5212553226527395e-05, "loss": 1.2035, "step": 11490 }, { "epoch": 0.2624252658482041, "grad_norm": 3.046875, "learning_rate": 1.5207848119133319e-05, "loss": 1.2428, "step": 11500 }, { "epoch": 0.26265346173155035, "grad_norm": 3.21875, "learning_rate": 1.5203143011739243e-05, "loss": 1.2238, "step": 11510 }, { "epoch": 0.2628816576148966, "grad_norm": 3.03125, "learning_rate": 1.5198437904345166e-05, "loss": 1.1791, "step": 11520 }, { "epoch": 0.2631098534982429, "grad_norm": 2.953125, "learning_rate": 1.519373279695109e-05, "loss": 1.2491, "step": 11530 }, { "epoch": 0.26333804938158917, "grad_norm": 3.15625, "learning_rate": 1.5189027689557014e-05, "loss": 1.1972, "step": 11540 }, { "epoch": 0.26356624526493544, "grad_norm": 2.96875, "learning_rate": 1.5184322582162937e-05, "loss": 1.2072, "step": 11550 }, { "epoch": 0.2637944411482817, "grad_norm": 2.84375, "learning_rate": 1.5179617474768861e-05, "loss": 1.2304, "step": 11560 }, { "epoch": 0.2640226370316279, "grad_norm": 3.015625, "learning_rate": 1.5174912367374785e-05, "loss": 1.1815, "step": 11570 }, { "epoch": 0.2642508329149742, "grad_norm": 2.71875, "learning_rate": 1.5170207259980709e-05, "loss": 1.2328, "step": 11580 }, { "epoch": 0.26447902879832047, "grad_norm": 3.09375, "learning_rate": 1.5165502152586632e-05, "loss": 1.151, "step": 11590 }, { "epoch": 0.26470722468166674, "grad_norm": 2.9375, "learning_rate": 1.5160797045192558e-05, "loss": 1.2247, "step": 11600 }, { "epoch": 0.264935420565013, "grad_norm": 3.078125, "learning_rate": 1.5156091937798481e-05, "loss": 1.201, "step": 11610 }, { "epoch": 0.2651636164483593, "grad_norm": 2.96875, "learning_rate": 1.5151386830404405e-05, "loss": 1.1911, "step": 11620 }, { "epoch": 0.26539181233170556, "grad_norm": 3.046875, "learning_rate": 1.5146681723010329e-05, "loss": 1.245, "step": 11630 }, { "epoch": 0.2656200082150518, "grad_norm": 2.828125, "learning_rate": 1.5141976615616252e-05, "loss": 1.1856, "step": 11640 }, { "epoch": 0.26584820409839804, "grad_norm": 2.796875, "learning_rate": 1.5137271508222176e-05, "loss": 1.1885, "step": 11650 }, { "epoch": 0.2660763999817443, "grad_norm": 3.265625, "learning_rate": 1.51325664008281e-05, "loss": 1.228, "step": 11660 }, { "epoch": 0.2663045958650906, "grad_norm": 2.921875, "learning_rate": 1.5127861293434024e-05, "loss": 1.1744, "step": 11670 }, { "epoch": 0.26653279174843686, "grad_norm": 3.03125, "learning_rate": 1.5123156186039947e-05, "loss": 1.2298, "step": 11680 }, { "epoch": 0.26676098763178313, "grad_norm": 3.203125, "learning_rate": 1.5118451078645871e-05, "loss": 1.2289, "step": 11690 }, { "epoch": 0.2669891835151294, "grad_norm": 3.265625, "learning_rate": 1.5113745971251795e-05, "loss": 1.2239, "step": 11700 }, { "epoch": 0.2672173793984757, "grad_norm": 3.28125, "learning_rate": 1.5109040863857718e-05, "loss": 1.2313, "step": 11710 }, { "epoch": 0.2674455752818219, "grad_norm": 3.046875, "learning_rate": 1.5104335756463642e-05, "loss": 1.2247, "step": 11720 }, { "epoch": 0.26767377116516816, "grad_norm": 3.265625, "learning_rate": 1.5099630649069566e-05, "loss": 1.1539, "step": 11730 }, { "epoch": 0.26790196704851443, "grad_norm": 3.015625, "learning_rate": 1.509492554167549e-05, "loss": 1.2104, "step": 11740 }, { "epoch": 0.2681301629318607, "grad_norm": 2.96875, "learning_rate": 1.5090220434281413e-05, "loss": 1.2803, "step": 11750 }, { "epoch": 0.268358358815207, "grad_norm": 2.8125, "learning_rate": 1.5085515326887337e-05, "loss": 1.1974, "step": 11760 }, { "epoch": 0.26858655469855325, "grad_norm": 2.90625, "learning_rate": 1.508081021949326e-05, "loss": 1.2368, "step": 11770 }, { "epoch": 0.2688147505818995, "grad_norm": 3.015625, "learning_rate": 1.5076105112099184e-05, "loss": 1.2267, "step": 11780 }, { "epoch": 0.2690429464652458, "grad_norm": 3.28125, "learning_rate": 1.5071400004705108e-05, "loss": 1.2216, "step": 11790 }, { "epoch": 0.269271142348592, "grad_norm": 3.3125, "learning_rate": 1.5066694897311032e-05, "loss": 1.2014, "step": 11800 }, { "epoch": 0.2694993382319383, "grad_norm": 2.96875, "learning_rate": 1.5061989789916956e-05, "loss": 1.2407, "step": 11810 }, { "epoch": 0.26972753411528455, "grad_norm": 2.859375, "learning_rate": 1.505728468252288e-05, "loss": 1.1953, "step": 11820 }, { "epoch": 0.2699557299986308, "grad_norm": 3.1875, "learning_rate": 1.5052579575128803e-05, "loss": 1.2256, "step": 11830 }, { "epoch": 0.2701839258819771, "grad_norm": 2.921875, "learning_rate": 1.5047874467734727e-05, "loss": 1.2495, "step": 11840 }, { "epoch": 0.27041212176532337, "grad_norm": 3.046875, "learning_rate": 1.504316936034065e-05, "loss": 1.2216, "step": 11850 }, { "epoch": 0.27064031764866964, "grad_norm": 3.0, "learning_rate": 1.5038464252946574e-05, "loss": 1.188, "step": 11860 }, { "epoch": 0.2708685135320159, "grad_norm": 3.203125, "learning_rate": 1.5033759145552498e-05, "loss": 1.2099, "step": 11870 }, { "epoch": 0.2710967094153621, "grad_norm": 2.734375, "learning_rate": 1.5029054038158422e-05, "loss": 1.2611, "step": 11880 }, { "epoch": 0.2713249052987084, "grad_norm": 2.875, "learning_rate": 1.5024348930764345e-05, "loss": 1.1934, "step": 11890 }, { "epoch": 0.27155310118205467, "grad_norm": 2.8125, "learning_rate": 1.5019643823370269e-05, "loss": 1.164, "step": 11900 }, { "epoch": 0.27178129706540094, "grad_norm": 2.90625, "learning_rate": 1.5014938715976193e-05, "loss": 1.2064, "step": 11910 }, { "epoch": 0.2720094929487472, "grad_norm": 3.078125, "learning_rate": 1.5010233608582116e-05, "loss": 1.2058, "step": 11920 }, { "epoch": 0.2722376888320935, "grad_norm": 2.625, "learning_rate": 1.500552850118804e-05, "loss": 1.2112, "step": 11930 }, { "epoch": 0.27246588471543975, "grad_norm": 3.265625, "learning_rate": 1.5000823393793964e-05, "loss": 1.2141, "step": 11940 }, { "epoch": 0.27269408059878597, "grad_norm": 3.125, "learning_rate": 1.4996118286399888e-05, "loss": 1.1945, "step": 11950 }, { "epoch": 0.27292227648213224, "grad_norm": 3.0, "learning_rate": 1.4991413179005811e-05, "loss": 1.1971, "step": 11960 }, { "epoch": 0.2731504723654785, "grad_norm": 2.859375, "learning_rate": 1.4986708071611735e-05, "loss": 1.2377, "step": 11970 }, { "epoch": 0.2733786682488248, "grad_norm": 2.9375, "learning_rate": 1.4982002964217659e-05, "loss": 1.2027, "step": 11980 }, { "epoch": 0.27360686413217106, "grad_norm": 3.015625, "learning_rate": 1.4977297856823582e-05, "loss": 1.1486, "step": 11990 }, { "epoch": 0.27383506001551733, "grad_norm": 3.109375, "learning_rate": 1.4972592749429506e-05, "loss": 1.2413, "step": 12000 }, { "epoch": 0.2740632558988636, "grad_norm": 3.34375, "learning_rate": 1.496788764203543e-05, "loss": 1.2195, "step": 12010 }, { "epoch": 0.27429145178220987, "grad_norm": 3.25, "learning_rate": 1.4963182534641354e-05, "loss": 1.2176, "step": 12020 }, { "epoch": 0.2745196476655561, "grad_norm": 3.234375, "learning_rate": 1.4958477427247277e-05, "loss": 1.1863, "step": 12030 }, { "epoch": 0.27474784354890236, "grad_norm": 3.0625, "learning_rate": 1.4953772319853201e-05, "loss": 1.2275, "step": 12040 }, { "epoch": 0.27497603943224863, "grad_norm": 3.546875, "learning_rate": 1.4949067212459125e-05, "loss": 1.237, "step": 12050 }, { "epoch": 0.2752042353155949, "grad_norm": 2.90625, "learning_rate": 1.4944362105065048e-05, "loss": 1.2495, "step": 12060 }, { "epoch": 0.2754324311989412, "grad_norm": 2.96875, "learning_rate": 1.4939656997670972e-05, "loss": 1.2489, "step": 12070 }, { "epoch": 0.27566062708228745, "grad_norm": 2.984375, "learning_rate": 1.4934951890276896e-05, "loss": 1.2296, "step": 12080 }, { "epoch": 0.2758888229656337, "grad_norm": 3.203125, "learning_rate": 1.493024678288282e-05, "loss": 1.1958, "step": 12090 }, { "epoch": 0.27611701884898, "grad_norm": 3.25, "learning_rate": 1.4925541675488743e-05, "loss": 1.22, "step": 12100 }, { "epoch": 0.2763452147323262, "grad_norm": 2.84375, "learning_rate": 1.4920836568094667e-05, "loss": 1.2326, "step": 12110 }, { "epoch": 0.2765734106156725, "grad_norm": 3.265625, "learning_rate": 1.491613146070059e-05, "loss": 1.2199, "step": 12120 }, { "epoch": 0.27680160649901875, "grad_norm": 2.765625, "learning_rate": 1.4911426353306514e-05, "loss": 1.1627, "step": 12130 }, { "epoch": 0.277029802382365, "grad_norm": 3.09375, "learning_rate": 1.4906721245912438e-05, "loss": 1.1984, "step": 12140 }, { "epoch": 0.2772579982657113, "grad_norm": 2.984375, "learning_rate": 1.4902016138518362e-05, "loss": 1.2493, "step": 12150 }, { "epoch": 0.27748619414905756, "grad_norm": 3.046875, "learning_rate": 1.4897311031124286e-05, "loss": 1.2463, "step": 12160 }, { "epoch": 0.27771439003240384, "grad_norm": 3.265625, "learning_rate": 1.489260592373021e-05, "loss": 1.2337, "step": 12170 }, { "epoch": 0.2779425859157501, "grad_norm": 3.328125, "learning_rate": 1.4887900816336135e-05, "loss": 1.266, "step": 12180 }, { "epoch": 0.2781707817990963, "grad_norm": 3.296875, "learning_rate": 1.4883195708942058e-05, "loss": 1.2526, "step": 12190 }, { "epoch": 0.2783989776824426, "grad_norm": 2.90625, "learning_rate": 1.4878490601547982e-05, "loss": 1.1954, "step": 12200 }, { "epoch": 0.27862717356578887, "grad_norm": 3.234375, "learning_rate": 1.4873785494153906e-05, "loss": 1.1969, "step": 12210 }, { "epoch": 0.27885536944913514, "grad_norm": 2.78125, "learning_rate": 1.486908038675983e-05, "loss": 1.2085, "step": 12220 }, { "epoch": 0.2790835653324814, "grad_norm": 3.59375, "learning_rate": 1.4864375279365753e-05, "loss": 1.1777, "step": 12230 }, { "epoch": 0.2793117612158277, "grad_norm": 2.90625, "learning_rate": 1.4859670171971677e-05, "loss": 1.2416, "step": 12240 }, { "epoch": 0.27953995709917395, "grad_norm": 2.6875, "learning_rate": 1.48549650645776e-05, "loss": 1.184, "step": 12250 }, { "epoch": 0.27976815298252017, "grad_norm": 2.953125, "learning_rate": 1.4850259957183524e-05, "loss": 1.2507, "step": 12260 }, { "epoch": 0.27999634886586644, "grad_norm": 3.234375, "learning_rate": 1.4845554849789448e-05, "loss": 1.2061, "step": 12270 }, { "epoch": 0.2802245447492127, "grad_norm": 3.0625, "learning_rate": 1.4840849742395372e-05, "loss": 1.2106, "step": 12280 }, { "epoch": 0.280452740632559, "grad_norm": 3.328125, "learning_rate": 1.4836144635001295e-05, "loss": 1.1919, "step": 12290 }, { "epoch": 0.28068093651590525, "grad_norm": 3.03125, "learning_rate": 1.483143952760722e-05, "loss": 1.1927, "step": 12300 }, { "epoch": 0.2809091323992515, "grad_norm": 2.90625, "learning_rate": 1.4826734420213143e-05, "loss": 1.1973, "step": 12310 }, { "epoch": 0.2811373282825978, "grad_norm": 3.15625, "learning_rate": 1.4822029312819067e-05, "loss": 1.1916, "step": 12320 }, { "epoch": 0.28136552416594407, "grad_norm": 3.171875, "learning_rate": 1.481732420542499e-05, "loss": 1.2726, "step": 12330 }, { "epoch": 0.2815937200492903, "grad_norm": 2.921875, "learning_rate": 1.4812619098030914e-05, "loss": 1.2292, "step": 12340 }, { "epoch": 0.28182191593263656, "grad_norm": 2.75, "learning_rate": 1.4807913990636838e-05, "loss": 1.1863, "step": 12350 }, { "epoch": 0.28205011181598283, "grad_norm": 3.421875, "learning_rate": 1.4803208883242761e-05, "loss": 1.2465, "step": 12360 }, { "epoch": 0.2822783076993291, "grad_norm": 3.0, "learning_rate": 1.4798503775848685e-05, "loss": 1.2196, "step": 12370 }, { "epoch": 0.2825065035826754, "grad_norm": 2.8125, "learning_rate": 1.4793798668454609e-05, "loss": 1.2329, "step": 12380 }, { "epoch": 0.28273469946602164, "grad_norm": 2.890625, "learning_rate": 1.4789093561060533e-05, "loss": 1.1639, "step": 12390 }, { "epoch": 0.2829628953493679, "grad_norm": 3.28125, "learning_rate": 1.4784388453666456e-05, "loss": 1.1765, "step": 12400 }, { "epoch": 0.2831910912327142, "grad_norm": 2.875, "learning_rate": 1.477968334627238e-05, "loss": 1.1825, "step": 12410 }, { "epoch": 0.2834192871160604, "grad_norm": 3.015625, "learning_rate": 1.4774978238878304e-05, "loss": 1.1715, "step": 12420 }, { "epoch": 0.2836474829994067, "grad_norm": 2.796875, "learning_rate": 1.4770273131484227e-05, "loss": 1.1939, "step": 12430 }, { "epoch": 0.28387567888275295, "grad_norm": 2.96875, "learning_rate": 1.4765568024090151e-05, "loss": 1.2408, "step": 12440 }, { "epoch": 0.2841038747660992, "grad_norm": 3.234375, "learning_rate": 1.4760862916696075e-05, "loss": 1.2935, "step": 12450 }, { "epoch": 0.2843320706494455, "grad_norm": 3.09375, "learning_rate": 1.4756157809301999e-05, "loss": 1.2226, "step": 12460 }, { "epoch": 0.28456026653279176, "grad_norm": 2.84375, "learning_rate": 1.4751452701907922e-05, "loss": 1.1737, "step": 12470 }, { "epoch": 0.28478846241613803, "grad_norm": 3.15625, "learning_rate": 1.4746747594513846e-05, "loss": 1.2135, "step": 12480 }, { "epoch": 0.2850166582994843, "grad_norm": 3.328125, "learning_rate": 1.474204248711977e-05, "loss": 1.2333, "step": 12490 }, { "epoch": 0.2852448541828305, "grad_norm": 2.796875, "learning_rate": 1.4737337379725693e-05, "loss": 1.2297, "step": 12500 }, { "epoch": 0.2854730500661768, "grad_norm": 3.390625, "learning_rate": 1.4732632272331617e-05, "loss": 1.2076, "step": 12510 }, { "epoch": 0.28570124594952306, "grad_norm": 2.84375, "learning_rate": 1.4727927164937541e-05, "loss": 1.2363, "step": 12520 }, { "epoch": 0.28592944183286934, "grad_norm": 2.953125, "learning_rate": 1.4723222057543465e-05, "loss": 1.1981, "step": 12530 }, { "epoch": 0.2861576377162156, "grad_norm": 3.015625, "learning_rate": 1.4718516950149388e-05, "loss": 1.2356, "step": 12540 }, { "epoch": 0.2863858335995619, "grad_norm": 3.265625, "learning_rate": 1.4713811842755312e-05, "loss": 1.2562, "step": 12550 }, { "epoch": 0.28661402948290815, "grad_norm": 2.90625, "learning_rate": 1.4709106735361236e-05, "loss": 1.1886, "step": 12560 }, { "epoch": 0.28684222536625437, "grad_norm": 3.171875, "learning_rate": 1.470440162796716e-05, "loss": 1.2359, "step": 12570 }, { "epoch": 0.28707042124960064, "grad_norm": 3.4375, "learning_rate": 1.4699696520573083e-05, "loss": 1.1782, "step": 12580 }, { "epoch": 0.2872986171329469, "grad_norm": 2.96875, "learning_rate": 1.4694991413179007e-05, "loss": 1.1736, "step": 12590 }, { "epoch": 0.2875268130162932, "grad_norm": 2.734375, "learning_rate": 1.469028630578493e-05, "loss": 1.2025, "step": 12600 }, { "epoch": 0.28775500889963945, "grad_norm": 3.109375, "learning_rate": 1.4685581198390854e-05, "loss": 1.1978, "step": 12610 }, { "epoch": 0.2879832047829857, "grad_norm": 3.046875, "learning_rate": 1.4680876090996778e-05, "loss": 1.1907, "step": 12620 }, { "epoch": 0.288211400666332, "grad_norm": 3.0, "learning_rate": 1.4676170983602702e-05, "loss": 1.2286, "step": 12630 }, { "epoch": 0.28843959654967827, "grad_norm": 3.15625, "learning_rate": 1.4671465876208625e-05, "loss": 1.2031, "step": 12640 }, { "epoch": 0.2886677924330245, "grad_norm": 3.046875, "learning_rate": 1.4666760768814549e-05, "loss": 1.2198, "step": 12650 }, { "epoch": 0.28889598831637076, "grad_norm": 2.890625, "learning_rate": 1.4662055661420473e-05, "loss": 1.1942, "step": 12660 }, { "epoch": 0.289124184199717, "grad_norm": 3.140625, "learning_rate": 1.4657350554026397e-05, "loss": 1.2404, "step": 12670 }, { "epoch": 0.2893523800830633, "grad_norm": 3.1875, "learning_rate": 1.465264544663232e-05, "loss": 1.2416, "step": 12680 }, { "epoch": 0.28958057596640957, "grad_norm": 3.109375, "learning_rate": 1.4647940339238244e-05, "loss": 1.2627, "step": 12690 }, { "epoch": 0.28980877184975584, "grad_norm": 2.5625, "learning_rate": 1.4643235231844168e-05, "loss": 1.1792, "step": 12700 }, { "epoch": 0.2900369677331021, "grad_norm": 2.796875, "learning_rate": 1.4638530124450091e-05, "loss": 1.1731, "step": 12710 }, { "epoch": 0.2902651636164484, "grad_norm": 3.1875, "learning_rate": 1.4633825017056015e-05, "loss": 1.2295, "step": 12720 }, { "epoch": 0.2904933594997946, "grad_norm": 3.03125, "learning_rate": 1.4629119909661939e-05, "loss": 1.259, "step": 12730 }, { "epoch": 0.2907215553831409, "grad_norm": 2.8125, "learning_rate": 1.4624414802267863e-05, "loss": 1.1758, "step": 12740 }, { "epoch": 0.29094975126648714, "grad_norm": 3.0625, "learning_rate": 1.4619709694873788e-05, "loss": 1.1624, "step": 12750 }, { "epoch": 0.2911779471498334, "grad_norm": 2.984375, "learning_rate": 1.4615004587479712e-05, "loss": 1.2543, "step": 12760 }, { "epoch": 0.2914061430331797, "grad_norm": 2.875, "learning_rate": 1.4610299480085635e-05, "loss": 1.2456, "step": 12770 }, { "epoch": 0.29163433891652596, "grad_norm": 2.96875, "learning_rate": 1.4605594372691559e-05, "loss": 1.166, "step": 12780 }, { "epoch": 0.29186253479987223, "grad_norm": 3.078125, "learning_rate": 1.4600889265297483e-05, "loss": 1.2313, "step": 12790 }, { "epoch": 0.2920907306832185, "grad_norm": 3.28125, "learning_rate": 1.4596184157903407e-05, "loss": 1.1835, "step": 12800 }, { "epoch": 0.2923189265665647, "grad_norm": 3.15625, "learning_rate": 1.459147905050933e-05, "loss": 1.2022, "step": 12810 }, { "epoch": 0.292547122449911, "grad_norm": 3.0625, "learning_rate": 1.4586773943115254e-05, "loss": 1.2939, "step": 12820 }, { "epoch": 0.29277531833325726, "grad_norm": 2.625, "learning_rate": 1.4582068835721178e-05, "loss": 1.1899, "step": 12830 }, { "epoch": 0.29300351421660353, "grad_norm": 3.15625, "learning_rate": 1.4577363728327101e-05, "loss": 1.206, "step": 12840 }, { "epoch": 0.2932317100999498, "grad_norm": 3.078125, "learning_rate": 1.4572658620933025e-05, "loss": 1.1752, "step": 12850 }, { "epoch": 0.2934599059832961, "grad_norm": 3.09375, "learning_rate": 1.4567953513538949e-05, "loss": 1.2518, "step": 12860 }, { "epoch": 0.29368810186664235, "grad_norm": 3.125, "learning_rate": 1.4563248406144872e-05, "loss": 1.1751, "step": 12870 }, { "epoch": 0.29391629774998856, "grad_norm": 2.96875, "learning_rate": 1.4558543298750796e-05, "loss": 1.1864, "step": 12880 }, { "epoch": 0.29414449363333484, "grad_norm": 3.234375, "learning_rate": 1.455383819135672e-05, "loss": 1.2198, "step": 12890 }, { "epoch": 0.2943726895166811, "grad_norm": 3.140625, "learning_rate": 1.4549133083962644e-05, "loss": 1.1407, "step": 12900 }, { "epoch": 0.2946008854000274, "grad_norm": 3.484375, "learning_rate": 1.4544427976568567e-05, "loss": 1.2192, "step": 12910 }, { "epoch": 0.29482908128337365, "grad_norm": 3.421875, "learning_rate": 1.4539722869174491e-05, "loss": 1.2513, "step": 12920 }, { "epoch": 0.2950572771667199, "grad_norm": 2.71875, "learning_rate": 1.4535017761780415e-05, "loss": 1.2101, "step": 12930 }, { "epoch": 0.2952854730500662, "grad_norm": 3.09375, "learning_rate": 1.4530312654386338e-05, "loss": 1.2363, "step": 12940 }, { "epoch": 0.29551366893341247, "grad_norm": 3.125, "learning_rate": 1.4525607546992262e-05, "loss": 1.2475, "step": 12950 }, { "epoch": 0.2957418648167587, "grad_norm": 3.125, "learning_rate": 1.4520902439598186e-05, "loss": 1.2431, "step": 12960 }, { "epoch": 0.29597006070010495, "grad_norm": 2.671875, "learning_rate": 1.451619733220411e-05, "loss": 1.1809, "step": 12970 }, { "epoch": 0.2961982565834512, "grad_norm": 3.125, "learning_rate": 1.4511492224810033e-05, "loss": 1.2512, "step": 12980 }, { "epoch": 0.2964264524667975, "grad_norm": 2.796875, "learning_rate": 1.4506787117415957e-05, "loss": 1.2085, "step": 12990 }, { "epoch": 0.29665464835014377, "grad_norm": 3.09375, "learning_rate": 1.450208201002188e-05, "loss": 1.1726, "step": 13000 }, { "epoch": 0.29688284423349004, "grad_norm": 3.15625, "learning_rate": 1.4497376902627804e-05, "loss": 1.2154, "step": 13010 }, { "epoch": 0.2971110401168363, "grad_norm": 3.09375, "learning_rate": 1.4492671795233728e-05, "loss": 1.2204, "step": 13020 }, { "epoch": 0.2973392360001826, "grad_norm": 3.015625, "learning_rate": 1.4487966687839652e-05, "loss": 1.1929, "step": 13030 }, { "epoch": 0.2975674318835288, "grad_norm": 2.75, "learning_rate": 1.4483261580445576e-05, "loss": 1.221, "step": 13040 }, { "epoch": 0.29779562776687507, "grad_norm": 3.15625, "learning_rate": 1.44785564730515e-05, "loss": 1.2341, "step": 13050 }, { "epoch": 0.29802382365022134, "grad_norm": 2.984375, "learning_rate": 1.4473851365657423e-05, "loss": 1.1667, "step": 13060 }, { "epoch": 0.2982520195335676, "grad_norm": 2.984375, "learning_rate": 1.4469146258263347e-05, "loss": 1.1911, "step": 13070 }, { "epoch": 0.2984802154169139, "grad_norm": 3.390625, "learning_rate": 1.446444115086927e-05, "loss": 1.1971, "step": 13080 }, { "epoch": 0.29870841130026016, "grad_norm": 3.3125, "learning_rate": 1.4459736043475194e-05, "loss": 1.1981, "step": 13090 }, { "epoch": 0.29893660718360643, "grad_norm": 3.078125, "learning_rate": 1.4455030936081118e-05, "loss": 1.2091, "step": 13100 }, { "epoch": 0.29916480306695264, "grad_norm": 2.84375, "learning_rate": 1.4450325828687042e-05, "loss": 1.2005, "step": 13110 }, { "epoch": 0.2993929989502989, "grad_norm": 3.046875, "learning_rate": 1.4445620721292965e-05, "loss": 1.1926, "step": 13120 }, { "epoch": 0.2996211948336452, "grad_norm": 3.171875, "learning_rate": 1.4440915613898889e-05, "loss": 1.2368, "step": 13130 }, { "epoch": 0.29984939071699146, "grad_norm": 3.09375, "learning_rate": 1.4436210506504813e-05, "loss": 1.1925, "step": 13140 }, { "epoch": 0.30007758660033773, "grad_norm": 3.296875, "learning_rate": 1.4431505399110736e-05, "loss": 1.2562, "step": 13150 }, { "epoch": 0.300305782483684, "grad_norm": 2.953125, "learning_rate": 1.442680029171666e-05, "loss": 1.2001, "step": 13160 }, { "epoch": 0.3005339783670303, "grad_norm": 2.984375, "learning_rate": 1.4422095184322584e-05, "loss": 1.1501, "step": 13170 }, { "epoch": 0.30076217425037655, "grad_norm": 3.0625, "learning_rate": 1.4417390076928508e-05, "loss": 1.181, "step": 13180 }, { "epoch": 0.30099037013372276, "grad_norm": 2.96875, "learning_rate": 1.4412684969534431e-05, "loss": 1.1905, "step": 13190 }, { "epoch": 0.30121856601706903, "grad_norm": 3.015625, "learning_rate": 1.4407979862140355e-05, "loss": 1.2101, "step": 13200 }, { "epoch": 0.3014467619004153, "grad_norm": 3.1875, "learning_rate": 1.4403274754746279e-05, "loss": 1.162, "step": 13210 }, { "epoch": 0.3016749577837616, "grad_norm": 2.953125, "learning_rate": 1.4398569647352202e-05, "loss": 1.1661, "step": 13220 }, { "epoch": 0.30190315366710785, "grad_norm": 2.703125, "learning_rate": 1.4393864539958126e-05, "loss": 1.1984, "step": 13230 }, { "epoch": 0.3021313495504541, "grad_norm": 2.78125, "learning_rate": 1.438915943256405e-05, "loss": 1.148, "step": 13240 }, { "epoch": 0.3023595454338004, "grad_norm": 2.734375, "learning_rate": 1.4384454325169974e-05, "loss": 1.2644, "step": 13250 }, { "epoch": 0.30258774131714666, "grad_norm": 3.015625, "learning_rate": 1.4379749217775897e-05, "loss": 1.2437, "step": 13260 }, { "epoch": 0.3028159372004929, "grad_norm": 3.453125, "learning_rate": 1.4375044110381821e-05, "loss": 1.2274, "step": 13270 }, { "epoch": 0.30304413308383915, "grad_norm": 2.84375, "learning_rate": 1.4370339002987745e-05, "loss": 1.1997, "step": 13280 }, { "epoch": 0.3032723289671854, "grad_norm": 2.953125, "learning_rate": 1.4365633895593668e-05, "loss": 1.1997, "step": 13290 }, { "epoch": 0.3035005248505317, "grad_norm": 3.078125, "learning_rate": 1.4360928788199592e-05, "loss": 1.1628, "step": 13300 }, { "epoch": 0.30372872073387797, "grad_norm": 3.15625, "learning_rate": 1.4356223680805516e-05, "loss": 1.2071, "step": 13310 }, { "epoch": 0.30395691661722424, "grad_norm": 3.359375, "learning_rate": 1.435151857341144e-05, "loss": 1.1921, "step": 13320 }, { "epoch": 0.3041851125005705, "grad_norm": 3.03125, "learning_rate": 1.4346813466017362e-05, "loss": 1.2044, "step": 13330 }, { "epoch": 0.3044133083839168, "grad_norm": 3.171875, "learning_rate": 1.4342108358623285e-05, "loss": 1.2484, "step": 13340 }, { "epoch": 0.304641504267263, "grad_norm": 3.25, "learning_rate": 1.4337403251229209e-05, "loss": 1.187, "step": 13350 }, { "epoch": 0.30486970015060927, "grad_norm": 2.953125, "learning_rate": 1.4332698143835133e-05, "loss": 1.1798, "step": 13360 }, { "epoch": 0.30509789603395554, "grad_norm": 3.171875, "learning_rate": 1.4327993036441056e-05, "loss": 1.2057, "step": 13370 }, { "epoch": 0.3053260919173018, "grad_norm": 3.046875, "learning_rate": 1.432328792904698e-05, "loss": 1.2144, "step": 13380 }, { "epoch": 0.3055542878006481, "grad_norm": 3.71875, "learning_rate": 1.4318582821652904e-05, "loss": 1.2433, "step": 13390 }, { "epoch": 0.30578248368399435, "grad_norm": 2.96875, "learning_rate": 1.4313877714258828e-05, "loss": 1.2052, "step": 13400 }, { "epoch": 0.3060106795673406, "grad_norm": 3.109375, "learning_rate": 1.4309172606864751e-05, "loss": 1.1657, "step": 13410 }, { "epoch": 0.30623887545068684, "grad_norm": 3.203125, "learning_rate": 1.4304467499470675e-05, "loss": 1.2071, "step": 13420 }, { "epoch": 0.3064670713340331, "grad_norm": 3.125, "learning_rate": 1.4299762392076599e-05, "loss": 1.1904, "step": 13430 }, { "epoch": 0.3066952672173794, "grad_norm": 2.953125, "learning_rate": 1.4295057284682522e-05, "loss": 1.2732, "step": 13440 }, { "epoch": 0.30692346310072566, "grad_norm": 3.03125, "learning_rate": 1.4290352177288446e-05, "loss": 1.1841, "step": 13450 }, { "epoch": 0.30715165898407193, "grad_norm": 2.765625, "learning_rate": 1.428564706989437e-05, "loss": 1.2394, "step": 13460 }, { "epoch": 0.3073798548674182, "grad_norm": 3.09375, "learning_rate": 1.4280941962500294e-05, "loss": 1.2529, "step": 13470 }, { "epoch": 0.3076080507507645, "grad_norm": 2.96875, "learning_rate": 1.4276236855106217e-05, "loss": 1.2395, "step": 13480 }, { "epoch": 0.30783624663411074, "grad_norm": 2.90625, "learning_rate": 1.4271531747712141e-05, "loss": 1.1967, "step": 13490 }, { "epoch": 0.30806444251745696, "grad_norm": 2.890625, "learning_rate": 1.4266826640318065e-05, "loss": 1.1972, "step": 13500 }, { "epoch": 0.30829263840080323, "grad_norm": 3.078125, "learning_rate": 1.426212153292399e-05, "loss": 1.258, "step": 13510 }, { "epoch": 0.3085208342841495, "grad_norm": 3.25, "learning_rate": 1.4257416425529914e-05, "loss": 1.1771, "step": 13520 }, { "epoch": 0.3087490301674958, "grad_norm": 3.25, "learning_rate": 1.4252711318135838e-05, "loss": 1.2233, "step": 13530 }, { "epoch": 0.30897722605084205, "grad_norm": 2.890625, "learning_rate": 1.4248006210741761e-05, "loss": 1.2073, "step": 13540 }, { "epoch": 0.3092054219341883, "grad_norm": 2.9375, "learning_rate": 1.4243301103347685e-05, "loss": 1.2742, "step": 13550 }, { "epoch": 0.3094336178175346, "grad_norm": 2.921875, "learning_rate": 1.4238595995953609e-05, "loss": 1.2068, "step": 13560 }, { "epoch": 0.30966181370088086, "grad_norm": 2.859375, "learning_rate": 1.4233890888559532e-05, "loss": 1.2147, "step": 13570 }, { "epoch": 0.3098900095842271, "grad_norm": 3.171875, "learning_rate": 1.4229185781165456e-05, "loss": 1.184, "step": 13580 }, { "epoch": 0.31011820546757335, "grad_norm": 2.765625, "learning_rate": 1.422448067377138e-05, "loss": 1.2224, "step": 13590 }, { "epoch": 0.3103464013509196, "grad_norm": 3.21875, "learning_rate": 1.4219775566377304e-05, "loss": 1.1943, "step": 13600 }, { "epoch": 0.3105745972342659, "grad_norm": 2.828125, "learning_rate": 1.4215070458983227e-05, "loss": 1.1859, "step": 13610 }, { "epoch": 0.31080279311761216, "grad_norm": 3.015625, "learning_rate": 1.4210365351589151e-05, "loss": 1.1881, "step": 13620 }, { "epoch": 0.31103098900095844, "grad_norm": 2.921875, "learning_rate": 1.4205660244195075e-05, "loss": 1.2548, "step": 13630 }, { "epoch": 0.3112591848843047, "grad_norm": 3.296875, "learning_rate": 1.4200955136800998e-05, "loss": 1.2202, "step": 13640 }, { "epoch": 0.311487380767651, "grad_norm": 2.609375, "learning_rate": 1.4196250029406922e-05, "loss": 1.1774, "step": 13650 }, { "epoch": 0.3117155766509972, "grad_norm": 3.078125, "learning_rate": 1.4191544922012846e-05, "loss": 1.213, "step": 13660 }, { "epoch": 0.31194377253434347, "grad_norm": 3.265625, "learning_rate": 1.418683981461877e-05, "loss": 1.2132, "step": 13670 }, { "epoch": 0.31217196841768974, "grad_norm": 2.84375, "learning_rate": 1.4182134707224693e-05, "loss": 1.2325, "step": 13680 }, { "epoch": 0.312400164301036, "grad_norm": 3.0, "learning_rate": 1.4177429599830617e-05, "loss": 1.2184, "step": 13690 }, { "epoch": 0.3126283601843823, "grad_norm": 3.1875, "learning_rate": 1.417272449243654e-05, "loss": 1.1969, "step": 13700 }, { "epoch": 0.31285655606772855, "grad_norm": 2.9375, "learning_rate": 1.4168019385042464e-05, "loss": 1.2356, "step": 13710 }, { "epoch": 0.3130847519510748, "grad_norm": 2.9375, "learning_rate": 1.4163314277648388e-05, "loss": 1.1178, "step": 13720 }, { "epoch": 0.31331294783442104, "grad_norm": 3.109375, "learning_rate": 1.4158609170254312e-05, "loss": 1.2411, "step": 13730 }, { "epoch": 0.3135411437177673, "grad_norm": 3.125, "learning_rate": 1.4153904062860235e-05, "loss": 1.22, "step": 13740 }, { "epoch": 0.3137693396011136, "grad_norm": 2.796875, "learning_rate": 1.414919895546616e-05, "loss": 1.1801, "step": 13750 }, { "epoch": 0.31399753548445986, "grad_norm": 2.84375, "learning_rate": 1.4144493848072083e-05, "loss": 1.1971, "step": 13760 }, { "epoch": 0.3142257313678061, "grad_norm": 3.28125, "learning_rate": 1.4139788740678007e-05, "loss": 1.231, "step": 13770 }, { "epoch": 0.3144539272511524, "grad_norm": 2.96875, "learning_rate": 1.413508363328393e-05, "loss": 1.2077, "step": 13780 }, { "epoch": 0.31468212313449867, "grad_norm": 3.140625, "learning_rate": 1.4130378525889854e-05, "loss": 1.1675, "step": 13790 }, { "epoch": 0.31491031901784494, "grad_norm": 3.109375, "learning_rate": 1.4125673418495778e-05, "loss": 1.2069, "step": 13800 }, { "epoch": 0.31513851490119116, "grad_norm": 2.859375, "learning_rate": 1.4120968311101701e-05, "loss": 1.1784, "step": 13810 }, { "epoch": 0.31536671078453743, "grad_norm": 2.96875, "learning_rate": 1.4116263203707625e-05, "loss": 1.2077, "step": 13820 }, { "epoch": 0.3155949066678837, "grad_norm": 3.0625, "learning_rate": 1.4111558096313549e-05, "loss": 1.2111, "step": 13830 }, { "epoch": 0.31582310255123, "grad_norm": 2.96875, "learning_rate": 1.4106852988919473e-05, "loss": 1.1521, "step": 13840 }, { "epoch": 0.31605129843457624, "grad_norm": 2.90625, "learning_rate": 1.4102147881525396e-05, "loss": 1.2157, "step": 13850 }, { "epoch": 0.3162794943179225, "grad_norm": 3.09375, "learning_rate": 1.409744277413132e-05, "loss": 1.2575, "step": 13860 }, { "epoch": 0.3165076902012688, "grad_norm": 3.015625, "learning_rate": 1.4092737666737244e-05, "loss": 1.2166, "step": 13870 }, { "epoch": 0.31673588608461506, "grad_norm": 3.140625, "learning_rate": 1.4088032559343167e-05, "loss": 1.1903, "step": 13880 }, { "epoch": 0.3169640819679613, "grad_norm": 3.25, "learning_rate": 1.4083327451949091e-05, "loss": 1.2131, "step": 13890 }, { "epoch": 0.31719227785130755, "grad_norm": 2.9375, "learning_rate": 1.4078622344555015e-05, "loss": 1.2064, "step": 13900 }, { "epoch": 0.3174204737346538, "grad_norm": 3.046875, "learning_rate": 1.4073917237160939e-05, "loss": 1.2113, "step": 13910 }, { "epoch": 0.3176486696180001, "grad_norm": 3.25, "learning_rate": 1.4069212129766862e-05, "loss": 1.1819, "step": 13920 }, { "epoch": 0.31787686550134636, "grad_norm": 3.0625, "learning_rate": 1.4064507022372786e-05, "loss": 1.215, "step": 13930 }, { "epoch": 0.31810506138469263, "grad_norm": 3.125, "learning_rate": 1.405980191497871e-05, "loss": 1.2316, "step": 13940 }, { "epoch": 0.3183332572680389, "grad_norm": 2.75, "learning_rate": 1.4055096807584633e-05, "loss": 1.1833, "step": 13950 }, { "epoch": 0.3185614531513852, "grad_norm": 2.90625, "learning_rate": 1.4050391700190557e-05, "loss": 1.1783, "step": 13960 }, { "epoch": 0.3187896490347314, "grad_norm": 2.984375, "learning_rate": 1.4045686592796481e-05, "loss": 1.2239, "step": 13970 }, { "epoch": 0.31901784491807766, "grad_norm": 2.84375, "learning_rate": 1.4040981485402405e-05, "loss": 1.1857, "step": 13980 }, { "epoch": 0.31924604080142394, "grad_norm": 3.046875, "learning_rate": 1.4036276378008328e-05, "loss": 1.1989, "step": 13990 }, { "epoch": 0.3194742366847702, "grad_norm": 2.90625, "learning_rate": 1.4031571270614252e-05, "loss": 1.242, "step": 14000 }, { "epoch": 0.3197024325681165, "grad_norm": 2.875, "learning_rate": 1.4026866163220176e-05, "loss": 1.1945, "step": 14010 }, { "epoch": 0.31993062845146275, "grad_norm": 3.515625, "learning_rate": 1.40221610558261e-05, "loss": 1.2098, "step": 14020 }, { "epoch": 0.320158824334809, "grad_norm": 2.859375, "learning_rate": 1.4017455948432023e-05, "loss": 1.1866, "step": 14030 }, { "epoch": 0.32038702021815524, "grad_norm": 3.140625, "learning_rate": 1.4012750841037947e-05, "loss": 1.2233, "step": 14040 }, { "epoch": 0.3206152161015015, "grad_norm": 3.046875, "learning_rate": 1.400804573364387e-05, "loss": 1.2092, "step": 14050 }, { "epoch": 0.3208434119848478, "grad_norm": 3.28125, "learning_rate": 1.4003340626249794e-05, "loss": 1.1528, "step": 14060 }, { "epoch": 0.32107160786819405, "grad_norm": 3.15625, "learning_rate": 1.3998635518855718e-05, "loss": 1.1615, "step": 14070 }, { "epoch": 0.3212998037515403, "grad_norm": 3.359375, "learning_rate": 1.3993930411461643e-05, "loss": 1.1604, "step": 14080 }, { "epoch": 0.3215279996348866, "grad_norm": 3.09375, "learning_rate": 1.3989225304067567e-05, "loss": 1.2487, "step": 14090 }, { "epoch": 0.32175619551823287, "grad_norm": 2.984375, "learning_rate": 1.398452019667349e-05, "loss": 1.1556, "step": 14100 }, { "epoch": 0.32198439140157914, "grad_norm": 2.921875, "learning_rate": 1.3979815089279415e-05, "loss": 1.1736, "step": 14110 }, { "epoch": 0.32221258728492536, "grad_norm": 2.90625, "learning_rate": 1.3975109981885338e-05, "loss": 1.2091, "step": 14120 }, { "epoch": 0.3224407831682716, "grad_norm": 3.328125, "learning_rate": 1.3970404874491262e-05, "loss": 1.2154, "step": 14130 }, { "epoch": 0.3226689790516179, "grad_norm": 2.953125, "learning_rate": 1.3965699767097186e-05, "loss": 1.2194, "step": 14140 }, { "epoch": 0.32289717493496417, "grad_norm": 3.078125, "learning_rate": 1.396099465970311e-05, "loss": 1.204, "step": 14150 }, { "epoch": 0.32312537081831044, "grad_norm": 3.046875, "learning_rate": 1.3956289552309033e-05, "loss": 1.2108, "step": 14160 }, { "epoch": 0.3233535667016567, "grad_norm": 3.09375, "learning_rate": 1.3951584444914957e-05, "loss": 1.2283, "step": 14170 }, { "epoch": 0.323581762585003, "grad_norm": 3.5, "learning_rate": 1.394687933752088e-05, "loss": 1.1934, "step": 14180 }, { "epoch": 0.32380995846834926, "grad_norm": 3.34375, "learning_rate": 1.3942174230126804e-05, "loss": 1.2406, "step": 14190 }, { "epoch": 0.3240381543516955, "grad_norm": 3.4375, "learning_rate": 1.3937469122732728e-05, "loss": 1.2024, "step": 14200 }, { "epoch": 0.32426635023504174, "grad_norm": 3.03125, "learning_rate": 1.3932764015338652e-05, "loss": 1.207, "step": 14210 }, { "epoch": 0.324494546118388, "grad_norm": 2.9375, "learning_rate": 1.3928058907944575e-05, "loss": 1.2454, "step": 14220 }, { "epoch": 0.3247227420017343, "grad_norm": 3.125, "learning_rate": 1.3923353800550499e-05, "loss": 1.1808, "step": 14230 }, { "epoch": 0.32495093788508056, "grad_norm": 3.03125, "learning_rate": 1.3918648693156423e-05, "loss": 1.1872, "step": 14240 }, { "epoch": 0.32517913376842683, "grad_norm": 3.1875, "learning_rate": 1.3913943585762347e-05, "loss": 1.2113, "step": 14250 }, { "epoch": 0.3254073296517731, "grad_norm": 3.21875, "learning_rate": 1.390923847836827e-05, "loss": 1.228, "step": 14260 }, { "epoch": 0.3256355255351193, "grad_norm": 3.125, "learning_rate": 1.3904533370974194e-05, "loss": 1.2244, "step": 14270 }, { "epoch": 0.3258637214184656, "grad_norm": 3.734375, "learning_rate": 1.3899828263580118e-05, "loss": 1.2005, "step": 14280 }, { "epoch": 0.32609191730181186, "grad_norm": 2.796875, "learning_rate": 1.3895123156186041e-05, "loss": 1.2011, "step": 14290 }, { "epoch": 0.32632011318515813, "grad_norm": 3.0625, "learning_rate": 1.3890418048791965e-05, "loss": 1.1483, "step": 14300 }, { "epoch": 0.3265483090685044, "grad_norm": 4.59375, "learning_rate": 1.3885712941397889e-05, "loss": 1.2069, "step": 14310 }, { "epoch": 0.3267765049518507, "grad_norm": 2.953125, "learning_rate": 1.3881007834003812e-05, "loss": 1.2382, "step": 14320 }, { "epoch": 0.32700470083519695, "grad_norm": 3.1875, "learning_rate": 1.3876302726609736e-05, "loss": 1.1998, "step": 14330 }, { "epoch": 0.3272328967185432, "grad_norm": 3.046875, "learning_rate": 1.387159761921566e-05, "loss": 1.2174, "step": 14340 }, { "epoch": 0.32746109260188944, "grad_norm": 2.984375, "learning_rate": 1.3866892511821584e-05, "loss": 1.2178, "step": 14350 }, { "epoch": 0.3276892884852357, "grad_norm": 2.96875, "learning_rate": 1.3862187404427507e-05, "loss": 1.2196, "step": 14360 }, { "epoch": 0.327917484368582, "grad_norm": 3.5, "learning_rate": 1.3857482297033431e-05, "loss": 1.1992, "step": 14370 }, { "epoch": 0.32814568025192825, "grad_norm": 3.078125, "learning_rate": 1.3852777189639355e-05, "loss": 1.2199, "step": 14380 }, { "epoch": 0.3283738761352745, "grad_norm": 3.21875, "learning_rate": 1.3848072082245278e-05, "loss": 1.2153, "step": 14390 }, { "epoch": 0.3286020720186208, "grad_norm": 3.0, "learning_rate": 1.3843366974851202e-05, "loss": 1.2228, "step": 14400 }, { "epoch": 0.32883026790196707, "grad_norm": 2.890625, "learning_rate": 1.3838661867457126e-05, "loss": 1.2367, "step": 14410 }, { "epoch": 0.32905846378531334, "grad_norm": 3.03125, "learning_rate": 1.383395676006305e-05, "loss": 1.1341, "step": 14420 }, { "epoch": 0.32928665966865955, "grad_norm": 3.140625, "learning_rate": 1.3829251652668973e-05, "loss": 1.2304, "step": 14430 }, { "epoch": 0.3295148555520058, "grad_norm": 2.890625, "learning_rate": 1.3824546545274897e-05, "loss": 1.2176, "step": 14440 }, { "epoch": 0.3297430514353521, "grad_norm": 3.171875, "learning_rate": 1.381984143788082e-05, "loss": 1.1924, "step": 14450 }, { "epoch": 0.32997124731869837, "grad_norm": 3.234375, "learning_rate": 1.3815136330486744e-05, "loss": 1.1911, "step": 14460 }, { "epoch": 0.33019944320204464, "grad_norm": 2.8125, "learning_rate": 1.3810431223092668e-05, "loss": 1.2127, "step": 14470 }, { "epoch": 0.3304276390853909, "grad_norm": 3.0, "learning_rate": 1.3805726115698592e-05, "loss": 1.2456, "step": 14480 }, { "epoch": 0.3306558349687372, "grad_norm": 2.8125, "learning_rate": 1.3801021008304516e-05, "loss": 1.1711, "step": 14490 }, { "epoch": 0.33088403085208345, "grad_norm": 2.84375, "learning_rate": 1.379631590091044e-05, "loss": 1.1721, "step": 14500 }, { "epoch": 0.33111222673542967, "grad_norm": 2.890625, "learning_rate": 1.3791610793516363e-05, "loss": 1.198, "step": 14510 }, { "epoch": 0.33134042261877594, "grad_norm": 2.78125, "learning_rate": 1.3786905686122287e-05, "loss": 1.2497, "step": 14520 }, { "epoch": 0.3315686185021222, "grad_norm": 3.0, "learning_rate": 1.378220057872821e-05, "loss": 1.2749, "step": 14530 }, { "epoch": 0.3317968143854685, "grad_norm": 2.734375, "learning_rate": 1.3777495471334134e-05, "loss": 1.1486, "step": 14540 }, { "epoch": 0.33202501026881476, "grad_norm": 3.265625, "learning_rate": 1.3772790363940058e-05, "loss": 1.2663, "step": 14550 }, { "epoch": 0.33225320615216103, "grad_norm": 3.5625, "learning_rate": 1.3768085256545982e-05, "loss": 1.1865, "step": 14560 }, { "epoch": 0.3324814020355073, "grad_norm": 3.0, "learning_rate": 1.3763380149151905e-05, "loss": 1.1811, "step": 14570 }, { "epoch": 0.3327095979188535, "grad_norm": 3.09375, "learning_rate": 1.3758675041757829e-05, "loss": 1.2021, "step": 14580 }, { "epoch": 0.3329377938021998, "grad_norm": 3.25, "learning_rate": 1.3753969934363753e-05, "loss": 1.1548, "step": 14590 }, { "epoch": 0.33316598968554606, "grad_norm": 3.21875, "learning_rate": 1.3749264826969676e-05, "loss": 1.1791, "step": 14600 }, { "epoch": 0.33339418556889233, "grad_norm": 2.9375, "learning_rate": 1.37445597195756e-05, "loss": 1.2549, "step": 14610 }, { "epoch": 0.3336223814522386, "grad_norm": 3.140625, "learning_rate": 1.3739854612181524e-05, "loss": 1.1835, "step": 14620 }, { "epoch": 0.3338505773355849, "grad_norm": 3.15625, "learning_rate": 1.3735149504787448e-05, "loss": 1.1639, "step": 14630 }, { "epoch": 0.33407877321893115, "grad_norm": 3.03125, "learning_rate": 1.3730444397393371e-05, "loss": 1.152, "step": 14640 }, { "epoch": 0.3343069691022774, "grad_norm": 3.359375, "learning_rate": 1.3725739289999295e-05, "loss": 1.1824, "step": 14650 }, { "epoch": 0.33453516498562363, "grad_norm": 2.8125, "learning_rate": 1.372103418260522e-05, "loss": 1.191, "step": 14660 }, { "epoch": 0.3347633608689699, "grad_norm": 2.84375, "learning_rate": 1.3716329075211144e-05, "loss": 1.2164, "step": 14670 }, { "epoch": 0.3349915567523162, "grad_norm": 2.96875, "learning_rate": 1.3711623967817068e-05, "loss": 1.2077, "step": 14680 }, { "epoch": 0.33521975263566245, "grad_norm": 3.296875, "learning_rate": 1.3706918860422992e-05, "loss": 1.1623, "step": 14690 }, { "epoch": 0.3354479485190087, "grad_norm": 3.0, "learning_rate": 1.3702213753028915e-05, "loss": 1.1797, "step": 14700 }, { "epoch": 0.335676144402355, "grad_norm": 3.046875, "learning_rate": 1.3697508645634839e-05, "loss": 1.1978, "step": 14710 }, { "epoch": 0.33590434028570126, "grad_norm": 3.1875, "learning_rate": 1.3692803538240763e-05, "loss": 1.1489, "step": 14720 }, { "epoch": 0.33613253616904754, "grad_norm": 2.703125, "learning_rate": 1.3688098430846686e-05, "loss": 1.2125, "step": 14730 }, { "epoch": 0.33636073205239375, "grad_norm": 3.09375, "learning_rate": 1.368339332345261e-05, "loss": 1.1587, "step": 14740 }, { "epoch": 0.33658892793574, "grad_norm": 2.96875, "learning_rate": 1.3678688216058534e-05, "loss": 1.1852, "step": 14750 }, { "epoch": 0.3368171238190863, "grad_norm": 3.28125, "learning_rate": 1.3673983108664458e-05, "loss": 1.2045, "step": 14760 }, { "epoch": 0.33704531970243257, "grad_norm": 3.09375, "learning_rate": 1.3669278001270381e-05, "loss": 1.1876, "step": 14770 }, { "epoch": 0.33727351558577884, "grad_norm": 3.125, "learning_rate": 1.3664572893876305e-05, "loss": 1.2033, "step": 14780 }, { "epoch": 0.3375017114691251, "grad_norm": 3.015625, "learning_rate": 1.3659867786482229e-05, "loss": 1.1502, "step": 14790 }, { "epoch": 0.3377299073524714, "grad_norm": 3.234375, "learning_rate": 1.3655162679088152e-05, "loss": 1.2277, "step": 14800 }, { "epoch": 0.33795810323581765, "grad_norm": 2.96875, "learning_rate": 1.3650457571694076e-05, "loss": 1.2208, "step": 14810 }, { "epoch": 0.33818629911916387, "grad_norm": 3.0625, "learning_rate": 1.36457524643e-05, "loss": 1.1929, "step": 14820 }, { "epoch": 0.33841449500251014, "grad_norm": 2.96875, "learning_rate": 1.3641047356905924e-05, "loss": 1.1762, "step": 14830 }, { "epoch": 0.3386426908858564, "grad_norm": 3.421875, "learning_rate": 1.3636342249511847e-05, "loss": 1.195, "step": 14840 }, { "epoch": 0.3388708867692027, "grad_norm": 2.9375, "learning_rate": 1.3631637142117771e-05, "loss": 1.1806, "step": 14850 }, { "epoch": 0.33909908265254896, "grad_norm": 3.1875, "learning_rate": 1.3626932034723695e-05, "loss": 1.2045, "step": 14860 }, { "epoch": 0.3393272785358952, "grad_norm": 3.453125, "learning_rate": 1.3622226927329618e-05, "loss": 1.2404, "step": 14870 }, { "epoch": 0.3395554744192415, "grad_norm": 3.34375, "learning_rate": 1.3617521819935542e-05, "loss": 1.2099, "step": 14880 }, { "epoch": 0.3397836703025877, "grad_norm": 3.171875, "learning_rate": 1.3612816712541466e-05, "loss": 1.198, "step": 14890 }, { "epoch": 0.340011866185934, "grad_norm": 3.375, "learning_rate": 1.360811160514739e-05, "loss": 1.2063, "step": 14900 }, { "epoch": 0.34024006206928026, "grad_norm": 3.03125, "learning_rate": 1.3603406497753313e-05, "loss": 1.1706, "step": 14910 }, { "epoch": 0.34046825795262653, "grad_norm": 3.1875, "learning_rate": 1.3598701390359237e-05, "loss": 1.2237, "step": 14920 }, { "epoch": 0.3406964538359728, "grad_norm": 3.09375, "learning_rate": 1.359399628296516e-05, "loss": 1.1967, "step": 14930 }, { "epoch": 0.3409246497193191, "grad_norm": 3.09375, "learning_rate": 1.3589291175571084e-05, "loss": 1.2034, "step": 14940 }, { "epoch": 0.34115284560266534, "grad_norm": 2.6875, "learning_rate": 1.3584586068177008e-05, "loss": 1.175, "step": 14950 }, { "epoch": 0.3413810414860116, "grad_norm": 2.890625, "learning_rate": 1.3579880960782932e-05, "loss": 1.1716, "step": 14960 }, { "epoch": 0.34160923736935783, "grad_norm": 3.03125, "learning_rate": 1.3575175853388855e-05, "loss": 1.2291, "step": 14970 }, { "epoch": 0.3418374332527041, "grad_norm": 3.03125, "learning_rate": 1.357047074599478e-05, "loss": 1.1691, "step": 14980 }, { "epoch": 0.3420656291360504, "grad_norm": 3.59375, "learning_rate": 1.3565765638600703e-05, "loss": 1.2115, "step": 14990 }, { "epoch": 0.34229382501939665, "grad_norm": 3.0, "learning_rate": 1.3561060531206627e-05, "loss": 1.1885, "step": 15000 }, { "epoch": 0.3425220209027429, "grad_norm": 3.96875, "learning_rate": 1.355635542381255e-05, "loss": 1.2239, "step": 15010 }, { "epoch": 0.3427502167860892, "grad_norm": 3.0625, "learning_rate": 1.3551650316418474e-05, "loss": 1.1962, "step": 15020 }, { "epoch": 0.34297841266943546, "grad_norm": 3.046875, "learning_rate": 1.3546945209024398e-05, "loss": 1.161, "step": 15030 }, { "epoch": 0.34320660855278173, "grad_norm": 3.15625, "learning_rate": 1.3542240101630321e-05, "loss": 1.2244, "step": 15040 }, { "epoch": 0.34343480443612795, "grad_norm": 3.296875, "learning_rate": 1.3537534994236245e-05, "loss": 1.1894, "step": 15050 }, { "epoch": 0.3436630003194742, "grad_norm": 2.75, "learning_rate": 1.3532829886842169e-05, "loss": 1.2354, "step": 15060 }, { "epoch": 0.3438911962028205, "grad_norm": 3.40625, "learning_rate": 1.3528124779448093e-05, "loss": 1.2192, "step": 15070 }, { "epoch": 0.34411939208616676, "grad_norm": 3.0, "learning_rate": 1.3523419672054016e-05, "loss": 1.2057, "step": 15080 }, { "epoch": 0.34434758796951304, "grad_norm": 2.984375, "learning_rate": 1.351871456465994e-05, "loss": 1.2015, "step": 15090 }, { "epoch": 0.3445757838528593, "grad_norm": 3.328125, "learning_rate": 1.3514009457265864e-05, "loss": 1.1839, "step": 15100 }, { "epoch": 0.3448039797362056, "grad_norm": 3.0, "learning_rate": 1.3509304349871787e-05, "loss": 1.1921, "step": 15110 }, { "epoch": 0.34503217561955185, "grad_norm": 3.234375, "learning_rate": 1.3504599242477711e-05, "loss": 1.1987, "step": 15120 }, { "epoch": 0.34526037150289807, "grad_norm": 3.421875, "learning_rate": 1.3499894135083635e-05, "loss": 1.2054, "step": 15130 }, { "epoch": 0.34548856738624434, "grad_norm": 3.078125, "learning_rate": 1.3495189027689559e-05, "loss": 1.2059, "step": 15140 }, { "epoch": 0.3457167632695906, "grad_norm": 3.171875, "learning_rate": 1.3490483920295482e-05, "loss": 1.2405, "step": 15150 }, { "epoch": 0.3459449591529369, "grad_norm": 2.875, "learning_rate": 1.3485778812901406e-05, "loss": 1.1984, "step": 15160 }, { "epoch": 0.34617315503628315, "grad_norm": 2.953125, "learning_rate": 1.348107370550733e-05, "loss": 1.219, "step": 15170 }, { "epoch": 0.3464013509196294, "grad_norm": 3.109375, "learning_rate": 1.3476368598113252e-05, "loss": 1.2116, "step": 15180 }, { "epoch": 0.3466295468029757, "grad_norm": 2.890625, "learning_rate": 1.3471663490719175e-05, "loss": 1.2076, "step": 15190 }, { "epoch": 0.3468577426863219, "grad_norm": 3.3125, "learning_rate": 1.34669583833251e-05, "loss": 1.1932, "step": 15200 }, { "epoch": 0.3470859385696682, "grad_norm": 2.84375, "learning_rate": 1.3462253275931023e-05, "loss": 1.238, "step": 15210 }, { "epoch": 0.34731413445301446, "grad_norm": 3.046875, "learning_rate": 1.3457548168536947e-05, "loss": 1.2001, "step": 15220 }, { "epoch": 0.3475423303363607, "grad_norm": 3.03125, "learning_rate": 1.345284306114287e-05, "loss": 1.1904, "step": 15230 }, { "epoch": 0.347770526219707, "grad_norm": 3.1875, "learning_rate": 1.3448137953748794e-05, "loss": 1.2016, "step": 15240 }, { "epoch": 0.34799872210305327, "grad_norm": 3.359375, "learning_rate": 1.3443432846354718e-05, "loss": 1.2336, "step": 15250 }, { "epoch": 0.34822691798639954, "grad_norm": 3.125, "learning_rate": 1.3438727738960641e-05, "loss": 1.1785, "step": 15260 }, { "epoch": 0.3484551138697458, "grad_norm": 3.03125, "learning_rate": 1.3434022631566565e-05, "loss": 1.2528, "step": 15270 }, { "epoch": 0.34868330975309203, "grad_norm": 3.1875, "learning_rate": 1.3429317524172489e-05, "loss": 1.2294, "step": 15280 }, { "epoch": 0.3489115056364383, "grad_norm": 3.046875, "learning_rate": 1.3424612416778413e-05, "loss": 1.2062, "step": 15290 }, { "epoch": 0.3491397015197846, "grad_norm": 3.328125, "learning_rate": 1.3419907309384336e-05, "loss": 1.2352, "step": 15300 }, { "epoch": 0.34936789740313084, "grad_norm": 3.5, "learning_rate": 1.341520220199026e-05, "loss": 1.1888, "step": 15310 }, { "epoch": 0.3495960932864771, "grad_norm": 3.484375, "learning_rate": 1.3410497094596184e-05, "loss": 1.2365, "step": 15320 }, { "epoch": 0.3498242891698234, "grad_norm": 3.046875, "learning_rate": 1.3405791987202107e-05, "loss": 1.2024, "step": 15330 }, { "epoch": 0.35005248505316966, "grad_norm": 3.296875, "learning_rate": 1.3401086879808031e-05, "loss": 1.218, "step": 15340 }, { "epoch": 0.35028068093651593, "grad_norm": 2.71875, "learning_rate": 1.3396381772413955e-05, "loss": 1.1904, "step": 15350 }, { "epoch": 0.35050887681986215, "grad_norm": 3.21875, "learning_rate": 1.3391676665019879e-05, "loss": 1.2249, "step": 15360 }, { "epoch": 0.3507370727032084, "grad_norm": 3.265625, "learning_rate": 1.3386971557625802e-05, "loss": 1.2309, "step": 15370 }, { "epoch": 0.3509652685865547, "grad_norm": 3.390625, "learning_rate": 1.3382266450231726e-05, "loss": 1.1939, "step": 15380 }, { "epoch": 0.35119346446990096, "grad_norm": 3.3125, "learning_rate": 1.337756134283765e-05, "loss": 1.2075, "step": 15390 }, { "epoch": 0.35142166035324723, "grad_norm": 3.234375, "learning_rate": 1.3372856235443573e-05, "loss": 1.2009, "step": 15400 }, { "epoch": 0.3516498562365935, "grad_norm": 3.5, "learning_rate": 1.3368151128049499e-05, "loss": 1.2064, "step": 15410 }, { "epoch": 0.3518780521199398, "grad_norm": 2.890625, "learning_rate": 1.3363446020655423e-05, "loss": 1.1637, "step": 15420 }, { "epoch": 0.352106248003286, "grad_norm": 2.796875, "learning_rate": 1.3358740913261346e-05, "loss": 1.2412, "step": 15430 }, { "epoch": 0.35233444388663226, "grad_norm": 3.046875, "learning_rate": 1.335403580586727e-05, "loss": 1.2063, "step": 15440 }, { "epoch": 0.35256263976997854, "grad_norm": 3.203125, "learning_rate": 1.3349330698473194e-05, "loss": 1.2261, "step": 15450 }, { "epoch": 0.3527908356533248, "grad_norm": 2.921875, "learning_rate": 1.3344625591079117e-05, "loss": 1.2231, "step": 15460 }, { "epoch": 0.3530190315366711, "grad_norm": 3.015625, "learning_rate": 1.3339920483685041e-05, "loss": 1.1942, "step": 15470 }, { "epoch": 0.35324722742001735, "grad_norm": 3.265625, "learning_rate": 1.3335215376290965e-05, "loss": 1.1827, "step": 15480 }, { "epoch": 0.3534754233033636, "grad_norm": 3.078125, "learning_rate": 1.3330510268896889e-05, "loss": 1.2152, "step": 15490 }, { "epoch": 0.3537036191867099, "grad_norm": 2.96875, "learning_rate": 1.3325805161502812e-05, "loss": 1.2246, "step": 15500 }, { "epoch": 0.3539318150700561, "grad_norm": 3.09375, "learning_rate": 1.3321100054108736e-05, "loss": 1.2056, "step": 15510 }, { "epoch": 0.3541600109534024, "grad_norm": 3.234375, "learning_rate": 1.331639494671466e-05, "loss": 1.245, "step": 15520 }, { "epoch": 0.35438820683674865, "grad_norm": 2.828125, "learning_rate": 1.3311689839320583e-05, "loss": 1.2286, "step": 15530 }, { "epoch": 0.3546164027200949, "grad_norm": 3.4375, "learning_rate": 1.3306984731926507e-05, "loss": 1.2112, "step": 15540 }, { "epoch": 0.3548445986034412, "grad_norm": 3.40625, "learning_rate": 1.330227962453243e-05, "loss": 1.2159, "step": 15550 }, { "epoch": 0.35507279448678747, "grad_norm": 3.3125, "learning_rate": 1.3297574517138355e-05, "loss": 1.186, "step": 15560 }, { "epoch": 0.35530099037013374, "grad_norm": 2.984375, "learning_rate": 1.3292869409744278e-05, "loss": 1.1474, "step": 15570 }, { "epoch": 0.35552918625348, "grad_norm": 3.140625, "learning_rate": 1.3288164302350202e-05, "loss": 1.212, "step": 15580 }, { "epoch": 0.35575738213682623, "grad_norm": 3.046875, "learning_rate": 1.3283459194956126e-05, "loss": 1.2264, "step": 15590 }, { "epoch": 0.3559855780201725, "grad_norm": 2.9375, "learning_rate": 1.327875408756205e-05, "loss": 1.2205, "step": 15600 }, { "epoch": 0.35621377390351877, "grad_norm": 3.109375, "learning_rate": 1.3274048980167973e-05, "loss": 1.1627, "step": 15610 }, { "epoch": 0.35644196978686504, "grad_norm": 3.03125, "learning_rate": 1.3269343872773897e-05, "loss": 1.185, "step": 15620 }, { "epoch": 0.3566701656702113, "grad_norm": 3.203125, "learning_rate": 1.326463876537982e-05, "loss": 1.2618, "step": 15630 }, { "epoch": 0.3568983615535576, "grad_norm": 2.984375, "learning_rate": 1.3259933657985744e-05, "loss": 1.232, "step": 15640 }, { "epoch": 0.35712655743690386, "grad_norm": 3.0625, "learning_rate": 1.3255228550591668e-05, "loss": 1.2192, "step": 15650 }, { "epoch": 0.35735475332025013, "grad_norm": 3.234375, "learning_rate": 1.3250523443197592e-05, "loss": 1.1644, "step": 15660 }, { "epoch": 0.35758294920359635, "grad_norm": 3.28125, "learning_rate": 1.3245818335803515e-05, "loss": 1.1844, "step": 15670 }, { "epoch": 0.3578111450869426, "grad_norm": 3.15625, "learning_rate": 1.3241113228409439e-05, "loss": 1.1788, "step": 15680 }, { "epoch": 0.3580393409702889, "grad_norm": 3.078125, "learning_rate": 1.3236408121015363e-05, "loss": 1.2106, "step": 15690 }, { "epoch": 0.35826753685363516, "grad_norm": 4.875, "learning_rate": 1.3231703013621286e-05, "loss": 1.1309, "step": 15700 }, { "epoch": 0.35849573273698143, "grad_norm": 2.75, "learning_rate": 1.322699790622721e-05, "loss": 1.2195, "step": 15710 }, { "epoch": 0.3587239286203277, "grad_norm": 2.921875, "learning_rate": 1.3222292798833134e-05, "loss": 1.2395, "step": 15720 }, { "epoch": 0.358952124503674, "grad_norm": 3.15625, "learning_rate": 1.3217587691439058e-05, "loss": 1.1979, "step": 15730 }, { "epoch": 0.3591803203870202, "grad_norm": 3.28125, "learning_rate": 1.3212882584044981e-05, "loss": 1.1978, "step": 15740 }, { "epoch": 0.35940851627036646, "grad_norm": 3.046875, "learning_rate": 1.3208177476650905e-05, "loss": 1.1386, "step": 15750 }, { "epoch": 0.35963671215371273, "grad_norm": 3.125, "learning_rate": 1.3203472369256829e-05, "loss": 1.2753, "step": 15760 }, { "epoch": 0.359864908037059, "grad_norm": 2.984375, "learning_rate": 1.3198767261862752e-05, "loss": 1.1789, "step": 15770 }, { "epoch": 0.3600931039204053, "grad_norm": 3.09375, "learning_rate": 1.3194062154468676e-05, "loss": 1.2123, "step": 15780 }, { "epoch": 0.36032129980375155, "grad_norm": 3.078125, "learning_rate": 1.31893570470746e-05, "loss": 1.2119, "step": 15790 }, { "epoch": 0.3605494956870978, "grad_norm": 3.3125, "learning_rate": 1.3184651939680524e-05, "loss": 1.213, "step": 15800 }, { "epoch": 0.3607776915704441, "grad_norm": 3.03125, "learning_rate": 1.3179946832286447e-05, "loss": 1.2254, "step": 15810 }, { "epoch": 0.3610058874537903, "grad_norm": 3.171875, "learning_rate": 1.3175241724892371e-05, "loss": 1.2053, "step": 15820 }, { "epoch": 0.3612340833371366, "grad_norm": 2.953125, "learning_rate": 1.3170536617498295e-05, "loss": 1.2365, "step": 15830 }, { "epoch": 0.36146227922048285, "grad_norm": 3.140625, "learning_rate": 1.3165831510104218e-05, "loss": 1.1934, "step": 15840 }, { "epoch": 0.3616904751038291, "grad_norm": 3.125, "learning_rate": 1.3161126402710142e-05, "loss": 1.1408, "step": 15850 }, { "epoch": 0.3619186709871754, "grad_norm": 3.015625, "learning_rate": 1.3156421295316066e-05, "loss": 1.2465, "step": 15860 }, { "epoch": 0.36214686687052167, "grad_norm": 2.890625, "learning_rate": 1.315171618792199e-05, "loss": 1.2194, "step": 15870 }, { "epoch": 0.36237506275386794, "grad_norm": 3.28125, "learning_rate": 1.3147011080527913e-05, "loss": 1.2426, "step": 15880 }, { "epoch": 0.3626032586372142, "grad_norm": 3.15625, "learning_rate": 1.3142305973133837e-05, "loss": 1.2381, "step": 15890 }, { "epoch": 0.3628314545205604, "grad_norm": 3.1875, "learning_rate": 1.313760086573976e-05, "loss": 1.2476, "step": 15900 }, { "epoch": 0.3630596504039067, "grad_norm": 2.953125, "learning_rate": 1.3132895758345684e-05, "loss": 1.2361, "step": 15910 }, { "epoch": 0.36328784628725297, "grad_norm": 3.3125, "learning_rate": 1.3128190650951608e-05, "loss": 1.2208, "step": 15920 }, { "epoch": 0.36351604217059924, "grad_norm": 3.140625, "learning_rate": 1.3123485543557532e-05, "loss": 1.203, "step": 15930 }, { "epoch": 0.3637442380539455, "grad_norm": 3.046875, "learning_rate": 1.3118780436163456e-05, "loss": 1.1826, "step": 15940 }, { "epoch": 0.3639724339372918, "grad_norm": 2.84375, "learning_rate": 1.311407532876938e-05, "loss": 1.1589, "step": 15950 }, { "epoch": 0.36420062982063806, "grad_norm": 3.1875, "learning_rate": 1.3109370221375303e-05, "loss": 1.1741, "step": 15960 }, { "epoch": 0.3644288257039843, "grad_norm": 3.1875, "learning_rate": 1.3104665113981227e-05, "loss": 1.1573, "step": 15970 }, { "epoch": 0.36465702158733054, "grad_norm": 3.0625, "learning_rate": 1.309996000658715e-05, "loss": 1.1857, "step": 15980 }, { "epoch": 0.3648852174706768, "grad_norm": 3.015625, "learning_rate": 1.3095254899193076e-05, "loss": 1.2343, "step": 15990 }, { "epoch": 0.3651134133540231, "grad_norm": 3.015625, "learning_rate": 1.3090549791799e-05, "loss": 1.1575, "step": 16000 }, { "epoch": 0.36534160923736936, "grad_norm": 3.140625, "learning_rate": 1.3085844684404923e-05, "loss": 1.1787, "step": 16010 }, { "epoch": 0.36556980512071563, "grad_norm": 3.0625, "learning_rate": 1.3081139577010847e-05, "loss": 1.2434, "step": 16020 }, { "epoch": 0.3657980010040619, "grad_norm": 2.984375, "learning_rate": 1.307643446961677e-05, "loss": 1.1878, "step": 16030 }, { "epoch": 0.3660261968874082, "grad_norm": 3.390625, "learning_rate": 1.3071729362222694e-05, "loss": 1.1887, "step": 16040 }, { "epoch": 0.3662543927707544, "grad_norm": 3.0, "learning_rate": 1.3067024254828618e-05, "loss": 1.1707, "step": 16050 }, { "epoch": 0.36648258865410066, "grad_norm": 3.015625, "learning_rate": 1.3062319147434542e-05, "loss": 1.1593, "step": 16060 }, { "epoch": 0.36671078453744693, "grad_norm": 2.875, "learning_rate": 1.3057614040040466e-05, "loss": 1.2315, "step": 16070 }, { "epoch": 0.3669389804207932, "grad_norm": 3.34375, "learning_rate": 1.305290893264639e-05, "loss": 1.2078, "step": 16080 }, { "epoch": 0.3671671763041395, "grad_norm": 2.90625, "learning_rate": 1.3048203825252313e-05, "loss": 1.2791, "step": 16090 }, { "epoch": 0.36739537218748575, "grad_norm": 2.875, "learning_rate": 1.3043498717858237e-05, "loss": 1.1562, "step": 16100 }, { "epoch": 0.367623568070832, "grad_norm": 2.90625, "learning_rate": 1.303879361046416e-05, "loss": 1.1838, "step": 16110 }, { "epoch": 0.3678517639541783, "grad_norm": 3.078125, "learning_rate": 1.3034088503070084e-05, "loss": 1.1935, "step": 16120 }, { "epoch": 0.3680799598375245, "grad_norm": 3.3125, "learning_rate": 1.3029383395676008e-05, "loss": 1.2088, "step": 16130 }, { "epoch": 0.3683081557208708, "grad_norm": 3.328125, "learning_rate": 1.3024678288281932e-05, "loss": 1.2538, "step": 16140 }, { "epoch": 0.36853635160421705, "grad_norm": 2.96875, "learning_rate": 1.3019973180887855e-05, "loss": 1.1922, "step": 16150 }, { "epoch": 0.3687645474875633, "grad_norm": 2.890625, "learning_rate": 1.3015268073493779e-05, "loss": 1.2076, "step": 16160 }, { "epoch": 0.3689927433709096, "grad_norm": 3.109375, "learning_rate": 1.3010562966099703e-05, "loss": 1.1793, "step": 16170 }, { "epoch": 0.36922093925425586, "grad_norm": 3.25, "learning_rate": 1.3005857858705626e-05, "loss": 1.1524, "step": 16180 }, { "epoch": 0.36944913513760214, "grad_norm": 3.625, "learning_rate": 1.300115275131155e-05, "loss": 1.2094, "step": 16190 }, { "epoch": 0.3696773310209484, "grad_norm": 2.859375, "learning_rate": 1.2996447643917474e-05, "loss": 1.1858, "step": 16200 }, { "epoch": 0.3699055269042946, "grad_norm": 3.15625, "learning_rate": 1.2991742536523398e-05, "loss": 1.1747, "step": 16210 }, { "epoch": 0.3701337227876409, "grad_norm": 3.1875, "learning_rate": 1.2987037429129321e-05, "loss": 1.1965, "step": 16220 }, { "epoch": 0.37036191867098717, "grad_norm": 2.953125, "learning_rate": 1.2982332321735245e-05, "loss": 1.2143, "step": 16230 }, { "epoch": 0.37059011455433344, "grad_norm": 3.40625, "learning_rate": 1.2977627214341169e-05, "loss": 1.2177, "step": 16240 }, { "epoch": 0.3708183104376797, "grad_norm": 2.890625, "learning_rate": 1.2972922106947092e-05, "loss": 1.2227, "step": 16250 }, { "epoch": 0.371046506321026, "grad_norm": 3.03125, "learning_rate": 1.2968216999553016e-05, "loss": 1.2451, "step": 16260 }, { "epoch": 0.37127470220437225, "grad_norm": 2.9375, "learning_rate": 1.296351189215894e-05, "loss": 1.1926, "step": 16270 }, { "epoch": 0.3715028980877185, "grad_norm": 2.875, "learning_rate": 1.2958806784764864e-05, "loss": 1.2023, "step": 16280 }, { "epoch": 0.37173109397106474, "grad_norm": 2.890625, "learning_rate": 1.2954101677370787e-05, "loss": 1.209, "step": 16290 }, { "epoch": 0.371959289854411, "grad_norm": 3.265625, "learning_rate": 1.2949396569976711e-05, "loss": 1.2786, "step": 16300 }, { "epoch": 0.3721874857377573, "grad_norm": 3.21875, "learning_rate": 1.2944691462582635e-05, "loss": 1.1895, "step": 16310 }, { "epoch": 0.37241568162110356, "grad_norm": 3.3125, "learning_rate": 1.2939986355188558e-05, "loss": 1.1487, "step": 16320 }, { "epoch": 0.3726438775044498, "grad_norm": 3.046875, "learning_rate": 1.2935281247794482e-05, "loss": 1.2483, "step": 16330 }, { "epoch": 0.3728720733877961, "grad_norm": 2.984375, "learning_rate": 1.2930576140400406e-05, "loss": 1.2016, "step": 16340 }, { "epoch": 0.37310026927114237, "grad_norm": 2.90625, "learning_rate": 1.292587103300633e-05, "loss": 1.1341, "step": 16350 }, { "epoch": 0.3733284651544886, "grad_norm": 3.046875, "learning_rate": 1.2921165925612253e-05, "loss": 1.1641, "step": 16360 }, { "epoch": 0.37355666103783486, "grad_norm": 2.859375, "learning_rate": 1.2916460818218177e-05, "loss": 1.2063, "step": 16370 }, { "epoch": 0.37378485692118113, "grad_norm": 3.0, "learning_rate": 1.29117557108241e-05, "loss": 1.2167, "step": 16380 }, { "epoch": 0.3740130528045274, "grad_norm": 3.046875, "learning_rate": 1.2907050603430024e-05, "loss": 1.167, "step": 16390 }, { "epoch": 0.3742412486878737, "grad_norm": 3.109375, "learning_rate": 1.2902345496035948e-05, "loss": 1.1908, "step": 16400 }, { "epoch": 0.37446944457121994, "grad_norm": 2.9375, "learning_rate": 1.2897640388641872e-05, "loss": 1.2402, "step": 16410 }, { "epoch": 0.3746976404545662, "grad_norm": 3.0, "learning_rate": 1.2892935281247795e-05, "loss": 1.1716, "step": 16420 }, { "epoch": 0.3749258363379125, "grad_norm": 3.21875, "learning_rate": 1.288823017385372e-05, "loss": 1.1787, "step": 16430 }, { "epoch": 0.3751540322212587, "grad_norm": 2.96875, "learning_rate": 1.2883525066459643e-05, "loss": 1.1748, "step": 16440 }, { "epoch": 0.375382228104605, "grad_norm": 3.078125, "learning_rate": 1.2878819959065567e-05, "loss": 1.2004, "step": 16450 }, { "epoch": 0.37561042398795125, "grad_norm": 3.015625, "learning_rate": 1.287411485167149e-05, "loss": 1.1846, "step": 16460 }, { "epoch": 0.3758386198712975, "grad_norm": 2.796875, "learning_rate": 1.2869409744277414e-05, "loss": 1.1962, "step": 16470 }, { "epoch": 0.3760668157546438, "grad_norm": 3.0625, "learning_rate": 1.2864704636883338e-05, "loss": 1.2507, "step": 16480 }, { "epoch": 0.37629501163799006, "grad_norm": 3.15625, "learning_rate": 1.2859999529489261e-05, "loss": 1.1598, "step": 16490 }, { "epoch": 0.37652320752133633, "grad_norm": 3.15625, "learning_rate": 1.2855294422095185e-05, "loss": 1.1638, "step": 16500 }, { "epoch": 0.3767514034046826, "grad_norm": 2.875, "learning_rate": 1.2850589314701109e-05, "loss": 1.2184, "step": 16510 }, { "epoch": 0.3769795992880288, "grad_norm": 2.90625, "learning_rate": 1.2845884207307033e-05, "loss": 1.2678, "step": 16520 }, { "epoch": 0.3772077951713751, "grad_norm": 3.046875, "learning_rate": 1.2841179099912956e-05, "loss": 1.1935, "step": 16530 }, { "epoch": 0.37743599105472136, "grad_norm": 3.21875, "learning_rate": 1.283647399251888e-05, "loss": 1.2447, "step": 16540 }, { "epoch": 0.37766418693806764, "grad_norm": 3.15625, "learning_rate": 1.2831768885124804e-05, "loss": 1.1932, "step": 16550 }, { "epoch": 0.3778923828214139, "grad_norm": 3.21875, "learning_rate": 1.2827063777730727e-05, "loss": 1.1753, "step": 16560 }, { "epoch": 0.3781205787047602, "grad_norm": 3.078125, "learning_rate": 1.2822358670336653e-05, "loss": 1.224, "step": 16570 }, { "epoch": 0.37834877458810645, "grad_norm": 3.015625, "learning_rate": 1.2817653562942577e-05, "loss": 1.1551, "step": 16580 }, { "epoch": 0.37857697047145267, "grad_norm": 2.984375, "learning_rate": 1.28129484555485e-05, "loss": 1.1638, "step": 16590 }, { "epoch": 0.37880516635479894, "grad_norm": 3.078125, "learning_rate": 1.2808243348154424e-05, "loss": 1.1726, "step": 16600 }, { "epoch": 0.3790333622381452, "grad_norm": 3.109375, "learning_rate": 1.2803538240760348e-05, "loss": 1.1901, "step": 16610 }, { "epoch": 0.3792615581214915, "grad_norm": 3.0625, "learning_rate": 1.2798833133366271e-05, "loss": 1.2205, "step": 16620 }, { "epoch": 0.37948975400483775, "grad_norm": 3.09375, "learning_rate": 1.2794128025972195e-05, "loss": 1.1921, "step": 16630 }, { "epoch": 0.379717949888184, "grad_norm": 2.921875, "learning_rate": 1.2789422918578119e-05, "loss": 1.1738, "step": 16640 }, { "epoch": 0.3799461457715303, "grad_norm": 2.84375, "learning_rate": 1.2784717811184043e-05, "loss": 1.2036, "step": 16650 }, { "epoch": 0.38017434165487657, "grad_norm": 2.828125, "learning_rate": 1.2780012703789966e-05, "loss": 1.2112, "step": 16660 }, { "epoch": 0.3804025375382228, "grad_norm": 3.265625, "learning_rate": 1.277530759639589e-05, "loss": 1.2476, "step": 16670 }, { "epoch": 0.38063073342156906, "grad_norm": 3.375, "learning_rate": 1.2770602489001814e-05, "loss": 1.1245, "step": 16680 }, { "epoch": 0.38085892930491533, "grad_norm": 3.0625, "learning_rate": 1.2765897381607737e-05, "loss": 1.1732, "step": 16690 }, { "epoch": 0.3810871251882616, "grad_norm": 3.0, "learning_rate": 1.2761192274213661e-05, "loss": 1.1911, "step": 16700 }, { "epoch": 0.38131532107160787, "grad_norm": 3.0625, "learning_rate": 1.2756487166819585e-05, "loss": 1.1713, "step": 16710 }, { "epoch": 0.38154351695495414, "grad_norm": 3.15625, "learning_rate": 1.2751782059425509e-05, "loss": 1.2042, "step": 16720 }, { "epoch": 0.3817717128383004, "grad_norm": 2.90625, "learning_rate": 1.2747076952031432e-05, "loss": 1.1705, "step": 16730 }, { "epoch": 0.3819999087216467, "grad_norm": 3.265625, "learning_rate": 1.2742371844637356e-05, "loss": 1.2086, "step": 16740 }, { "epoch": 0.3822281046049929, "grad_norm": 2.765625, "learning_rate": 1.273766673724328e-05, "loss": 1.1534, "step": 16750 }, { "epoch": 0.3824563004883392, "grad_norm": 2.84375, "learning_rate": 1.2732961629849203e-05, "loss": 1.2093, "step": 16760 }, { "epoch": 0.38268449637168545, "grad_norm": 3.078125, "learning_rate": 1.2728256522455127e-05, "loss": 1.2324, "step": 16770 }, { "epoch": 0.3829126922550317, "grad_norm": 3.03125, "learning_rate": 1.272355141506105e-05, "loss": 1.1854, "step": 16780 }, { "epoch": 0.383140888138378, "grad_norm": 3.421875, "learning_rate": 1.2718846307666975e-05, "loss": 1.2221, "step": 16790 }, { "epoch": 0.38336908402172426, "grad_norm": 3.109375, "learning_rate": 1.2714141200272898e-05, "loss": 1.2632, "step": 16800 }, { "epoch": 0.38359727990507053, "grad_norm": 3.03125, "learning_rate": 1.2709436092878822e-05, "loss": 1.2019, "step": 16810 }, { "epoch": 0.3838254757884168, "grad_norm": 3.09375, "learning_rate": 1.2704730985484746e-05, "loss": 1.2069, "step": 16820 }, { "epoch": 0.384053671671763, "grad_norm": 3.21875, "learning_rate": 1.270002587809067e-05, "loss": 1.1525, "step": 16830 }, { "epoch": 0.3842818675551093, "grad_norm": 2.875, "learning_rate": 1.2695320770696593e-05, "loss": 1.1976, "step": 16840 }, { "epoch": 0.38451006343845556, "grad_norm": 3.046875, "learning_rate": 1.2690615663302517e-05, "loss": 1.2349, "step": 16850 }, { "epoch": 0.38473825932180183, "grad_norm": 2.921875, "learning_rate": 1.268591055590844e-05, "loss": 1.2034, "step": 16860 }, { "epoch": 0.3849664552051481, "grad_norm": 3.078125, "learning_rate": 1.2681205448514364e-05, "loss": 1.1914, "step": 16870 }, { "epoch": 0.3851946510884944, "grad_norm": 3.296875, "learning_rate": 1.2676500341120288e-05, "loss": 1.2068, "step": 16880 }, { "epoch": 0.38542284697184065, "grad_norm": 2.90625, "learning_rate": 1.2671795233726212e-05, "loss": 1.1802, "step": 16890 }, { "epoch": 0.38565104285518687, "grad_norm": 3.359375, "learning_rate": 1.2667090126332135e-05, "loss": 1.1785, "step": 16900 }, { "epoch": 0.38587923873853314, "grad_norm": 3.0625, "learning_rate": 1.2662385018938059e-05, "loss": 1.2529, "step": 16910 }, { "epoch": 0.3861074346218794, "grad_norm": 3.21875, "learning_rate": 1.2657679911543983e-05, "loss": 1.165, "step": 16920 }, { "epoch": 0.3863356305052257, "grad_norm": 3.234375, "learning_rate": 1.2652974804149907e-05, "loss": 1.2021, "step": 16930 }, { "epoch": 0.38656382638857195, "grad_norm": 3.296875, "learning_rate": 1.264826969675583e-05, "loss": 1.1855, "step": 16940 }, { "epoch": 0.3867920222719182, "grad_norm": 3.515625, "learning_rate": 1.2643564589361754e-05, "loss": 1.2521, "step": 16950 }, { "epoch": 0.3870202181552645, "grad_norm": 2.953125, "learning_rate": 1.2638859481967678e-05, "loss": 1.1772, "step": 16960 }, { "epoch": 0.38724841403861077, "grad_norm": 3.15625, "learning_rate": 1.2634154374573601e-05, "loss": 1.2384, "step": 16970 }, { "epoch": 0.387476609921957, "grad_norm": 2.96875, "learning_rate": 1.2629449267179525e-05, "loss": 1.1623, "step": 16980 }, { "epoch": 0.38770480580530325, "grad_norm": 3.09375, "learning_rate": 1.2624744159785449e-05, "loss": 1.1692, "step": 16990 }, { "epoch": 0.3879330016886495, "grad_norm": 3.140625, "learning_rate": 1.2620039052391373e-05, "loss": 1.1925, "step": 17000 }, { "epoch": 0.3881611975719958, "grad_norm": 2.984375, "learning_rate": 1.2615333944997296e-05, "loss": 1.2224, "step": 17010 }, { "epoch": 0.38838939345534207, "grad_norm": 3.1875, "learning_rate": 1.261062883760322e-05, "loss": 1.1709, "step": 17020 }, { "epoch": 0.38861758933868834, "grad_norm": 2.9375, "learning_rate": 1.2605923730209142e-05, "loss": 1.2105, "step": 17030 }, { "epoch": 0.3888457852220346, "grad_norm": 3.078125, "learning_rate": 1.2601218622815066e-05, "loss": 1.1966, "step": 17040 }, { "epoch": 0.3890739811053809, "grad_norm": 3.203125, "learning_rate": 1.259651351542099e-05, "loss": 1.235, "step": 17050 }, { "epoch": 0.3893021769887271, "grad_norm": 3.421875, "learning_rate": 1.2591808408026913e-05, "loss": 1.208, "step": 17060 }, { "epoch": 0.38953037287207337, "grad_norm": 2.953125, "learning_rate": 1.2587103300632837e-05, "loss": 1.1666, "step": 17070 }, { "epoch": 0.38975856875541964, "grad_norm": 2.9375, "learning_rate": 1.258239819323876e-05, "loss": 1.2018, "step": 17080 }, { "epoch": 0.3899867646387659, "grad_norm": 3.359375, "learning_rate": 1.2577693085844684e-05, "loss": 1.2128, "step": 17090 }, { "epoch": 0.3902149605221122, "grad_norm": 3.0, "learning_rate": 1.2572987978450608e-05, "loss": 1.1985, "step": 17100 }, { "epoch": 0.39044315640545846, "grad_norm": 3.015625, "learning_rate": 1.2568282871056532e-05, "loss": 1.1114, "step": 17110 }, { "epoch": 0.39067135228880473, "grad_norm": 3.4375, "learning_rate": 1.2563577763662455e-05, "loss": 1.2021, "step": 17120 }, { "epoch": 0.390899548172151, "grad_norm": 3.0, "learning_rate": 1.2558872656268379e-05, "loss": 1.1807, "step": 17130 }, { "epoch": 0.3911277440554972, "grad_norm": 3.109375, "learning_rate": 1.2554167548874303e-05, "loss": 1.1691, "step": 17140 }, { "epoch": 0.3913559399388435, "grad_norm": 3.078125, "learning_rate": 1.2549462441480226e-05, "loss": 1.1731, "step": 17150 }, { "epoch": 0.39158413582218976, "grad_norm": 2.984375, "learning_rate": 1.254475733408615e-05, "loss": 1.2421, "step": 17160 }, { "epoch": 0.39181233170553603, "grad_norm": 2.96875, "learning_rate": 1.2540052226692074e-05, "loss": 1.171, "step": 17170 }, { "epoch": 0.3920405275888823, "grad_norm": 2.828125, "learning_rate": 1.2535347119297998e-05, "loss": 1.1595, "step": 17180 }, { "epoch": 0.3922687234722286, "grad_norm": 3.46875, "learning_rate": 1.2530642011903921e-05, "loss": 1.202, "step": 17190 }, { "epoch": 0.39249691935557485, "grad_norm": 3.296875, "learning_rate": 1.2525936904509845e-05, "loss": 1.1483, "step": 17200 }, { "epoch": 0.39272511523892106, "grad_norm": 3.15625, "learning_rate": 1.2521231797115769e-05, "loss": 1.1783, "step": 17210 }, { "epoch": 0.39295331112226733, "grad_norm": 3.015625, "learning_rate": 1.2516526689721692e-05, "loss": 1.263, "step": 17220 }, { "epoch": 0.3931815070056136, "grad_norm": 3.1875, "learning_rate": 1.2511821582327616e-05, "loss": 1.1593, "step": 17230 }, { "epoch": 0.3934097028889599, "grad_norm": 3.046875, "learning_rate": 1.250711647493354e-05, "loss": 1.2072, "step": 17240 }, { "epoch": 0.39363789877230615, "grad_norm": 3.125, "learning_rate": 1.2502411367539464e-05, "loss": 1.2129, "step": 17250 }, { "epoch": 0.3938660946556524, "grad_norm": 3.109375, "learning_rate": 1.2497706260145387e-05, "loss": 1.1871, "step": 17260 }, { "epoch": 0.3940942905389987, "grad_norm": 3.1875, "learning_rate": 1.2493001152751311e-05, "loss": 1.1784, "step": 17270 }, { "epoch": 0.39432248642234496, "grad_norm": 3.25, "learning_rate": 1.2488296045357235e-05, "loss": 1.1978, "step": 17280 }, { "epoch": 0.3945506823056912, "grad_norm": 3.125, "learning_rate": 1.2483590937963158e-05, "loss": 1.2005, "step": 17290 }, { "epoch": 0.39477887818903745, "grad_norm": 3.125, "learning_rate": 1.2478885830569082e-05, "loss": 1.196, "step": 17300 }, { "epoch": 0.3950070740723837, "grad_norm": 2.921875, "learning_rate": 1.2474180723175006e-05, "loss": 1.1712, "step": 17310 }, { "epoch": 0.39523526995573, "grad_norm": 3.546875, "learning_rate": 1.2469475615780931e-05, "loss": 1.2016, "step": 17320 }, { "epoch": 0.39546346583907627, "grad_norm": 3.234375, "learning_rate": 1.2464770508386855e-05, "loss": 1.1732, "step": 17330 }, { "epoch": 0.39569166172242254, "grad_norm": 3.1875, "learning_rate": 1.2460065400992779e-05, "loss": 1.171, "step": 17340 }, { "epoch": 0.3959198576057688, "grad_norm": 3.453125, "learning_rate": 1.2455360293598702e-05, "loss": 1.1783, "step": 17350 }, { "epoch": 0.3961480534891151, "grad_norm": 3.125, "learning_rate": 1.2450655186204626e-05, "loss": 1.1894, "step": 17360 }, { "epoch": 0.3963762493724613, "grad_norm": 3.0625, "learning_rate": 1.244595007881055e-05, "loss": 1.1984, "step": 17370 }, { "epoch": 0.39660444525580757, "grad_norm": 3.28125, "learning_rate": 1.2441244971416474e-05, "loss": 1.2006, "step": 17380 }, { "epoch": 0.39683264113915384, "grad_norm": 3.03125, "learning_rate": 1.2436539864022397e-05, "loss": 1.2496, "step": 17390 }, { "epoch": 0.3970608370225001, "grad_norm": 3.03125, "learning_rate": 1.2431834756628321e-05, "loss": 1.2214, "step": 17400 }, { "epoch": 0.3972890329058464, "grad_norm": 3.109375, "learning_rate": 1.2427129649234245e-05, "loss": 1.2505, "step": 17410 }, { "epoch": 0.39751722878919266, "grad_norm": 2.984375, "learning_rate": 1.2422424541840168e-05, "loss": 1.1794, "step": 17420 }, { "epoch": 0.3977454246725389, "grad_norm": 3.125, "learning_rate": 1.2417719434446092e-05, "loss": 1.2245, "step": 17430 }, { "epoch": 0.3979736205558852, "grad_norm": 2.96875, "learning_rate": 1.2413014327052016e-05, "loss": 1.2643, "step": 17440 }, { "epoch": 0.3982018164392314, "grad_norm": 3.515625, "learning_rate": 1.240830921965794e-05, "loss": 1.1768, "step": 17450 }, { "epoch": 0.3984300123225777, "grad_norm": 2.84375, "learning_rate": 1.2403604112263863e-05, "loss": 1.2245, "step": 17460 }, { "epoch": 0.39865820820592396, "grad_norm": 3.375, "learning_rate": 1.2398899004869787e-05, "loss": 1.2175, "step": 17470 }, { "epoch": 0.39888640408927023, "grad_norm": 2.828125, "learning_rate": 1.239419389747571e-05, "loss": 1.1696, "step": 17480 }, { "epoch": 0.3991145999726165, "grad_norm": 2.984375, "learning_rate": 1.2389488790081634e-05, "loss": 1.2177, "step": 17490 }, { "epoch": 0.3993427958559628, "grad_norm": 3.234375, "learning_rate": 1.2384783682687558e-05, "loss": 1.186, "step": 17500 }, { "epoch": 0.39957099173930904, "grad_norm": 3.140625, "learning_rate": 1.2380078575293482e-05, "loss": 1.2118, "step": 17510 }, { "epoch": 0.39979918762265526, "grad_norm": 3.046875, "learning_rate": 1.2375373467899406e-05, "loss": 1.1997, "step": 17520 }, { "epoch": 0.40002738350600153, "grad_norm": 2.984375, "learning_rate": 1.237066836050533e-05, "loss": 1.2258, "step": 17530 }, { "epoch": 0.4002555793893478, "grad_norm": 3.15625, "learning_rate": 1.2365963253111253e-05, "loss": 1.1998, "step": 17540 }, { "epoch": 0.4004837752726941, "grad_norm": 3.109375, "learning_rate": 1.2361258145717177e-05, "loss": 1.1496, "step": 17550 }, { "epoch": 0.40071197115604035, "grad_norm": 3.140625, "learning_rate": 1.23565530383231e-05, "loss": 1.1689, "step": 17560 }, { "epoch": 0.4009401670393866, "grad_norm": 2.90625, "learning_rate": 1.2351847930929024e-05, "loss": 1.2477, "step": 17570 }, { "epoch": 0.4011683629227329, "grad_norm": 3.03125, "learning_rate": 1.2347142823534948e-05, "loss": 1.2212, "step": 17580 }, { "epoch": 0.40139655880607916, "grad_norm": 3.0625, "learning_rate": 1.2342437716140872e-05, "loss": 1.1939, "step": 17590 }, { "epoch": 0.4016247546894254, "grad_norm": 2.96875, "learning_rate": 1.2337732608746795e-05, "loss": 1.167, "step": 17600 }, { "epoch": 0.40185295057277165, "grad_norm": 2.890625, "learning_rate": 1.2333027501352719e-05, "loss": 1.2247, "step": 17610 }, { "epoch": 0.4020811464561179, "grad_norm": 3.546875, "learning_rate": 1.2328322393958643e-05, "loss": 1.1727, "step": 17620 }, { "epoch": 0.4023093423394642, "grad_norm": 3.25, "learning_rate": 1.2323617286564566e-05, "loss": 1.1988, "step": 17630 }, { "epoch": 0.40253753822281046, "grad_norm": 2.890625, "learning_rate": 1.231891217917049e-05, "loss": 1.1494, "step": 17640 }, { "epoch": 0.40276573410615674, "grad_norm": 3.0625, "learning_rate": 1.2314207071776414e-05, "loss": 1.228, "step": 17650 }, { "epoch": 0.402993929989503, "grad_norm": 3.09375, "learning_rate": 1.2309501964382338e-05, "loss": 1.1423, "step": 17660 }, { "epoch": 0.4032221258728493, "grad_norm": 3.453125, "learning_rate": 1.2304796856988261e-05, "loss": 1.2354, "step": 17670 }, { "epoch": 0.4034503217561955, "grad_norm": 3.125, "learning_rate": 1.2300091749594185e-05, "loss": 1.2779, "step": 17680 }, { "epoch": 0.40367851763954177, "grad_norm": 3.0, "learning_rate": 1.2295386642200109e-05, "loss": 1.2069, "step": 17690 }, { "epoch": 0.40390671352288804, "grad_norm": 3.109375, "learning_rate": 1.2290681534806032e-05, "loss": 1.1496, "step": 17700 }, { "epoch": 0.4041349094062343, "grad_norm": 3.21875, "learning_rate": 1.2285976427411956e-05, "loss": 1.1515, "step": 17710 }, { "epoch": 0.4043631052895806, "grad_norm": 3.109375, "learning_rate": 1.228127132001788e-05, "loss": 1.1678, "step": 17720 }, { "epoch": 0.40459130117292685, "grad_norm": 3.046875, "learning_rate": 1.2276566212623804e-05, "loss": 1.1911, "step": 17730 }, { "epoch": 0.4048194970562731, "grad_norm": 3.140625, "learning_rate": 1.2271861105229727e-05, "loss": 1.2232, "step": 17740 }, { "epoch": 0.40504769293961934, "grad_norm": 3.296875, "learning_rate": 1.2267155997835651e-05, "loss": 1.211, "step": 17750 }, { "epoch": 0.4052758888229656, "grad_norm": 3.078125, "learning_rate": 1.2262450890441575e-05, "loss": 1.1933, "step": 17760 }, { "epoch": 0.4055040847063119, "grad_norm": 2.984375, "learning_rate": 1.2257745783047498e-05, "loss": 1.1882, "step": 17770 }, { "epoch": 0.40573228058965816, "grad_norm": 2.921875, "learning_rate": 1.2253040675653422e-05, "loss": 1.1635, "step": 17780 }, { "epoch": 0.40596047647300443, "grad_norm": 3.203125, "learning_rate": 1.2248335568259346e-05, "loss": 1.214, "step": 17790 }, { "epoch": 0.4061886723563507, "grad_norm": 3.03125, "learning_rate": 1.224363046086527e-05, "loss": 1.2201, "step": 17800 }, { "epoch": 0.40641686823969697, "grad_norm": 3.09375, "learning_rate": 1.2238925353471193e-05, "loss": 1.1606, "step": 17810 }, { "epoch": 0.40664506412304324, "grad_norm": 3.03125, "learning_rate": 1.2234220246077117e-05, "loss": 1.2044, "step": 17820 }, { "epoch": 0.40687326000638946, "grad_norm": 3.53125, "learning_rate": 1.222951513868304e-05, "loss": 1.2152, "step": 17830 }, { "epoch": 0.40710145588973573, "grad_norm": 3.015625, "learning_rate": 1.2224810031288964e-05, "loss": 1.1938, "step": 17840 }, { "epoch": 0.407329651773082, "grad_norm": 3.265625, "learning_rate": 1.2220104923894888e-05, "loss": 1.1626, "step": 17850 }, { "epoch": 0.4075578476564283, "grad_norm": 3.328125, "learning_rate": 1.2215399816500812e-05, "loss": 1.189, "step": 17860 }, { "epoch": 0.40778604353977455, "grad_norm": 3.34375, "learning_rate": 1.2210694709106735e-05, "loss": 1.223, "step": 17870 }, { "epoch": 0.4080142394231208, "grad_norm": 3.28125, "learning_rate": 1.220598960171266e-05, "loss": 1.2124, "step": 17880 }, { "epoch": 0.4082424353064671, "grad_norm": 3.03125, "learning_rate": 1.2201284494318583e-05, "loss": 1.1578, "step": 17890 }, { "epoch": 0.40847063118981336, "grad_norm": 3.03125, "learning_rate": 1.2196579386924508e-05, "loss": 1.2152, "step": 17900 }, { "epoch": 0.4086988270731596, "grad_norm": 3.296875, "learning_rate": 1.2191874279530432e-05, "loss": 1.2107, "step": 17910 }, { "epoch": 0.40892702295650585, "grad_norm": 3.046875, "learning_rate": 1.2187169172136356e-05, "loss": 1.2041, "step": 17920 }, { "epoch": 0.4091552188398521, "grad_norm": 3.15625, "learning_rate": 1.218246406474228e-05, "loss": 1.2158, "step": 17930 }, { "epoch": 0.4093834147231984, "grad_norm": 3.03125, "learning_rate": 1.2177758957348203e-05, "loss": 1.2221, "step": 17940 }, { "epoch": 0.40961161060654466, "grad_norm": 3.0, "learning_rate": 1.2173053849954127e-05, "loss": 1.1839, "step": 17950 }, { "epoch": 0.40983980648989093, "grad_norm": 3.109375, "learning_rate": 1.216834874256005e-05, "loss": 1.1514, "step": 17960 }, { "epoch": 0.4100680023732372, "grad_norm": 2.71875, "learning_rate": 1.2163643635165974e-05, "loss": 1.225, "step": 17970 }, { "epoch": 0.4102961982565835, "grad_norm": 3.28125, "learning_rate": 1.2158938527771898e-05, "loss": 1.2322, "step": 17980 }, { "epoch": 0.4105243941399297, "grad_norm": 3.125, "learning_rate": 1.2154233420377822e-05, "loss": 1.1888, "step": 17990 }, { "epoch": 0.41075259002327597, "grad_norm": 2.984375, "learning_rate": 1.2149528312983745e-05, "loss": 1.1896, "step": 18000 }, { "epoch": 0.41098078590662224, "grad_norm": 3.015625, "learning_rate": 1.2144823205589669e-05, "loss": 1.1396, "step": 18010 }, { "epoch": 0.4112089817899685, "grad_norm": 3.3125, "learning_rate": 1.2140118098195593e-05, "loss": 1.2043, "step": 18020 }, { "epoch": 0.4114371776733148, "grad_norm": 3.03125, "learning_rate": 1.2135412990801517e-05, "loss": 1.1788, "step": 18030 }, { "epoch": 0.41166537355666105, "grad_norm": 3.234375, "learning_rate": 1.213070788340744e-05, "loss": 1.1903, "step": 18040 }, { "epoch": 0.4118935694400073, "grad_norm": 3.09375, "learning_rate": 1.2126002776013364e-05, "loss": 1.2159, "step": 18050 }, { "epoch": 0.41212176532335354, "grad_norm": 3.171875, "learning_rate": 1.2121297668619288e-05, "loss": 1.2249, "step": 18060 }, { "epoch": 0.4123499612066998, "grad_norm": 2.90625, "learning_rate": 1.2116592561225211e-05, "loss": 1.1912, "step": 18070 }, { "epoch": 0.4125781570900461, "grad_norm": 3.21875, "learning_rate": 1.2111887453831135e-05, "loss": 1.1476, "step": 18080 }, { "epoch": 0.41280635297339235, "grad_norm": 2.96875, "learning_rate": 1.2107182346437059e-05, "loss": 1.127, "step": 18090 }, { "epoch": 0.4130345488567386, "grad_norm": 2.96875, "learning_rate": 1.2102477239042983e-05, "loss": 1.2087, "step": 18100 }, { "epoch": 0.4132627447400849, "grad_norm": 3.515625, "learning_rate": 1.2097772131648906e-05, "loss": 1.2124, "step": 18110 }, { "epoch": 0.41349094062343117, "grad_norm": 2.78125, "learning_rate": 1.209306702425483e-05, "loss": 1.1929, "step": 18120 }, { "epoch": 0.41371913650677744, "grad_norm": 3.390625, "learning_rate": 1.2088361916860754e-05, "loss": 1.1206, "step": 18130 }, { "epoch": 0.41394733239012366, "grad_norm": 3.234375, "learning_rate": 1.2083656809466677e-05, "loss": 1.1987, "step": 18140 }, { "epoch": 0.41417552827346993, "grad_norm": 3.125, "learning_rate": 1.2078951702072601e-05, "loss": 1.2017, "step": 18150 }, { "epoch": 0.4144037241568162, "grad_norm": 3.046875, "learning_rate": 1.2074246594678525e-05, "loss": 1.2101, "step": 18160 }, { "epoch": 0.41463192004016247, "grad_norm": 2.984375, "learning_rate": 1.2069541487284449e-05, "loss": 1.1651, "step": 18170 }, { "epoch": 0.41486011592350874, "grad_norm": 3.0, "learning_rate": 1.2064836379890372e-05, "loss": 1.2008, "step": 18180 }, { "epoch": 0.415088311806855, "grad_norm": 2.96875, "learning_rate": 1.2060131272496296e-05, "loss": 1.2015, "step": 18190 }, { "epoch": 0.4153165076902013, "grad_norm": 3.015625, "learning_rate": 1.205542616510222e-05, "loss": 1.2019, "step": 18200 }, { "epoch": 0.41554470357354756, "grad_norm": 3.0, "learning_rate": 1.2050721057708143e-05, "loss": 1.2194, "step": 18210 }, { "epoch": 0.4157728994568938, "grad_norm": 2.9375, "learning_rate": 1.2046015950314067e-05, "loss": 1.1554, "step": 18220 }, { "epoch": 0.41600109534024005, "grad_norm": 3.0625, "learning_rate": 1.204131084291999e-05, "loss": 1.2021, "step": 18230 }, { "epoch": 0.4162292912235863, "grad_norm": 3.125, "learning_rate": 1.2036605735525915e-05, "loss": 1.2025, "step": 18240 }, { "epoch": 0.4164574871069326, "grad_norm": 3.28125, "learning_rate": 1.2031900628131838e-05, "loss": 1.2331, "step": 18250 }, { "epoch": 0.41668568299027886, "grad_norm": 3.0, "learning_rate": 1.2027195520737762e-05, "loss": 1.1547, "step": 18260 }, { "epoch": 0.41691387887362513, "grad_norm": 3.21875, "learning_rate": 1.2022490413343686e-05, "loss": 1.1885, "step": 18270 }, { "epoch": 0.4171420747569714, "grad_norm": 3.1875, "learning_rate": 1.201778530594961e-05, "loss": 1.1407, "step": 18280 }, { "epoch": 0.4173702706403177, "grad_norm": 2.875, "learning_rate": 1.2013080198555533e-05, "loss": 1.1895, "step": 18290 }, { "epoch": 0.4175984665236639, "grad_norm": 3.296875, "learning_rate": 1.2008375091161457e-05, "loss": 1.2464, "step": 18300 }, { "epoch": 0.41782666240701016, "grad_norm": 3.109375, "learning_rate": 1.200366998376738e-05, "loss": 1.1903, "step": 18310 }, { "epoch": 0.41805485829035643, "grad_norm": 3.1875, "learning_rate": 1.1998964876373304e-05, "loss": 1.2123, "step": 18320 }, { "epoch": 0.4182830541737027, "grad_norm": 2.890625, "learning_rate": 1.1994259768979228e-05, "loss": 1.1598, "step": 18330 }, { "epoch": 0.418511250057049, "grad_norm": 2.9375, "learning_rate": 1.1989554661585152e-05, "loss": 1.1744, "step": 18340 }, { "epoch": 0.41873944594039525, "grad_norm": 2.890625, "learning_rate": 1.1984849554191075e-05, "loss": 1.1854, "step": 18350 }, { "epoch": 0.4189676418237415, "grad_norm": 3.0, "learning_rate": 1.1980144446796999e-05, "loss": 1.2072, "step": 18360 }, { "epoch": 0.41919583770708774, "grad_norm": 3.328125, "learning_rate": 1.1975439339402923e-05, "loss": 1.2394, "step": 18370 }, { "epoch": 0.419424033590434, "grad_norm": 3.296875, "learning_rate": 1.1970734232008847e-05, "loss": 1.1943, "step": 18380 }, { "epoch": 0.4196522294737803, "grad_norm": 2.8125, "learning_rate": 1.196602912461477e-05, "loss": 1.2144, "step": 18390 }, { "epoch": 0.41988042535712655, "grad_norm": 3.234375, "learning_rate": 1.1961324017220694e-05, "loss": 1.1736, "step": 18400 }, { "epoch": 0.4201086212404728, "grad_norm": 3.078125, "learning_rate": 1.1956618909826618e-05, "loss": 1.1917, "step": 18410 }, { "epoch": 0.4203368171238191, "grad_norm": 3.203125, "learning_rate": 1.1951913802432541e-05, "loss": 1.1776, "step": 18420 }, { "epoch": 0.42056501300716537, "grad_norm": 2.890625, "learning_rate": 1.1947208695038465e-05, "loss": 1.1483, "step": 18430 }, { "epoch": 0.42079320889051164, "grad_norm": 3.09375, "learning_rate": 1.1942503587644389e-05, "loss": 1.146, "step": 18440 }, { "epoch": 0.42102140477385785, "grad_norm": 3.03125, "learning_rate": 1.1937798480250312e-05, "loss": 1.1688, "step": 18450 }, { "epoch": 0.4212496006572041, "grad_norm": 3.015625, "learning_rate": 1.1933093372856236e-05, "loss": 1.1654, "step": 18460 }, { "epoch": 0.4214777965405504, "grad_norm": 3.40625, "learning_rate": 1.1928388265462162e-05, "loss": 1.2247, "step": 18470 }, { "epoch": 0.42170599242389667, "grad_norm": 2.96875, "learning_rate": 1.1923683158068085e-05, "loss": 1.2314, "step": 18480 }, { "epoch": 0.42193418830724294, "grad_norm": 3.0, "learning_rate": 1.1918978050674009e-05, "loss": 1.2299, "step": 18490 }, { "epoch": 0.4221623841905892, "grad_norm": 3.203125, "learning_rate": 1.1914272943279933e-05, "loss": 1.2182, "step": 18500 }, { "epoch": 0.4223905800739355, "grad_norm": 2.96875, "learning_rate": 1.1909567835885856e-05, "loss": 1.1833, "step": 18510 }, { "epoch": 0.42261877595728176, "grad_norm": 3.265625, "learning_rate": 1.190486272849178e-05, "loss": 1.1457, "step": 18520 }, { "epoch": 0.42284697184062797, "grad_norm": 2.75, "learning_rate": 1.1900157621097704e-05, "loss": 1.2203, "step": 18530 }, { "epoch": 0.42307516772397424, "grad_norm": 3.5625, "learning_rate": 1.1895452513703628e-05, "loss": 1.2072, "step": 18540 }, { "epoch": 0.4233033636073205, "grad_norm": 2.96875, "learning_rate": 1.1890747406309551e-05, "loss": 1.1445, "step": 18550 }, { "epoch": 0.4235315594906668, "grad_norm": 3.046875, "learning_rate": 1.1886042298915475e-05, "loss": 1.1207, "step": 18560 }, { "epoch": 0.42375975537401306, "grad_norm": 3.34375, "learning_rate": 1.1881337191521399e-05, "loss": 1.183, "step": 18570 }, { "epoch": 0.42398795125735933, "grad_norm": 3.03125, "learning_rate": 1.1876632084127322e-05, "loss": 1.1124, "step": 18580 }, { "epoch": 0.4242161471407056, "grad_norm": 3.046875, "learning_rate": 1.1871926976733246e-05, "loss": 1.2069, "step": 18590 }, { "epoch": 0.4244443430240519, "grad_norm": 3.171875, "learning_rate": 1.186722186933917e-05, "loss": 1.219, "step": 18600 }, { "epoch": 0.4246725389073981, "grad_norm": 3.234375, "learning_rate": 1.1862516761945094e-05, "loss": 1.2535, "step": 18610 }, { "epoch": 0.42490073479074436, "grad_norm": 3.46875, "learning_rate": 1.1857811654551017e-05, "loss": 1.1989, "step": 18620 }, { "epoch": 0.42512893067409063, "grad_norm": 3.21875, "learning_rate": 1.1853106547156941e-05, "loss": 1.1824, "step": 18630 }, { "epoch": 0.4253571265574369, "grad_norm": 3.078125, "learning_rate": 1.1848401439762865e-05, "loss": 1.2194, "step": 18640 }, { "epoch": 0.4255853224407832, "grad_norm": 3.1875, "learning_rate": 1.1843696332368788e-05, "loss": 1.1714, "step": 18650 }, { "epoch": 0.42581351832412945, "grad_norm": 3.140625, "learning_rate": 1.1838991224974712e-05, "loss": 1.2262, "step": 18660 }, { "epoch": 0.4260417142074757, "grad_norm": 3.34375, "learning_rate": 1.1834286117580636e-05, "loss": 1.2267, "step": 18670 }, { "epoch": 0.42626991009082194, "grad_norm": 3.125, "learning_rate": 1.182958101018656e-05, "loss": 1.1738, "step": 18680 }, { "epoch": 0.4264981059741682, "grad_norm": 2.90625, "learning_rate": 1.1824875902792483e-05, "loss": 1.2036, "step": 18690 }, { "epoch": 0.4267263018575145, "grad_norm": 2.984375, "learning_rate": 1.1820170795398407e-05, "loss": 1.1922, "step": 18700 }, { "epoch": 0.42695449774086075, "grad_norm": 3.015625, "learning_rate": 1.181546568800433e-05, "loss": 1.2037, "step": 18710 }, { "epoch": 0.427182693624207, "grad_norm": 2.859375, "learning_rate": 1.1810760580610254e-05, "loss": 1.225, "step": 18720 }, { "epoch": 0.4274108895075533, "grad_norm": 3.40625, "learning_rate": 1.1806055473216178e-05, "loss": 1.2116, "step": 18730 }, { "epoch": 0.42763908539089956, "grad_norm": 3.046875, "learning_rate": 1.1801350365822102e-05, "loss": 1.2013, "step": 18740 }, { "epoch": 0.42786728127424584, "grad_norm": 3.046875, "learning_rate": 1.1796645258428026e-05, "loss": 1.1903, "step": 18750 }, { "epoch": 0.42809547715759205, "grad_norm": 2.953125, "learning_rate": 1.179194015103395e-05, "loss": 1.1825, "step": 18760 }, { "epoch": 0.4283236730409383, "grad_norm": 2.921875, "learning_rate": 1.1787235043639873e-05, "loss": 1.2005, "step": 18770 }, { "epoch": 0.4285518689242846, "grad_norm": 3.109375, "learning_rate": 1.1782529936245797e-05, "loss": 1.2062, "step": 18780 }, { "epoch": 0.42878006480763087, "grad_norm": 2.984375, "learning_rate": 1.177782482885172e-05, "loss": 1.2366, "step": 18790 }, { "epoch": 0.42900826069097714, "grad_norm": 2.96875, "learning_rate": 1.1773119721457644e-05, "loss": 1.186, "step": 18800 }, { "epoch": 0.4292364565743234, "grad_norm": 3.703125, "learning_rate": 1.1768414614063568e-05, "loss": 1.1374, "step": 18810 }, { "epoch": 0.4294646524576697, "grad_norm": 3.046875, "learning_rate": 1.1763709506669492e-05, "loss": 1.2339, "step": 18820 }, { "epoch": 0.42969284834101595, "grad_norm": 3.390625, "learning_rate": 1.1759004399275415e-05, "loss": 1.1762, "step": 18830 }, { "epoch": 0.42992104422436217, "grad_norm": 3.421875, "learning_rate": 1.1754299291881339e-05, "loss": 1.2382, "step": 18840 }, { "epoch": 0.43014924010770844, "grad_norm": 3.046875, "learning_rate": 1.1749594184487263e-05, "loss": 1.196, "step": 18850 }, { "epoch": 0.4303774359910547, "grad_norm": 2.890625, "learning_rate": 1.1744889077093186e-05, "loss": 1.2174, "step": 18860 }, { "epoch": 0.430605631874401, "grad_norm": 3.015625, "learning_rate": 1.174018396969911e-05, "loss": 1.1659, "step": 18870 }, { "epoch": 0.43083382775774726, "grad_norm": 2.984375, "learning_rate": 1.1735478862305032e-05, "loss": 1.2361, "step": 18880 }, { "epoch": 0.43106202364109353, "grad_norm": 2.890625, "learning_rate": 1.1730773754910956e-05, "loss": 1.2098, "step": 18890 }, { "epoch": 0.4312902195244398, "grad_norm": 3.96875, "learning_rate": 1.172606864751688e-05, "loss": 1.2084, "step": 18900 }, { "epoch": 0.43151841540778607, "grad_norm": 3.0, "learning_rate": 1.1721363540122803e-05, "loss": 1.2031, "step": 18910 }, { "epoch": 0.4317466112911323, "grad_norm": 2.9375, "learning_rate": 1.1716658432728727e-05, "loss": 1.1708, "step": 18920 }, { "epoch": 0.43197480717447856, "grad_norm": 3.171875, "learning_rate": 1.171195332533465e-05, "loss": 1.2127, "step": 18930 }, { "epoch": 0.43220300305782483, "grad_norm": 3.25, "learning_rate": 1.1707248217940574e-05, "loss": 1.1885, "step": 18940 }, { "epoch": 0.4324311989411711, "grad_norm": 2.859375, "learning_rate": 1.1702543110546498e-05, "loss": 1.2142, "step": 18950 }, { "epoch": 0.4326593948245174, "grad_norm": 2.984375, "learning_rate": 1.1697838003152422e-05, "loss": 1.17, "step": 18960 }, { "epoch": 0.43288759070786365, "grad_norm": 3.234375, "learning_rate": 1.1693132895758346e-05, "loss": 1.1718, "step": 18970 }, { "epoch": 0.4331157865912099, "grad_norm": 3.0, "learning_rate": 1.168842778836427e-05, "loss": 1.2227, "step": 18980 }, { "epoch": 0.43334398247455613, "grad_norm": 2.84375, "learning_rate": 1.1683722680970193e-05, "loss": 1.1726, "step": 18990 }, { "epoch": 0.4335721783579024, "grad_norm": 2.984375, "learning_rate": 1.1679017573576117e-05, "loss": 1.2254, "step": 19000 }, { "epoch": 0.4338003742412487, "grad_norm": 3.171875, "learning_rate": 1.167431246618204e-05, "loss": 1.2047, "step": 19010 }, { "epoch": 0.43402857012459495, "grad_norm": 3.21875, "learning_rate": 1.1669607358787964e-05, "loss": 1.2149, "step": 19020 }, { "epoch": 0.4342567660079412, "grad_norm": 3.015625, "learning_rate": 1.1664902251393888e-05, "loss": 1.1431, "step": 19030 }, { "epoch": 0.4344849618912875, "grad_norm": 3.171875, "learning_rate": 1.1660197143999812e-05, "loss": 1.1877, "step": 19040 }, { "epoch": 0.43471315777463376, "grad_norm": 3.125, "learning_rate": 1.1655492036605735e-05, "loss": 1.1537, "step": 19050 }, { "epoch": 0.43494135365798003, "grad_norm": 3.703125, "learning_rate": 1.1650786929211659e-05, "loss": 1.2254, "step": 19060 }, { "epoch": 0.43516954954132625, "grad_norm": 3.09375, "learning_rate": 1.1646081821817583e-05, "loss": 1.1734, "step": 19070 }, { "epoch": 0.4353977454246725, "grad_norm": 2.953125, "learning_rate": 1.1641376714423506e-05, "loss": 1.203, "step": 19080 }, { "epoch": 0.4356259413080188, "grad_norm": 3.140625, "learning_rate": 1.163667160702943e-05, "loss": 1.1626, "step": 19090 }, { "epoch": 0.43585413719136507, "grad_norm": 3.046875, "learning_rate": 1.1631966499635354e-05, "loss": 1.1942, "step": 19100 }, { "epoch": 0.43608233307471134, "grad_norm": 3.09375, "learning_rate": 1.1627261392241278e-05, "loss": 1.2225, "step": 19110 }, { "epoch": 0.4363105289580576, "grad_norm": 3.265625, "learning_rate": 1.1622556284847201e-05, "loss": 1.192, "step": 19120 }, { "epoch": 0.4365387248414039, "grad_norm": 2.796875, "learning_rate": 1.1617851177453125e-05, "loss": 1.1856, "step": 19130 }, { "epoch": 0.43676692072475015, "grad_norm": 3.1875, "learning_rate": 1.1613146070059049e-05, "loss": 1.1835, "step": 19140 }, { "epoch": 0.43699511660809637, "grad_norm": 3.1875, "learning_rate": 1.1608440962664972e-05, "loss": 1.1669, "step": 19150 }, { "epoch": 0.43722331249144264, "grad_norm": 3.203125, "learning_rate": 1.1603735855270896e-05, "loss": 1.2184, "step": 19160 }, { "epoch": 0.4374515083747889, "grad_norm": 3.03125, "learning_rate": 1.159903074787682e-05, "loss": 1.1657, "step": 19170 }, { "epoch": 0.4376797042581352, "grad_norm": 2.84375, "learning_rate": 1.1594325640482743e-05, "loss": 1.1671, "step": 19180 }, { "epoch": 0.43790790014148145, "grad_norm": 2.984375, "learning_rate": 1.1589620533088667e-05, "loss": 1.2467, "step": 19190 }, { "epoch": 0.4381360960248277, "grad_norm": 2.734375, "learning_rate": 1.1584915425694591e-05, "loss": 1.1184, "step": 19200 }, { "epoch": 0.438364291908174, "grad_norm": 2.90625, "learning_rate": 1.1580210318300515e-05, "loss": 1.2081, "step": 19210 }, { "epoch": 0.4385924877915202, "grad_norm": 3.703125, "learning_rate": 1.1575505210906438e-05, "loss": 1.1613, "step": 19220 }, { "epoch": 0.4388206836748665, "grad_norm": 3.375, "learning_rate": 1.1570800103512364e-05, "loss": 1.186, "step": 19230 }, { "epoch": 0.43904887955821276, "grad_norm": 3.625, "learning_rate": 1.1566094996118287e-05, "loss": 1.1795, "step": 19240 }, { "epoch": 0.43927707544155903, "grad_norm": 3.15625, "learning_rate": 1.1561389888724211e-05, "loss": 1.2347, "step": 19250 }, { "epoch": 0.4395052713249053, "grad_norm": 3.046875, "learning_rate": 1.1556684781330135e-05, "loss": 1.2002, "step": 19260 }, { "epoch": 0.43973346720825157, "grad_norm": 3.40625, "learning_rate": 1.1551979673936059e-05, "loss": 1.2053, "step": 19270 }, { "epoch": 0.43996166309159784, "grad_norm": 3.28125, "learning_rate": 1.1547274566541982e-05, "loss": 1.1783, "step": 19280 }, { "epoch": 0.4401898589749441, "grad_norm": 3.078125, "learning_rate": 1.1542569459147906e-05, "loss": 1.2048, "step": 19290 }, { "epoch": 0.44041805485829033, "grad_norm": 2.984375, "learning_rate": 1.153786435175383e-05, "loss": 1.233, "step": 19300 }, { "epoch": 0.4406462507416366, "grad_norm": 2.96875, "learning_rate": 1.1533159244359753e-05, "loss": 1.2006, "step": 19310 }, { "epoch": 0.4408744466249829, "grad_norm": 2.9375, "learning_rate": 1.1528454136965677e-05, "loss": 1.1704, "step": 19320 }, { "epoch": 0.44110264250832915, "grad_norm": 3.0625, "learning_rate": 1.1523749029571601e-05, "loss": 1.1712, "step": 19330 }, { "epoch": 0.4413308383916754, "grad_norm": 3.328125, "learning_rate": 1.1519043922177525e-05, "loss": 1.208, "step": 19340 }, { "epoch": 0.4415590342750217, "grad_norm": 2.96875, "learning_rate": 1.1514338814783448e-05, "loss": 1.2004, "step": 19350 }, { "epoch": 0.44178723015836796, "grad_norm": 3.140625, "learning_rate": 1.1509633707389372e-05, "loss": 1.1869, "step": 19360 }, { "epoch": 0.44201542604171423, "grad_norm": 2.828125, "learning_rate": 1.1504928599995296e-05, "loss": 1.2113, "step": 19370 }, { "epoch": 0.44224362192506045, "grad_norm": 3.015625, "learning_rate": 1.150022349260122e-05, "loss": 1.1501, "step": 19380 }, { "epoch": 0.4424718178084067, "grad_norm": 3.125, "learning_rate": 1.1495518385207143e-05, "loss": 1.1711, "step": 19390 }, { "epoch": 0.442700013691753, "grad_norm": 3.1875, "learning_rate": 1.1490813277813067e-05, "loss": 1.2138, "step": 19400 }, { "epoch": 0.44292820957509926, "grad_norm": 3.15625, "learning_rate": 1.148610817041899e-05, "loss": 1.2578, "step": 19410 }, { "epoch": 0.44315640545844553, "grad_norm": 3.015625, "learning_rate": 1.1481403063024914e-05, "loss": 1.1842, "step": 19420 }, { "epoch": 0.4433846013417918, "grad_norm": 3.140625, "learning_rate": 1.1476697955630838e-05, "loss": 1.2109, "step": 19430 }, { "epoch": 0.4436127972251381, "grad_norm": 3.109375, "learning_rate": 1.1471992848236762e-05, "loss": 1.1795, "step": 19440 }, { "epoch": 0.44384099310848435, "grad_norm": 2.828125, "learning_rate": 1.1467287740842685e-05, "loss": 1.2428, "step": 19450 }, { "epoch": 0.44406918899183057, "grad_norm": 3.109375, "learning_rate": 1.1462582633448609e-05, "loss": 1.1561, "step": 19460 }, { "epoch": 0.44429738487517684, "grad_norm": 3.328125, "learning_rate": 1.1457877526054533e-05, "loss": 1.2757, "step": 19470 }, { "epoch": 0.4445255807585231, "grad_norm": 3.203125, "learning_rate": 1.1453172418660457e-05, "loss": 1.2242, "step": 19480 }, { "epoch": 0.4447537766418694, "grad_norm": 3.03125, "learning_rate": 1.144846731126638e-05, "loss": 1.2187, "step": 19490 }, { "epoch": 0.44498197252521565, "grad_norm": 3.03125, "learning_rate": 1.1443762203872304e-05, "loss": 1.209, "step": 19500 }, { "epoch": 0.4452101684085619, "grad_norm": 3.078125, "learning_rate": 1.1439057096478228e-05, "loss": 1.1854, "step": 19510 }, { "epoch": 0.4454383642919082, "grad_norm": 3.1875, "learning_rate": 1.1434351989084151e-05, "loss": 1.2314, "step": 19520 }, { "epoch": 0.4456665601752544, "grad_norm": 3.796875, "learning_rate": 1.1429646881690075e-05, "loss": 1.2632, "step": 19530 }, { "epoch": 0.4458947560586007, "grad_norm": 3.15625, "learning_rate": 1.1424941774295999e-05, "loss": 1.2216, "step": 19540 }, { "epoch": 0.44612295194194695, "grad_norm": 3.0625, "learning_rate": 1.1420236666901923e-05, "loss": 1.2109, "step": 19550 }, { "epoch": 0.4463511478252932, "grad_norm": 3.171875, "learning_rate": 1.1415531559507846e-05, "loss": 1.2006, "step": 19560 }, { "epoch": 0.4465793437086395, "grad_norm": 2.875, "learning_rate": 1.141082645211377e-05, "loss": 1.1739, "step": 19570 }, { "epoch": 0.44680753959198577, "grad_norm": 3.203125, "learning_rate": 1.1406121344719694e-05, "loss": 1.2249, "step": 19580 }, { "epoch": 0.44703573547533204, "grad_norm": 2.90625, "learning_rate": 1.1401416237325617e-05, "loss": 1.1932, "step": 19590 }, { "epoch": 0.4472639313586783, "grad_norm": 3.09375, "learning_rate": 1.1396711129931541e-05, "loss": 1.1845, "step": 19600 }, { "epoch": 0.44749212724202453, "grad_norm": 3.46875, "learning_rate": 1.1392006022537465e-05, "loss": 1.1927, "step": 19610 }, { "epoch": 0.4477203231253708, "grad_norm": 3.234375, "learning_rate": 1.1387300915143389e-05, "loss": 1.2246, "step": 19620 }, { "epoch": 0.44794851900871707, "grad_norm": 3.40625, "learning_rate": 1.1382595807749312e-05, "loss": 1.1443, "step": 19630 }, { "epoch": 0.44817671489206334, "grad_norm": 3.078125, "learning_rate": 1.1377890700355236e-05, "loss": 1.1713, "step": 19640 }, { "epoch": 0.4484049107754096, "grad_norm": 3.09375, "learning_rate": 1.137318559296116e-05, "loss": 1.2166, "step": 19650 }, { "epoch": 0.4486331066587559, "grad_norm": 2.984375, "learning_rate": 1.1368480485567083e-05, "loss": 1.1685, "step": 19660 }, { "epoch": 0.44886130254210216, "grad_norm": 2.859375, "learning_rate": 1.1363775378173007e-05, "loss": 1.1804, "step": 19670 }, { "epoch": 0.44908949842544843, "grad_norm": 3.15625, "learning_rate": 1.135907027077893e-05, "loss": 1.1702, "step": 19680 }, { "epoch": 0.44931769430879465, "grad_norm": 3.0625, "learning_rate": 1.1354365163384855e-05, "loss": 1.18, "step": 19690 }, { "epoch": 0.4495458901921409, "grad_norm": 3.328125, "learning_rate": 1.1349660055990778e-05, "loss": 1.2117, "step": 19700 }, { "epoch": 0.4497740860754872, "grad_norm": 3.25, "learning_rate": 1.1344954948596702e-05, "loss": 1.164, "step": 19710 }, { "epoch": 0.45000228195883346, "grad_norm": 3.28125, "learning_rate": 1.1340249841202626e-05, "loss": 1.2209, "step": 19720 }, { "epoch": 0.45023047784217973, "grad_norm": 3.484375, "learning_rate": 1.133554473380855e-05, "loss": 1.1876, "step": 19730 }, { "epoch": 0.450458673725526, "grad_norm": 3.328125, "learning_rate": 1.1330839626414473e-05, "loss": 1.1777, "step": 19740 }, { "epoch": 0.4506868696088723, "grad_norm": 3.03125, "learning_rate": 1.1326134519020397e-05, "loss": 1.1754, "step": 19750 }, { "epoch": 0.45091506549221855, "grad_norm": 3.09375, "learning_rate": 1.132142941162632e-05, "loss": 1.1656, "step": 19760 }, { "epoch": 0.45114326137556476, "grad_norm": 3.046875, "learning_rate": 1.1316724304232244e-05, "loss": 1.1942, "step": 19770 }, { "epoch": 0.45137145725891104, "grad_norm": 3.09375, "learning_rate": 1.1312019196838168e-05, "loss": 1.1456, "step": 19780 }, { "epoch": 0.4515996531422573, "grad_norm": 2.828125, "learning_rate": 1.1307314089444092e-05, "loss": 1.2239, "step": 19790 }, { "epoch": 0.4518278490256036, "grad_norm": 2.9375, "learning_rate": 1.1302608982050017e-05, "loss": 1.2019, "step": 19800 }, { "epoch": 0.45205604490894985, "grad_norm": 3.234375, "learning_rate": 1.129790387465594e-05, "loss": 1.1859, "step": 19810 }, { "epoch": 0.4522842407922961, "grad_norm": 3.046875, "learning_rate": 1.1293198767261864e-05, "loss": 1.157, "step": 19820 }, { "epoch": 0.4525124366756424, "grad_norm": 3.109375, "learning_rate": 1.1288493659867788e-05, "loss": 1.2025, "step": 19830 }, { "epoch": 0.4527406325589886, "grad_norm": 2.90625, "learning_rate": 1.1283788552473712e-05, "loss": 1.1781, "step": 19840 }, { "epoch": 0.4529688284423349, "grad_norm": 2.921875, "learning_rate": 1.1279083445079636e-05, "loss": 1.2091, "step": 19850 }, { "epoch": 0.45319702432568115, "grad_norm": 3.390625, "learning_rate": 1.127437833768556e-05, "loss": 1.201, "step": 19860 }, { "epoch": 0.4534252202090274, "grad_norm": 3.171875, "learning_rate": 1.1269673230291483e-05, "loss": 1.2125, "step": 19870 }, { "epoch": 0.4536534160923737, "grad_norm": 3.234375, "learning_rate": 1.1264968122897407e-05, "loss": 1.2361, "step": 19880 }, { "epoch": 0.45388161197571997, "grad_norm": 3.140625, "learning_rate": 1.126026301550333e-05, "loss": 1.1994, "step": 19890 }, { "epoch": 0.45410980785906624, "grad_norm": 2.78125, "learning_rate": 1.1255557908109254e-05, "loss": 1.2358, "step": 19900 }, { "epoch": 0.4543380037424125, "grad_norm": 3.375, "learning_rate": 1.1250852800715178e-05, "loss": 1.2054, "step": 19910 }, { "epoch": 0.4545661996257587, "grad_norm": 3.0, "learning_rate": 1.1246147693321102e-05, "loss": 1.1829, "step": 19920 }, { "epoch": 0.454794395509105, "grad_norm": 3.0625, "learning_rate": 1.1241442585927025e-05, "loss": 1.179, "step": 19930 }, { "epoch": 0.45502259139245127, "grad_norm": 3.109375, "learning_rate": 1.1236737478532949e-05, "loss": 1.179, "step": 19940 }, { "epoch": 0.45525078727579754, "grad_norm": 3.03125, "learning_rate": 1.1232032371138873e-05, "loss": 1.1881, "step": 19950 }, { "epoch": 0.4554789831591438, "grad_norm": 3.140625, "learning_rate": 1.1227327263744796e-05, "loss": 1.1692, "step": 19960 }, { "epoch": 0.4557071790424901, "grad_norm": 3.09375, "learning_rate": 1.122262215635072e-05, "loss": 1.1412, "step": 19970 }, { "epoch": 0.45593537492583636, "grad_norm": 3.15625, "learning_rate": 1.1217917048956644e-05, "loss": 1.1652, "step": 19980 }, { "epoch": 0.45616357080918263, "grad_norm": 3.171875, "learning_rate": 1.1213211941562568e-05, "loss": 1.127, "step": 19990 }, { "epoch": 0.45639176669252884, "grad_norm": 3.296875, "learning_rate": 1.1208506834168491e-05, "loss": 1.2107, "step": 20000 }, { "epoch": 0.4566199625758751, "grad_norm": 3.03125, "learning_rate": 1.1203801726774415e-05, "loss": 1.1498, "step": 20010 }, { "epoch": 0.4568481584592214, "grad_norm": 2.9375, "learning_rate": 1.1199096619380339e-05, "loss": 1.2078, "step": 20020 }, { "epoch": 0.45707635434256766, "grad_norm": 2.9375, "learning_rate": 1.1194391511986262e-05, "loss": 1.175, "step": 20030 }, { "epoch": 0.45730455022591393, "grad_norm": 3.390625, "learning_rate": 1.1189686404592186e-05, "loss": 1.2801, "step": 20040 }, { "epoch": 0.4575327461092602, "grad_norm": 3.234375, "learning_rate": 1.118498129719811e-05, "loss": 1.2289, "step": 20050 }, { "epoch": 0.4577609419926065, "grad_norm": 3.203125, "learning_rate": 1.1180276189804034e-05, "loss": 1.179, "step": 20060 }, { "epoch": 0.45798913787595275, "grad_norm": 3.109375, "learning_rate": 1.1175571082409957e-05, "loss": 1.2351, "step": 20070 }, { "epoch": 0.45821733375929896, "grad_norm": 3.046875, "learning_rate": 1.1170865975015881e-05, "loss": 1.1801, "step": 20080 }, { "epoch": 0.45844552964264523, "grad_norm": 3.234375, "learning_rate": 1.1166160867621805e-05, "loss": 1.1623, "step": 20090 }, { "epoch": 0.4586737255259915, "grad_norm": 2.953125, "learning_rate": 1.1161455760227728e-05, "loss": 1.1873, "step": 20100 }, { "epoch": 0.4589019214093378, "grad_norm": 3.25, "learning_rate": 1.1156750652833652e-05, "loss": 1.1697, "step": 20110 }, { "epoch": 0.45913011729268405, "grad_norm": 3.015625, "learning_rate": 1.1152045545439576e-05, "loss": 1.2319, "step": 20120 }, { "epoch": 0.4593583131760303, "grad_norm": 3.0625, "learning_rate": 1.11473404380455e-05, "loss": 1.1543, "step": 20130 }, { "epoch": 0.4595865090593766, "grad_norm": 3.3125, "learning_rate": 1.1142635330651423e-05, "loss": 1.2516, "step": 20140 }, { "epoch": 0.4598147049427228, "grad_norm": 2.875, "learning_rate": 1.1137930223257347e-05, "loss": 1.1923, "step": 20150 }, { "epoch": 0.4600429008260691, "grad_norm": 3.15625, "learning_rate": 1.113322511586327e-05, "loss": 1.154, "step": 20160 }, { "epoch": 0.46027109670941535, "grad_norm": 3.296875, "learning_rate": 1.1128520008469194e-05, "loss": 1.1847, "step": 20170 }, { "epoch": 0.4604992925927616, "grad_norm": 2.921875, "learning_rate": 1.1123814901075118e-05, "loss": 1.2058, "step": 20180 }, { "epoch": 0.4607274884761079, "grad_norm": 3.390625, "learning_rate": 1.1119109793681042e-05, "loss": 1.1677, "step": 20190 }, { "epoch": 0.46095568435945417, "grad_norm": 3.140625, "learning_rate": 1.1114404686286966e-05, "loss": 1.211, "step": 20200 }, { "epoch": 0.46118388024280044, "grad_norm": 2.859375, "learning_rate": 1.110969957889289e-05, "loss": 1.1993, "step": 20210 }, { "epoch": 0.4614120761261467, "grad_norm": 3.375, "learning_rate": 1.1104994471498813e-05, "loss": 1.1172, "step": 20220 }, { "epoch": 0.4616402720094929, "grad_norm": 3.28125, "learning_rate": 1.1100289364104737e-05, "loss": 1.2414, "step": 20230 }, { "epoch": 0.4618684678928392, "grad_norm": 3.265625, "learning_rate": 1.109558425671066e-05, "loss": 1.251, "step": 20240 }, { "epoch": 0.46209666377618547, "grad_norm": 3.078125, "learning_rate": 1.1090879149316584e-05, "loss": 1.1799, "step": 20250 }, { "epoch": 0.46232485965953174, "grad_norm": 2.859375, "learning_rate": 1.1086174041922508e-05, "loss": 1.1637, "step": 20260 }, { "epoch": 0.462553055542878, "grad_norm": 3.65625, "learning_rate": 1.1081468934528432e-05, "loss": 1.1311, "step": 20270 }, { "epoch": 0.4627812514262243, "grad_norm": 3.0, "learning_rate": 1.1076763827134355e-05, "loss": 1.2163, "step": 20280 }, { "epoch": 0.46300944730957055, "grad_norm": 2.84375, "learning_rate": 1.1072058719740279e-05, "loss": 1.2106, "step": 20290 }, { "epoch": 0.4632376431929168, "grad_norm": 2.9375, "learning_rate": 1.1067353612346203e-05, "loss": 1.1607, "step": 20300 }, { "epoch": 0.46346583907626304, "grad_norm": 3.484375, "learning_rate": 1.1062648504952126e-05, "loss": 1.2217, "step": 20310 }, { "epoch": 0.4636940349596093, "grad_norm": 2.984375, "learning_rate": 1.105794339755805e-05, "loss": 1.1702, "step": 20320 }, { "epoch": 0.4639222308429556, "grad_norm": 2.953125, "learning_rate": 1.1053238290163974e-05, "loss": 1.1422, "step": 20330 }, { "epoch": 0.46415042672630186, "grad_norm": 2.96875, "learning_rate": 1.1048533182769898e-05, "loss": 1.1669, "step": 20340 }, { "epoch": 0.46437862260964813, "grad_norm": 3.171875, "learning_rate": 1.1043828075375821e-05, "loss": 1.2771, "step": 20350 }, { "epoch": 0.4646068184929944, "grad_norm": 3.0, "learning_rate": 1.1039122967981745e-05, "loss": 1.1827, "step": 20360 }, { "epoch": 0.46483501437634067, "grad_norm": 3.21875, "learning_rate": 1.1034417860587669e-05, "loss": 1.2069, "step": 20370 }, { "epoch": 0.4650632102596869, "grad_norm": 3.15625, "learning_rate": 1.1029712753193594e-05, "loss": 1.2365, "step": 20380 }, { "epoch": 0.46529140614303316, "grad_norm": 3.296875, "learning_rate": 1.1025007645799518e-05, "loss": 1.1643, "step": 20390 }, { "epoch": 0.46551960202637943, "grad_norm": 3.40625, "learning_rate": 1.1020302538405441e-05, "loss": 1.2117, "step": 20400 }, { "epoch": 0.4657477979097257, "grad_norm": 3.171875, "learning_rate": 1.1015597431011365e-05, "loss": 1.2296, "step": 20410 }, { "epoch": 0.465975993793072, "grad_norm": 2.859375, "learning_rate": 1.1010892323617289e-05, "loss": 1.1658, "step": 20420 }, { "epoch": 0.46620418967641825, "grad_norm": 3.09375, "learning_rate": 1.1006187216223213e-05, "loss": 1.1877, "step": 20430 }, { "epoch": 0.4664323855597645, "grad_norm": 2.953125, "learning_rate": 1.1001482108829136e-05, "loss": 1.1795, "step": 20440 }, { "epoch": 0.4666605814431108, "grad_norm": 3.203125, "learning_rate": 1.099677700143506e-05, "loss": 1.2146, "step": 20450 }, { "epoch": 0.466888777326457, "grad_norm": 3.3125, "learning_rate": 1.0992071894040984e-05, "loss": 1.226, "step": 20460 }, { "epoch": 0.4671169732098033, "grad_norm": 3.25, "learning_rate": 1.0987366786646907e-05, "loss": 1.1867, "step": 20470 }, { "epoch": 0.46734516909314955, "grad_norm": 3.296875, "learning_rate": 1.0982661679252831e-05, "loss": 1.1652, "step": 20480 }, { "epoch": 0.4675733649764958, "grad_norm": 3.0, "learning_rate": 1.0977956571858755e-05, "loss": 1.2671, "step": 20490 }, { "epoch": 0.4678015608598421, "grad_norm": 3.171875, "learning_rate": 1.0973251464464679e-05, "loss": 1.206, "step": 20500 }, { "epoch": 0.46802975674318836, "grad_norm": 3.09375, "learning_rate": 1.0968546357070602e-05, "loss": 1.1853, "step": 20510 }, { "epoch": 0.46825795262653463, "grad_norm": 3.125, "learning_rate": 1.0963841249676526e-05, "loss": 1.2295, "step": 20520 }, { "epoch": 0.4684861485098809, "grad_norm": 3.171875, "learning_rate": 1.095913614228245e-05, "loss": 1.1413, "step": 20530 }, { "epoch": 0.4687143443932271, "grad_norm": 3.140625, "learning_rate": 1.0954431034888373e-05, "loss": 1.2104, "step": 20540 }, { "epoch": 0.4689425402765734, "grad_norm": 3.046875, "learning_rate": 1.0949725927494297e-05, "loss": 1.2009, "step": 20550 }, { "epoch": 0.46917073615991967, "grad_norm": 3.109375, "learning_rate": 1.0945020820100221e-05, "loss": 1.1869, "step": 20560 }, { "epoch": 0.46939893204326594, "grad_norm": 3.34375, "learning_rate": 1.0940315712706145e-05, "loss": 1.1954, "step": 20570 }, { "epoch": 0.4696271279266122, "grad_norm": 3.140625, "learning_rate": 1.0935610605312068e-05, "loss": 1.1961, "step": 20580 }, { "epoch": 0.4698553238099585, "grad_norm": 3.359375, "learning_rate": 1.0930905497917992e-05, "loss": 1.2336, "step": 20590 }, { "epoch": 0.47008351969330475, "grad_norm": 2.984375, "learning_rate": 1.0926200390523916e-05, "loss": 1.1844, "step": 20600 }, { "epoch": 0.470311715576651, "grad_norm": 2.90625, "learning_rate": 1.092149528312984e-05, "loss": 1.1732, "step": 20610 }, { "epoch": 0.47053991145999724, "grad_norm": 3.28125, "learning_rate": 1.0916790175735763e-05, "loss": 1.1617, "step": 20620 }, { "epoch": 0.4707681073433435, "grad_norm": 3.375, "learning_rate": 1.0912085068341687e-05, "loss": 1.2161, "step": 20630 }, { "epoch": 0.4709963032266898, "grad_norm": 3.0, "learning_rate": 1.090737996094761e-05, "loss": 1.1784, "step": 20640 }, { "epoch": 0.47122449911003605, "grad_norm": 3.234375, "learning_rate": 1.0902674853553534e-05, "loss": 1.2284, "step": 20650 }, { "epoch": 0.4714526949933823, "grad_norm": 3.21875, "learning_rate": 1.0897969746159458e-05, "loss": 1.1903, "step": 20660 }, { "epoch": 0.4716808908767286, "grad_norm": 3.6875, "learning_rate": 1.0893264638765382e-05, "loss": 1.1779, "step": 20670 }, { "epoch": 0.47190908676007487, "grad_norm": 3.328125, "learning_rate": 1.0888559531371305e-05, "loss": 1.1971, "step": 20680 }, { "epoch": 0.4721372826434211, "grad_norm": 3.015625, "learning_rate": 1.088385442397723e-05, "loss": 1.1631, "step": 20690 }, { "epoch": 0.47236547852676736, "grad_norm": 3.015625, "learning_rate": 1.0879149316583153e-05, "loss": 1.1239, "step": 20700 }, { "epoch": 0.47259367441011363, "grad_norm": 3.21875, "learning_rate": 1.0874444209189077e-05, "loss": 1.1815, "step": 20710 }, { "epoch": 0.4728218702934599, "grad_norm": 2.90625, "learning_rate": 1.0869739101795e-05, "loss": 1.2071, "step": 20720 }, { "epoch": 0.47305006617680617, "grad_norm": 3.34375, "learning_rate": 1.0865033994400922e-05, "loss": 1.1838, "step": 20730 }, { "epoch": 0.47327826206015244, "grad_norm": 3.28125, "learning_rate": 1.0860328887006846e-05, "loss": 1.1167, "step": 20740 }, { "epoch": 0.4735064579434987, "grad_norm": 3.03125, "learning_rate": 1.085562377961277e-05, "loss": 1.1515, "step": 20750 }, { "epoch": 0.473734653826845, "grad_norm": 3.078125, "learning_rate": 1.0850918672218693e-05, "loss": 1.1787, "step": 20760 }, { "epoch": 0.4739628497101912, "grad_norm": 3.0, "learning_rate": 1.0846213564824617e-05, "loss": 1.173, "step": 20770 }, { "epoch": 0.4741910455935375, "grad_norm": 3.140625, "learning_rate": 1.0841508457430541e-05, "loss": 1.206, "step": 20780 }, { "epoch": 0.47441924147688375, "grad_norm": 3.40625, "learning_rate": 1.0836803350036465e-05, "loss": 1.1953, "step": 20790 }, { "epoch": 0.47464743736023, "grad_norm": 2.859375, "learning_rate": 1.0832098242642388e-05, "loss": 1.1485, "step": 20800 }, { "epoch": 0.4748756332435763, "grad_norm": 3.1875, "learning_rate": 1.0827393135248312e-05, "loss": 1.1831, "step": 20810 }, { "epoch": 0.47510382912692256, "grad_norm": 3.640625, "learning_rate": 1.0822688027854236e-05, "loss": 1.1935, "step": 20820 }, { "epoch": 0.47533202501026883, "grad_norm": 3.203125, "learning_rate": 1.081798292046016e-05, "loss": 1.1049, "step": 20830 }, { "epoch": 0.4755602208936151, "grad_norm": 3.140625, "learning_rate": 1.0813277813066083e-05, "loss": 1.1454, "step": 20840 }, { "epoch": 0.4757884167769613, "grad_norm": 3.0625, "learning_rate": 1.0808572705672007e-05, "loss": 1.1724, "step": 20850 }, { "epoch": 0.4760166126603076, "grad_norm": 2.9375, "learning_rate": 1.080386759827793e-05, "loss": 1.2098, "step": 20860 }, { "epoch": 0.47624480854365386, "grad_norm": 2.9375, "learning_rate": 1.0799162490883854e-05, "loss": 1.1882, "step": 20870 }, { "epoch": 0.47647300442700014, "grad_norm": 2.921875, "learning_rate": 1.0794457383489778e-05, "loss": 1.1985, "step": 20880 }, { "epoch": 0.4767012003103464, "grad_norm": 3.375, "learning_rate": 1.0789752276095702e-05, "loss": 1.2099, "step": 20890 }, { "epoch": 0.4769293961936927, "grad_norm": 3.109375, "learning_rate": 1.0785047168701625e-05, "loss": 1.2056, "step": 20900 }, { "epoch": 0.47715759207703895, "grad_norm": 2.96875, "learning_rate": 1.0780342061307549e-05, "loss": 1.2291, "step": 20910 }, { "epoch": 0.4773857879603852, "grad_norm": 3.09375, "learning_rate": 1.0775636953913473e-05, "loss": 1.1504, "step": 20920 }, { "epoch": 0.47761398384373144, "grad_norm": 3.390625, "learning_rate": 1.0770931846519397e-05, "loss": 1.247, "step": 20930 }, { "epoch": 0.4778421797270777, "grad_norm": 3.125, "learning_rate": 1.076622673912532e-05, "loss": 1.1403, "step": 20940 }, { "epoch": 0.478070375610424, "grad_norm": 3.359375, "learning_rate": 1.0761521631731244e-05, "loss": 1.2189, "step": 20950 }, { "epoch": 0.47829857149377025, "grad_norm": 3.015625, "learning_rate": 1.0756816524337168e-05, "loss": 1.1905, "step": 20960 }, { "epoch": 0.4785267673771165, "grad_norm": 2.953125, "learning_rate": 1.0752111416943091e-05, "loss": 1.148, "step": 20970 }, { "epoch": 0.4787549632604628, "grad_norm": 3.109375, "learning_rate": 1.0747406309549015e-05, "loss": 1.2455, "step": 20980 }, { "epoch": 0.47898315914380907, "grad_norm": 2.96875, "learning_rate": 1.0742701202154939e-05, "loss": 1.2109, "step": 20990 }, { "epoch": 0.4792113550271553, "grad_norm": 3.140625, "learning_rate": 1.0737996094760863e-05, "loss": 1.2105, "step": 21000 }, { "epoch": 0.47943955091050156, "grad_norm": 2.984375, "learning_rate": 1.0733290987366786e-05, "loss": 1.2342, "step": 21010 }, { "epoch": 0.4796677467938478, "grad_norm": 3.359375, "learning_rate": 1.072858587997271e-05, "loss": 1.2238, "step": 21020 }, { "epoch": 0.4798959426771941, "grad_norm": 3.375, "learning_rate": 1.0723880772578634e-05, "loss": 1.1651, "step": 21030 }, { "epoch": 0.48012413856054037, "grad_norm": 2.96875, "learning_rate": 1.0719175665184557e-05, "loss": 1.176, "step": 21040 }, { "epoch": 0.48035233444388664, "grad_norm": 3.1875, "learning_rate": 1.0714470557790481e-05, "loss": 1.1214, "step": 21050 }, { "epoch": 0.4805805303272329, "grad_norm": 3.046875, "learning_rate": 1.0709765450396405e-05, "loss": 1.1954, "step": 21060 }, { "epoch": 0.4808087262105792, "grad_norm": 2.96875, "learning_rate": 1.0705060343002329e-05, "loss": 1.1867, "step": 21070 }, { "epoch": 0.4810369220939254, "grad_norm": 3.328125, "learning_rate": 1.0700355235608252e-05, "loss": 1.1699, "step": 21080 }, { "epoch": 0.4812651179772717, "grad_norm": 2.890625, "learning_rate": 1.0695650128214176e-05, "loss": 1.1709, "step": 21090 }, { "epoch": 0.48149331386061794, "grad_norm": 3.28125, "learning_rate": 1.06909450208201e-05, "loss": 1.2597, "step": 21100 }, { "epoch": 0.4817215097439642, "grad_norm": 3.59375, "learning_rate": 1.0686239913426023e-05, "loss": 1.2044, "step": 21110 }, { "epoch": 0.4819497056273105, "grad_norm": 3.453125, "learning_rate": 1.0681534806031947e-05, "loss": 1.2003, "step": 21120 }, { "epoch": 0.48217790151065676, "grad_norm": 2.953125, "learning_rate": 1.0676829698637873e-05, "loss": 1.1694, "step": 21130 }, { "epoch": 0.48240609739400303, "grad_norm": 3.09375, "learning_rate": 1.0672124591243796e-05, "loss": 1.1957, "step": 21140 }, { "epoch": 0.4826342932773493, "grad_norm": 3.28125, "learning_rate": 1.066741948384972e-05, "loss": 1.182, "step": 21150 }, { "epoch": 0.4828624891606955, "grad_norm": 3.21875, "learning_rate": 1.0662714376455644e-05, "loss": 1.1409, "step": 21160 }, { "epoch": 0.4830906850440418, "grad_norm": 3.125, "learning_rate": 1.0658009269061567e-05, "loss": 1.1581, "step": 21170 }, { "epoch": 0.48331888092738806, "grad_norm": 3.015625, "learning_rate": 1.0653304161667491e-05, "loss": 1.1866, "step": 21180 }, { "epoch": 0.48354707681073433, "grad_norm": 3.25, "learning_rate": 1.0648599054273415e-05, "loss": 1.1784, "step": 21190 }, { "epoch": 0.4837752726940806, "grad_norm": 2.953125, "learning_rate": 1.0643893946879338e-05, "loss": 1.2022, "step": 21200 }, { "epoch": 0.4840034685774269, "grad_norm": 3.25, "learning_rate": 1.0639188839485262e-05, "loss": 1.2429, "step": 21210 }, { "epoch": 0.48423166446077315, "grad_norm": 3.390625, "learning_rate": 1.0634483732091186e-05, "loss": 1.2021, "step": 21220 }, { "epoch": 0.4844598603441194, "grad_norm": 3.15625, "learning_rate": 1.062977862469711e-05, "loss": 1.1826, "step": 21230 }, { "epoch": 0.48468805622746564, "grad_norm": 3.21875, "learning_rate": 1.0625073517303033e-05, "loss": 1.2665, "step": 21240 }, { "epoch": 0.4849162521108119, "grad_norm": 3.25, "learning_rate": 1.0620368409908957e-05, "loss": 1.1124, "step": 21250 }, { "epoch": 0.4851444479941582, "grad_norm": 3.25, "learning_rate": 1.061566330251488e-05, "loss": 1.1961, "step": 21260 }, { "epoch": 0.48537264387750445, "grad_norm": 3.5, "learning_rate": 1.0610958195120804e-05, "loss": 1.1626, "step": 21270 }, { "epoch": 0.4856008397608507, "grad_norm": 3.015625, "learning_rate": 1.0606253087726728e-05, "loss": 1.2416, "step": 21280 }, { "epoch": 0.485829035644197, "grad_norm": 2.828125, "learning_rate": 1.0601547980332652e-05, "loss": 1.1319, "step": 21290 }, { "epoch": 0.48605723152754327, "grad_norm": 3.28125, "learning_rate": 1.0596842872938576e-05, "loss": 1.2151, "step": 21300 }, { "epoch": 0.4862854274108895, "grad_norm": 3.046875, "learning_rate": 1.05921377655445e-05, "loss": 1.2236, "step": 21310 }, { "epoch": 0.48651362329423575, "grad_norm": 3.4375, "learning_rate": 1.0587432658150423e-05, "loss": 1.2043, "step": 21320 }, { "epoch": 0.486741819177582, "grad_norm": 3.1875, "learning_rate": 1.0582727550756347e-05, "loss": 1.179, "step": 21330 }, { "epoch": 0.4869700150609283, "grad_norm": 2.953125, "learning_rate": 1.057802244336227e-05, "loss": 1.1472, "step": 21340 }, { "epoch": 0.48719821094427457, "grad_norm": 3.0625, "learning_rate": 1.0573317335968194e-05, "loss": 1.1818, "step": 21350 }, { "epoch": 0.48742640682762084, "grad_norm": 3.171875, "learning_rate": 1.0568612228574118e-05, "loss": 1.2285, "step": 21360 }, { "epoch": 0.4876546027109671, "grad_norm": 3.09375, "learning_rate": 1.0563907121180042e-05, "loss": 1.1853, "step": 21370 }, { "epoch": 0.4878827985943134, "grad_norm": 3.171875, "learning_rate": 1.0559202013785965e-05, "loss": 1.1717, "step": 21380 }, { "epoch": 0.4881109944776596, "grad_norm": 3.203125, "learning_rate": 1.0554496906391889e-05, "loss": 1.1674, "step": 21390 }, { "epoch": 0.48833919036100587, "grad_norm": 2.859375, "learning_rate": 1.0549791798997813e-05, "loss": 1.1727, "step": 21400 }, { "epoch": 0.48856738624435214, "grad_norm": 3.21875, "learning_rate": 1.0545086691603736e-05, "loss": 1.1635, "step": 21410 }, { "epoch": 0.4887955821276984, "grad_norm": 3.3125, "learning_rate": 1.054038158420966e-05, "loss": 1.1686, "step": 21420 }, { "epoch": 0.4890237780110447, "grad_norm": 3.125, "learning_rate": 1.0535676476815584e-05, "loss": 1.1537, "step": 21430 }, { "epoch": 0.48925197389439096, "grad_norm": 3.21875, "learning_rate": 1.0530971369421508e-05, "loss": 1.1704, "step": 21440 }, { "epoch": 0.48948016977773723, "grad_norm": 2.8125, "learning_rate": 1.0526266262027431e-05, "loss": 1.1625, "step": 21450 }, { "epoch": 0.4897083656610835, "grad_norm": 3.0625, "learning_rate": 1.0521561154633355e-05, "loss": 1.2308, "step": 21460 }, { "epoch": 0.4899365615444297, "grad_norm": 3.09375, "learning_rate": 1.0516856047239279e-05, "loss": 1.2013, "step": 21470 }, { "epoch": 0.490164757427776, "grad_norm": 3.265625, "learning_rate": 1.0512150939845202e-05, "loss": 1.2345, "step": 21480 }, { "epoch": 0.49039295331112226, "grad_norm": 3.109375, "learning_rate": 1.0507445832451126e-05, "loss": 1.1829, "step": 21490 }, { "epoch": 0.49062114919446853, "grad_norm": 3.078125, "learning_rate": 1.050274072505705e-05, "loss": 1.2147, "step": 21500 }, { "epoch": 0.4908493450778148, "grad_norm": 3.171875, "learning_rate": 1.0498035617662974e-05, "loss": 1.1961, "step": 21510 }, { "epoch": 0.4910775409611611, "grad_norm": 3.0625, "learning_rate": 1.0493330510268897e-05, "loss": 1.1689, "step": 21520 }, { "epoch": 0.49130573684450735, "grad_norm": 3.1875, "learning_rate": 1.0488625402874821e-05, "loss": 1.1826, "step": 21530 }, { "epoch": 0.49153393272785356, "grad_norm": 3.203125, "learning_rate": 1.0483920295480745e-05, "loss": 1.2234, "step": 21540 }, { "epoch": 0.49176212861119983, "grad_norm": 3.25, "learning_rate": 1.0479215188086668e-05, "loss": 1.1556, "step": 21550 }, { "epoch": 0.4919903244945461, "grad_norm": 3.09375, "learning_rate": 1.0474510080692592e-05, "loss": 1.2398, "step": 21560 }, { "epoch": 0.4922185203778924, "grad_norm": 3.203125, "learning_rate": 1.0469804973298516e-05, "loss": 1.1993, "step": 21570 }, { "epoch": 0.49244671626123865, "grad_norm": 3.15625, "learning_rate": 1.046509986590444e-05, "loss": 1.1878, "step": 21580 }, { "epoch": 0.4926749121445849, "grad_norm": 3.3125, "learning_rate": 1.0460394758510363e-05, "loss": 1.1687, "step": 21590 }, { "epoch": 0.4929031080279312, "grad_norm": 3.125, "learning_rate": 1.0455689651116287e-05, "loss": 1.2103, "step": 21600 }, { "epoch": 0.49313130391127746, "grad_norm": 3.265625, "learning_rate": 1.045098454372221e-05, "loss": 1.1904, "step": 21610 }, { "epoch": 0.4933594997946237, "grad_norm": 3.015625, "learning_rate": 1.0446279436328134e-05, "loss": 1.1383, "step": 21620 }, { "epoch": 0.49358769567796995, "grad_norm": 2.84375, "learning_rate": 1.0441574328934058e-05, "loss": 1.2506, "step": 21630 }, { "epoch": 0.4938158915613162, "grad_norm": 3.75, "learning_rate": 1.0436869221539982e-05, "loss": 1.1705, "step": 21640 }, { "epoch": 0.4940440874446625, "grad_norm": 3.109375, "learning_rate": 1.0432164114145906e-05, "loss": 1.1842, "step": 21650 }, { "epoch": 0.49427228332800877, "grad_norm": 3.203125, "learning_rate": 1.042745900675183e-05, "loss": 1.1475, "step": 21660 }, { "epoch": 0.49450047921135504, "grad_norm": 2.9375, "learning_rate": 1.0422753899357753e-05, "loss": 1.1852, "step": 21670 }, { "epoch": 0.4947286750947013, "grad_norm": 3.046875, "learning_rate": 1.0418048791963677e-05, "loss": 1.2278, "step": 21680 }, { "epoch": 0.4949568709780476, "grad_norm": 3.234375, "learning_rate": 1.04133436845696e-05, "loss": 1.1759, "step": 21690 }, { "epoch": 0.4951850668613938, "grad_norm": 3.140625, "learning_rate": 1.0408638577175524e-05, "loss": 1.225, "step": 21700 }, { "epoch": 0.49541326274474007, "grad_norm": 3.078125, "learning_rate": 1.040393346978145e-05, "loss": 1.2385, "step": 21710 }, { "epoch": 0.49564145862808634, "grad_norm": 2.921875, "learning_rate": 1.0399228362387373e-05, "loss": 1.2278, "step": 21720 }, { "epoch": 0.4958696545114326, "grad_norm": 2.96875, "learning_rate": 1.0394523254993297e-05, "loss": 1.1787, "step": 21730 }, { "epoch": 0.4960978503947789, "grad_norm": 3.0, "learning_rate": 1.038981814759922e-05, "loss": 1.1357, "step": 21740 }, { "epoch": 0.49632604627812515, "grad_norm": 3.1875, "learning_rate": 1.0385113040205144e-05, "loss": 1.239, "step": 21750 }, { "epoch": 0.4965542421614714, "grad_norm": 3.34375, "learning_rate": 1.0380407932811068e-05, "loss": 1.2074, "step": 21760 }, { "epoch": 0.4967824380448177, "grad_norm": 2.984375, "learning_rate": 1.0375702825416992e-05, "loss": 1.169, "step": 21770 }, { "epoch": 0.4970106339281639, "grad_norm": 3.515625, "learning_rate": 1.0370997718022916e-05, "loss": 1.1339, "step": 21780 }, { "epoch": 0.4972388298115102, "grad_norm": 3.078125, "learning_rate": 1.036629261062884e-05, "loss": 1.1691, "step": 21790 }, { "epoch": 0.49746702569485646, "grad_norm": 3.15625, "learning_rate": 1.0361587503234763e-05, "loss": 1.1783, "step": 21800 }, { "epoch": 0.49769522157820273, "grad_norm": 3.171875, "learning_rate": 1.0356882395840687e-05, "loss": 1.1801, "step": 21810 }, { "epoch": 0.497923417461549, "grad_norm": 3.25, "learning_rate": 1.035217728844661e-05, "loss": 1.1825, "step": 21820 }, { "epoch": 0.49815161334489527, "grad_norm": 3.28125, "learning_rate": 1.0347472181052534e-05, "loss": 1.1948, "step": 21830 }, { "epoch": 0.49837980922824154, "grad_norm": 3.125, "learning_rate": 1.0342767073658458e-05, "loss": 1.178, "step": 21840 }, { "epoch": 0.49860800511158776, "grad_norm": 3.21875, "learning_rate": 1.0338061966264381e-05, "loss": 1.1966, "step": 21850 }, { "epoch": 0.49883620099493403, "grad_norm": 3.09375, "learning_rate": 1.0333356858870305e-05, "loss": 1.1886, "step": 21860 }, { "epoch": 0.4990643968782803, "grad_norm": 3.453125, "learning_rate": 1.0328651751476229e-05, "loss": 1.162, "step": 21870 }, { "epoch": 0.4992925927616266, "grad_norm": 2.875, "learning_rate": 1.0323946644082153e-05, "loss": 1.1814, "step": 21880 }, { "epoch": 0.49952078864497285, "grad_norm": 3.078125, "learning_rate": 1.0319241536688076e-05, "loss": 1.268, "step": 21890 }, { "epoch": 0.4997489845283191, "grad_norm": 3.0625, "learning_rate": 1.0314536429294e-05, "loss": 1.1998, "step": 21900 }, { "epoch": 0.4999771804116654, "grad_norm": 2.984375, "learning_rate": 1.0309831321899924e-05, "loss": 1.151, "step": 21910 }, { "epoch": 0.5002053762950116, "grad_norm": 3.109375, "learning_rate": 1.0305126214505847e-05, "loss": 1.1854, "step": 21920 }, { "epoch": 0.5004335721783579, "grad_norm": 3.21875, "learning_rate": 1.0300421107111771e-05, "loss": 1.1575, "step": 21930 }, { "epoch": 0.5006617680617041, "grad_norm": 2.78125, "learning_rate": 1.0295715999717695e-05, "loss": 1.1929, "step": 21940 }, { "epoch": 0.5008899639450505, "grad_norm": 3.046875, "learning_rate": 1.0291010892323619e-05, "loss": 1.1987, "step": 21950 }, { "epoch": 0.5011181598283967, "grad_norm": 3.15625, "learning_rate": 1.0286305784929542e-05, "loss": 1.199, "step": 21960 }, { "epoch": 0.5013463557117429, "grad_norm": 2.796875, "learning_rate": 1.0281600677535466e-05, "loss": 1.192, "step": 21970 }, { "epoch": 0.5015745515950892, "grad_norm": 3.109375, "learning_rate": 1.027689557014139e-05, "loss": 1.2163, "step": 21980 }, { "epoch": 0.5018027474784355, "grad_norm": 2.984375, "learning_rate": 1.0272190462747313e-05, "loss": 1.1839, "step": 21990 }, { "epoch": 0.5020309433617818, "grad_norm": 2.953125, "learning_rate": 1.0267485355353237e-05, "loss": 1.2306, "step": 22000 }, { "epoch": 0.502259139245128, "grad_norm": 3.203125, "learning_rate": 1.0262780247959161e-05, "loss": 1.2356, "step": 22010 }, { "epoch": 0.5024873351284743, "grad_norm": 3.0625, "learning_rate": 1.0258075140565085e-05, "loss": 1.1783, "step": 22020 }, { "epoch": 0.5027155310118205, "grad_norm": 3.109375, "learning_rate": 1.0253370033171008e-05, "loss": 1.181, "step": 22030 }, { "epoch": 0.5029437268951669, "grad_norm": 3.15625, "learning_rate": 1.0248664925776932e-05, "loss": 1.1735, "step": 22040 }, { "epoch": 0.5031719227785131, "grad_norm": 3.046875, "learning_rate": 1.0243959818382856e-05, "loss": 1.1803, "step": 22050 }, { "epoch": 0.5034001186618593, "grad_norm": 3.21875, "learning_rate": 1.023925471098878e-05, "loss": 1.1959, "step": 22060 }, { "epoch": 0.5036283145452056, "grad_norm": 2.890625, "learning_rate": 1.0234549603594703e-05, "loss": 1.1598, "step": 22070 }, { "epoch": 0.5038565104285518, "grad_norm": 3.140625, "learning_rate": 1.0229844496200627e-05, "loss": 1.1932, "step": 22080 }, { "epoch": 0.5040847063118982, "grad_norm": 3.34375, "learning_rate": 1.022513938880655e-05, "loss": 1.1645, "step": 22090 }, { "epoch": 0.5043129021952444, "grad_norm": 3.09375, "learning_rate": 1.0220434281412474e-05, "loss": 1.1856, "step": 22100 }, { "epoch": 0.5045410980785907, "grad_norm": 3.09375, "learning_rate": 1.0215729174018398e-05, "loss": 1.1796, "step": 22110 }, { "epoch": 0.5047692939619369, "grad_norm": 3.171875, "learning_rate": 1.0211024066624322e-05, "loss": 1.2156, "step": 22120 }, { "epoch": 0.5049974898452831, "grad_norm": 3.265625, "learning_rate": 1.0206318959230245e-05, "loss": 1.2779, "step": 22130 }, { "epoch": 0.5052256857286295, "grad_norm": 3.421875, "learning_rate": 1.0201613851836169e-05, "loss": 1.2281, "step": 22140 }, { "epoch": 0.5054538816119757, "grad_norm": 2.953125, "learning_rate": 1.0196908744442093e-05, "loss": 1.1926, "step": 22150 }, { "epoch": 0.505682077495322, "grad_norm": 2.90625, "learning_rate": 1.0192203637048017e-05, "loss": 1.2588, "step": 22160 }, { "epoch": 0.5059102733786682, "grad_norm": 3.234375, "learning_rate": 1.018749852965394e-05, "loss": 1.1385, "step": 22170 }, { "epoch": 0.5061384692620146, "grad_norm": 3.03125, "learning_rate": 1.0182793422259864e-05, "loss": 1.2105, "step": 22180 }, { "epoch": 0.5063666651453608, "grad_norm": 3.296875, "learning_rate": 1.0178088314865788e-05, "loss": 1.1773, "step": 22190 }, { "epoch": 0.506594861028707, "grad_norm": 3.28125, "learning_rate": 1.0173383207471711e-05, "loss": 1.2494, "step": 22200 }, { "epoch": 0.5068230569120533, "grad_norm": 3.375, "learning_rate": 1.0168678100077635e-05, "loss": 1.1463, "step": 22210 }, { "epoch": 0.5070512527953995, "grad_norm": 3.25, "learning_rate": 1.0163972992683559e-05, "loss": 1.2166, "step": 22220 }, { "epoch": 0.5072794486787459, "grad_norm": 3.09375, "learning_rate": 1.0159267885289483e-05, "loss": 1.1242, "step": 22230 }, { "epoch": 0.5075076445620921, "grad_norm": 3.171875, "learning_rate": 1.0154562777895406e-05, "loss": 1.1862, "step": 22240 }, { "epoch": 0.5077358404454384, "grad_norm": 3.21875, "learning_rate": 1.014985767050133e-05, "loss": 1.1722, "step": 22250 }, { "epoch": 0.5079640363287846, "grad_norm": 3.296875, "learning_rate": 1.0145152563107254e-05, "loss": 1.1789, "step": 22260 }, { "epoch": 0.508192232212131, "grad_norm": 3.078125, "learning_rate": 1.0140447455713177e-05, "loss": 1.1103, "step": 22270 }, { "epoch": 0.5084204280954772, "grad_norm": 3.21875, "learning_rate": 1.0135742348319103e-05, "loss": 1.1696, "step": 22280 }, { "epoch": 0.5086486239788234, "grad_norm": 2.953125, "learning_rate": 1.0131037240925027e-05, "loss": 1.1942, "step": 22290 }, { "epoch": 0.5088768198621697, "grad_norm": 2.953125, "learning_rate": 1.012633213353095e-05, "loss": 1.1496, "step": 22300 }, { "epoch": 0.5091050157455159, "grad_norm": 3.03125, "learning_rate": 1.0121627026136874e-05, "loss": 1.1725, "step": 22310 }, { "epoch": 0.5093332116288622, "grad_norm": 3.0625, "learning_rate": 1.0116921918742798e-05, "loss": 1.1968, "step": 22320 }, { "epoch": 0.5095614075122085, "grad_norm": 3.28125, "learning_rate": 1.0112216811348721e-05, "loss": 1.1811, "step": 22330 }, { "epoch": 0.5097896033955548, "grad_norm": 2.828125, "learning_rate": 1.0107511703954645e-05, "loss": 1.1459, "step": 22340 }, { "epoch": 0.510017799278901, "grad_norm": 2.921875, "learning_rate": 1.0102806596560569e-05, "loss": 1.1944, "step": 22350 }, { "epoch": 0.5102459951622472, "grad_norm": 2.984375, "learning_rate": 1.0098101489166493e-05, "loss": 1.1583, "step": 22360 }, { "epoch": 0.5104741910455936, "grad_norm": 3.015625, "learning_rate": 1.0093396381772416e-05, "loss": 1.1857, "step": 22370 }, { "epoch": 0.5107023869289398, "grad_norm": 3.234375, "learning_rate": 1.008869127437834e-05, "loss": 1.2132, "step": 22380 }, { "epoch": 0.5109305828122861, "grad_norm": 3.265625, "learning_rate": 1.0083986166984264e-05, "loss": 1.2213, "step": 22390 }, { "epoch": 0.5111587786956323, "grad_norm": 2.84375, "learning_rate": 1.0079281059590187e-05, "loss": 1.2007, "step": 22400 }, { "epoch": 0.5113869745789786, "grad_norm": 3.015625, "learning_rate": 1.0074575952196111e-05, "loss": 1.1755, "step": 22410 }, { "epoch": 0.5116151704623249, "grad_norm": 3.171875, "learning_rate": 1.0069870844802035e-05, "loss": 1.1918, "step": 22420 }, { "epoch": 0.5118433663456711, "grad_norm": 3.34375, "learning_rate": 1.0065165737407959e-05, "loss": 1.2352, "step": 22430 }, { "epoch": 0.5120715622290174, "grad_norm": 3.0625, "learning_rate": 1.0060460630013882e-05, "loss": 1.1724, "step": 22440 }, { "epoch": 0.5122997581123636, "grad_norm": 3.1875, "learning_rate": 1.0055755522619806e-05, "loss": 1.1761, "step": 22450 }, { "epoch": 0.5125279539957099, "grad_norm": 2.828125, "learning_rate": 1.005105041522573e-05, "loss": 1.1558, "step": 22460 }, { "epoch": 0.5127561498790562, "grad_norm": 3.046875, "learning_rate": 1.0046345307831653e-05, "loss": 1.2227, "step": 22470 }, { "epoch": 0.5129843457624025, "grad_norm": 3.140625, "learning_rate": 1.0041640200437577e-05, "loss": 1.2184, "step": 22480 }, { "epoch": 0.5132125416457487, "grad_norm": 3.25, "learning_rate": 1.00369350930435e-05, "loss": 1.1805, "step": 22490 }, { "epoch": 0.513440737529095, "grad_norm": 3.125, "learning_rate": 1.0032229985649424e-05, "loss": 1.1374, "step": 22500 }, { "epoch": 0.5136689334124412, "grad_norm": 3.03125, "learning_rate": 1.0027524878255348e-05, "loss": 1.1819, "step": 22510 }, { "epoch": 0.5138971292957875, "grad_norm": 2.921875, "learning_rate": 1.0022819770861272e-05, "loss": 1.1385, "step": 22520 }, { "epoch": 0.5141253251791338, "grad_norm": 3.03125, "learning_rate": 1.0018114663467196e-05, "loss": 1.1976, "step": 22530 }, { "epoch": 0.51435352106248, "grad_norm": 2.90625, "learning_rate": 1.001340955607312e-05, "loss": 1.2008, "step": 22540 }, { "epoch": 0.5145817169458263, "grad_norm": 2.96875, "learning_rate": 1.0008704448679043e-05, "loss": 1.1654, "step": 22550 }, { "epoch": 0.5148099128291725, "grad_norm": 3.203125, "learning_rate": 1.0003999341284967e-05, "loss": 1.2386, "step": 22560 }, { "epoch": 0.5150381087125189, "grad_norm": 3.234375, "learning_rate": 9.999294233890889e-06, "loss": 1.17, "step": 22570 }, { "epoch": 0.5152663045958651, "grad_norm": 2.9375, "learning_rate": 9.994589126496812e-06, "loss": 1.1875, "step": 22580 }, { "epoch": 0.5154945004792113, "grad_norm": 3.015625, "learning_rate": 9.989884019102736e-06, "loss": 1.172, "step": 22590 }, { "epoch": 0.5157226963625576, "grad_norm": 3.203125, "learning_rate": 9.98517891170866e-06, "loss": 1.2115, "step": 22600 }, { "epoch": 0.5159508922459038, "grad_norm": 3.046875, "learning_rate": 9.980473804314584e-06, "loss": 1.18, "step": 22610 }, { "epoch": 0.5161790881292502, "grad_norm": 3.109375, "learning_rate": 9.975768696920507e-06, "loss": 1.2236, "step": 22620 }, { "epoch": 0.5164072840125964, "grad_norm": 3.21875, "learning_rate": 9.971063589526431e-06, "loss": 1.2208, "step": 22630 }, { "epoch": 0.5166354798959427, "grad_norm": 3.234375, "learning_rate": 9.966358482132355e-06, "loss": 1.1926, "step": 22640 }, { "epoch": 0.5168636757792889, "grad_norm": 3.0, "learning_rate": 9.961653374738278e-06, "loss": 1.1943, "step": 22650 }, { "epoch": 0.5170918716626353, "grad_norm": 2.859375, "learning_rate": 9.956948267344204e-06, "loss": 1.1958, "step": 22660 }, { "epoch": 0.5173200675459815, "grad_norm": 3.390625, "learning_rate": 9.952243159950128e-06, "loss": 1.2341, "step": 22670 }, { "epoch": 0.5175482634293277, "grad_norm": 2.9375, "learning_rate": 9.947538052556051e-06, "loss": 1.1283, "step": 22680 }, { "epoch": 0.517776459312674, "grad_norm": 3.25, "learning_rate": 9.942832945161975e-06, "loss": 1.2147, "step": 22690 }, { "epoch": 0.5180046551960202, "grad_norm": 3.109375, "learning_rate": 9.938127837767899e-06, "loss": 1.2064, "step": 22700 }, { "epoch": 0.5182328510793666, "grad_norm": 3.21875, "learning_rate": 9.933422730373822e-06, "loss": 1.1966, "step": 22710 }, { "epoch": 0.5184610469627128, "grad_norm": 3.046875, "learning_rate": 9.928717622979746e-06, "loss": 1.1433, "step": 22720 }, { "epoch": 0.5186892428460591, "grad_norm": 3.578125, "learning_rate": 9.92401251558567e-06, "loss": 1.1749, "step": 22730 }, { "epoch": 0.5189174387294053, "grad_norm": 2.984375, "learning_rate": 9.919307408191594e-06, "loss": 1.1505, "step": 22740 }, { "epoch": 0.5191456346127515, "grad_norm": 3.265625, "learning_rate": 9.914602300797517e-06, "loss": 1.1551, "step": 22750 }, { "epoch": 0.5193738304960979, "grad_norm": 3.46875, "learning_rate": 9.909897193403441e-06, "loss": 1.2364, "step": 22760 }, { "epoch": 0.5196020263794441, "grad_norm": 3.171875, "learning_rate": 9.905192086009365e-06, "loss": 1.15, "step": 22770 }, { "epoch": 0.5198302222627904, "grad_norm": 3.109375, "learning_rate": 9.900486978615288e-06, "loss": 1.162, "step": 22780 }, { "epoch": 0.5200584181461366, "grad_norm": 2.984375, "learning_rate": 9.895781871221212e-06, "loss": 1.17, "step": 22790 }, { "epoch": 0.520286614029483, "grad_norm": 3.1875, "learning_rate": 9.891076763827136e-06, "loss": 1.1752, "step": 22800 }, { "epoch": 0.5205148099128292, "grad_norm": 3.3125, "learning_rate": 9.88637165643306e-06, "loss": 1.1778, "step": 22810 }, { "epoch": 0.5207430057961754, "grad_norm": 3.0625, "learning_rate": 9.881666549038983e-06, "loss": 1.1443, "step": 22820 }, { "epoch": 0.5209712016795217, "grad_norm": 3.109375, "learning_rate": 9.876961441644907e-06, "loss": 1.202, "step": 22830 }, { "epoch": 0.5211993975628679, "grad_norm": 3.25, "learning_rate": 9.87225633425083e-06, "loss": 1.1955, "step": 22840 }, { "epoch": 0.5214275934462143, "grad_norm": 3.5625, "learning_rate": 9.867551226856754e-06, "loss": 1.1982, "step": 22850 }, { "epoch": 0.5216557893295605, "grad_norm": 3.078125, "learning_rate": 9.862846119462678e-06, "loss": 1.2022, "step": 22860 }, { "epoch": 0.5218839852129068, "grad_norm": 3.265625, "learning_rate": 9.858141012068602e-06, "loss": 1.1837, "step": 22870 }, { "epoch": 0.522112181096253, "grad_norm": 3.078125, "learning_rate": 9.853435904674526e-06, "loss": 1.1612, "step": 22880 }, { "epoch": 0.5223403769795993, "grad_norm": 3.09375, "learning_rate": 9.84873079728045e-06, "loss": 1.2066, "step": 22890 }, { "epoch": 0.5225685728629456, "grad_norm": 3.140625, "learning_rate": 9.844025689886373e-06, "loss": 1.2247, "step": 22900 }, { "epoch": 0.5227967687462918, "grad_norm": 3.21875, "learning_rate": 9.839320582492297e-06, "loss": 1.1915, "step": 22910 }, { "epoch": 0.5230249646296381, "grad_norm": 2.984375, "learning_rate": 9.83461547509822e-06, "loss": 1.1708, "step": 22920 }, { "epoch": 0.5232531605129843, "grad_norm": 2.875, "learning_rate": 9.829910367704144e-06, "loss": 1.1726, "step": 22930 }, { "epoch": 0.5234813563963306, "grad_norm": 3.1875, "learning_rate": 9.825205260310068e-06, "loss": 1.1807, "step": 22940 }, { "epoch": 0.5237095522796769, "grad_norm": 2.984375, "learning_rate": 9.820500152915992e-06, "loss": 1.2047, "step": 22950 }, { "epoch": 0.5239377481630232, "grad_norm": 3.390625, "learning_rate": 9.815795045521915e-06, "loss": 1.1829, "step": 22960 }, { "epoch": 0.5241659440463694, "grad_norm": 3.046875, "learning_rate": 9.811089938127839e-06, "loss": 1.2405, "step": 22970 }, { "epoch": 0.5243941399297156, "grad_norm": 3.1875, "learning_rate": 9.806384830733763e-06, "loss": 1.2471, "step": 22980 }, { "epoch": 0.524622335813062, "grad_norm": 3.078125, "learning_rate": 9.801679723339686e-06, "loss": 1.1545, "step": 22990 }, { "epoch": 0.5248505316964082, "grad_norm": 3.8125, "learning_rate": 9.79697461594561e-06, "loss": 1.1923, "step": 23000 }, { "epoch": 0.5250787275797545, "grad_norm": 3.15625, "learning_rate": 9.792269508551534e-06, "loss": 1.1855, "step": 23010 }, { "epoch": 0.5253069234631007, "grad_norm": 3.25, "learning_rate": 9.787564401157458e-06, "loss": 1.1555, "step": 23020 }, { "epoch": 0.525535119346447, "grad_norm": 3.140625, "learning_rate": 9.782859293763381e-06, "loss": 1.2446, "step": 23030 }, { "epoch": 0.5257633152297932, "grad_norm": 3.421875, "learning_rate": 9.778154186369305e-06, "loss": 1.1935, "step": 23040 }, { "epoch": 0.5259915111131395, "grad_norm": 3.421875, "learning_rate": 9.773449078975229e-06, "loss": 1.1888, "step": 23050 }, { "epoch": 0.5262197069964858, "grad_norm": 3.65625, "learning_rate": 9.768743971581152e-06, "loss": 1.1971, "step": 23060 }, { "epoch": 0.526447902879832, "grad_norm": 3.3125, "learning_rate": 9.764038864187076e-06, "loss": 1.1609, "step": 23070 }, { "epoch": 0.5266760987631783, "grad_norm": 3.25, "learning_rate": 9.759333756793e-06, "loss": 1.2449, "step": 23080 }, { "epoch": 0.5269042946465246, "grad_norm": 3.109375, "learning_rate": 9.754628649398924e-06, "loss": 1.1966, "step": 23090 }, { "epoch": 0.5271324905298709, "grad_norm": 2.859375, "learning_rate": 9.749923542004847e-06, "loss": 1.1745, "step": 23100 }, { "epoch": 0.5273606864132171, "grad_norm": 3.640625, "learning_rate": 9.745218434610771e-06, "loss": 1.2, "step": 23110 }, { "epoch": 0.5275888822965634, "grad_norm": 3.0, "learning_rate": 9.740513327216695e-06, "loss": 1.1743, "step": 23120 }, { "epoch": 0.5278170781799096, "grad_norm": 3.046875, "learning_rate": 9.735808219822618e-06, "loss": 1.1899, "step": 23130 }, { "epoch": 0.5280452740632559, "grad_norm": 3.15625, "learning_rate": 9.731103112428542e-06, "loss": 1.2168, "step": 23140 }, { "epoch": 0.5282734699466022, "grad_norm": 3.234375, "learning_rate": 9.726398005034466e-06, "loss": 1.214, "step": 23150 }, { "epoch": 0.5285016658299484, "grad_norm": 2.890625, "learning_rate": 9.72169289764039e-06, "loss": 1.1966, "step": 23160 }, { "epoch": 0.5287298617132947, "grad_norm": 3.484375, "learning_rate": 9.716987790246313e-06, "loss": 1.194, "step": 23170 }, { "epoch": 0.5289580575966409, "grad_norm": 2.96875, "learning_rate": 9.712282682852237e-06, "loss": 1.1503, "step": 23180 }, { "epoch": 0.5291862534799873, "grad_norm": 3.03125, "learning_rate": 9.70757757545816e-06, "loss": 1.1589, "step": 23190 }, { "epoch": 0.5294144493633335, "grad_norm": 3.0, "learning_rate": 9.702872468064084e-06, "loss": 1.2013, "step": 23200 }, { "epoch": 0.5296426452466797, "grad_norm": 2.71875, "learning_rate": 9.698167360670008e-06, "loss": 1.1561, "step": 23210 }, { "epoch": 0.529870841130026, "grad_norm": 3.359375, "learning_rate": 9.693462253275932e-06, "loss": 1.1978, "step": 23220 }, { "epoch": 0.5300990370133722, "grad_norm": 3.515625, "learning_rate": 9.688757145881855e-06, "loss": 1.191, "step": 23230 }, { "epoch": 0.5303272328967186, "grad_norm": 3.3125, "learning_rate": 9.68405203848778e-06, "loss": 1.1157, "step": 23240 }, { "epoch": 0.5305554287800648, "grad_norm": 3.109375, "learning_rate": 9.679346931093703e-06, "loss": 1.2118, "step": 23250 }, { "epoch": 0.5307836246634111, "grad_norm": 3.5, "learning_rate": 9.674641823699627e-06, "loss": 1.2073, "step": 23260 }, { "epoch": 0.5310118205467573, "grad_norm": 3.453125, "learning_rate": 9.66993671630555e-06, "loss": 1.2592, "step": 23270 }, { "epoch": 0.5312400164301035, "grad_norm": 2.96875, "learning_rate": 9.665231608911474e-06, "loss": 1.2489, "step": 23280 }, { "epoch": 0.5314682123134499, "grad_norm": 3.015625, "learning_rate": 9.660526501517398e-06, "loss": 1.1553, "step": 23290 }, { "epoch": 0.5316964081967961, "grad_norm": 3.453125, "learning_rate": 9.655821394123321e-06, "loss": 1.1613, "step": 23300 }, { "epoch": 0.5319246040801424, "grad_norm": 2.96875, "learning_rate": 9.651116286729245e-06, "loss": 1.1812, "step": 23310 }, { "epoch": 0.5321527999634886, "grad_norm": 3.03125, "learning_rate": 9.646411179335169e-06, "loss": 1.1499, "step": 23320 }, { "epoch": 0.532380995846835, "grad_norm": 3.953125, "learning_rate": 9.641706071941093e-06, "loss": 1.2569, "step": 23330 }, { "epoch": 0.5326091917301812, "grad_norm": 3.546875, "learning_rate": 9.637000964547016e-06, "loss": 1.1641, "step": 23340 }, { "epoch": 0.5328373876135275, "grad_norm": 2.828125, "learning_rate": 9.63229585715294e-06, "loss": 1.1657, "step": 23350 }, { "epoch": 0.5330655834968737, "grad_norm": 3.140625, "learning_rate": 9.627590749758864e-06, "loss": 1.2368, "step": 23360 }, { "epoch": 0.5332937793802199, "grad_norm": 3.0, "learning_rate": 9.622885642364787e-06, "loss": 1.178, "step": 23370 }, { "epoch": 0.5335219752635663, "grad_norm": 3.140625, "learning_rate": 9.618180534970711e-06, "loss": 1.1706, "step": 23380 }, { "epoch": 0.5337501711469125, "grad_norm": 3.203125, "learning_rate": 9.613475427576635e-06, "loss": 1.216, "step": 23390 }, { "epoch": 0.5339783670302588, "grad_norm": 3.28125, "learning_rate": 9.608770320182559e-06, "loss": 1.1745, "step": 23400 }, { "epoch": 0.534206562913605, "grad_norm": 3.1875, "learning_rate": 9.604065212788482e-06, "loss": 1.1996, "step": 23410 }, { "epoch": 0.5344347587969513, "grad_norm": 3.0625, "learning_rate": 9.599360105394406e-06, "loss": 1.2022, "step": 23420 }, { "epoch": 0.5346629546802976, "grad_norm": 3.0, "learning_rate": 9.59465499800033e-06, "loss": 1.1701, "step": 23430 }, { "epoch": 0.5348911505636438, "grad_norm": 3.109375, "learning_rate": 9.589949890606253e-06, "loss": 1.262, "step": 23440 }, { "epoch": 0.5351193464469901, "grad_norm": 3.015625, "learning_rate": 9.585244783212177e-06, "loss": 1.1765, "step": 23450 }, { "epoch": 0.5353475423303363, "grad_norm": 3.046875, "learning_rate": 9.580539675818101e-06, "loss": 1.2254, "step": 23460 }, { "epoch": 0.5355757382136827, "grad_norm": 3.171875, "learning_rate": 9.575834568424025e-06, "loss": 1.1985, "step": 23470 }, { "epoch": 0.5358039340970289, "grad_norm": 3.609375, "learning_rate": 9.571129461029948e-06, "loss": 1.2008, "step": 23480 }, { "epoch": 0.5360321299803752, "grad_norm": 3.15625, "learning_rate": 9.566424353635872e-06, "loss": 1.1915, "step": 23490 }, { "epoch": 0.5362603258637214, "grad_norm": 3.109375, "learning_rate": 9.561719246241796e-06, "loss": 1.1864, "step": 23500 }, { "epoch": 0.5364885217470677, "grad_norm": 3.171875, "learning_rate": 9.55701413884772e-06, "loss": 1.1793, "step": 23510 }, { "epoch": 0.536716717630414, "grad_norm": 3.234375, "learning_rate": 9.552309031453643e-06, "loss": 1.1998, "step": 23520 }, { "epoch": 0.5369449135137602, "grad_norm": 3.078125, "learning_rate": 9.547603924059567e-06, "loss": 1.159, "step": 23530 }, { "epoch": 0.5371731093971065, "grad_norm": 3.203125, "learning_rate": 9.54289881666549e-06, "loss": 1.1725, "step": 23540 }, { "epoch": 0.5374013052804527, "grad_norm": 3.171875, "learning_rate": 9.538193709271414e-06, "loss": 1.1466, "step": 23550 }, { "epoch": 0.537629501163799, "grad_norm": 3.515625, "learning_rate": 9.533488601877338e-06, "loss": 1.1598, "step": 23560 }, { "epoch": 0.5378576970471453, "grad_norm": 3.21875, "learning_rate": 9.528783494483262e-06, "loss": 1.2259, "step": 23570 }, { "epoch": 0.5380858929304916, "grad_norm": 3.109375, "learning_rate": 9.524078387089185e-06, "loss": 1.1877, "step": 23580 }, { "epoch": 0.5383140888138378, "grad_norm": 3.15625, "learning_rate": 9.519373279695109e-06, "loss": 1.225, "step": 23590 }, { "epoch": 0.538542284697184, "grad_norm": 3.3125, "learning_rate": 9.514668172301033e-06, "loss": 1.1707, "step": 23600 }, { "epoch": 0.5387704805805303, "grad_norm": 3.53125, "learning_rate": 9.509963064906958e-06, "loss": 1.1553, "step": 23610 }, { "epoch": 0.5389986764638766, "grad_norm": 3.03125, "learning_rate": 9.505257957512882e-06, "loss": 1.181, "step": 23620 }, { "epoch": 0.5392268723472229, "grad_norm": 3.265625, "learning_rate": 9.500552850118806e-06, "loss": 1.1696, "step": 23630 }, { "epoch": 0.5394550682305691, "grad_norm": 3.28125, "learning_rate": 9.49584774272473e-06, "loss": 1.2249, "step": 23640 }, { "epoch": 0.5396832641139154, "grad_norm": 3.015625, "learning_rate": 9.491142635330653e-06, "loss": 1.1908, "step": 23650 }, { "epoch": 0.5399114599972616, "grad_norm": 3.25, "learning_rate": 9.486437527936577e-06, "loss": 1.2338, "step": 23660 }, { "epoch": 0.5401396558806079, "grad_norm": 3.015625, "learning_rate": 9.4817324205425e-06, "loss": 1.1759, "step": 23670 }, { "epoch": 0.5403678517639542, "grad_norm": 3.0625, "learning_rate": 9.477027313148424e-06, "loss": 1.1898, "step": 23680 }, { "epoch": 0.5405960476473004, "grad_norm": 3.0, "learning_rate": 9.472322205754348e-06, "loss": 1.232, "step": 23690 }, { "epoch": 0.5408242435306467, "grad_norm": 3.359375, "learning_rate": 9.467617098360272e-06, "loss": 1.1785, "step": 23700 }, { "epoch": 0.541052439413993, "grad_norm": 3.078125, "learning_rate": 9.462911990966195e-06, "loss": 1.2013, "step": 23710 }, { "epoch": 0.5412806352973393, "grad_norm": 3.625, "learning_rate": 9.458206883572119e-06, "loss": 1.2148, "step": 23720 }, { "epoch": 0.5415088311806855, "grad_norm": 3.171875, "learning_rate": 9.453501776178043e-06, "loss": 1.124, "step": 23730 }, { "epoch": 0.5417370270640318, "grad_norm": 3.25, "learning_rate": 9.448796668783967e-06, "loss": 1.1739, "step": 23740 }, { "epoch": 0.541965222947378, "grad_norm": 3.078125, "learning_rate": 9.44409156138989e-06, "loss": 1.1493, "step": 23750 }, { "epoch": 0.5421934188307242, "grad_norm": 3.171875, "learning_rate": 9.439386453995814e-06, "loss": 1.1697, "step": 23760 }, { "epoch": 0.5424216147140706, "grad_norm": 3.3125, "learning_rate": 9.434681346601738e-06, "loss": 1.2229, "step": 23770 }, { "epoch": 0.5426498105974168, "grad_norm": 3.296875, "learning_rate": 9.429976239207661e-06, "loss": 1.167, "step": 23780 }, { "epoch": 0.5428780064807631, "grad_norm": 3.15625, "learning_rate": 9.425271131813585e-06, "loss": 1.178, "step": 23790 }, { "epoch": 0.5431062023641093, "grad_norm": 3.109375, "learning_rate": 9.420566024419509e-06, "loss": 1.1554, "step": 23800 }, { "epoch": 0.5433343982474557, "grad_norm": 2.84375, "learning_rate": 9.415860917025433e-06, "loss": 1.1729, "step": 23810 }, { "epoch": 0.5435625941308019, "grad_norm": 3.1875, "learning_rate": 9.411155809631356e-06, "loss": 1.1609, "step": 23820 }, { "epoch": 0.5437907900141481, "grad_norm": 3.140625, "learning_rate": 9.40645070223728e-06, "loss": 1.1474, "step": 23830 }, { "epoch": 0.5440189858974944, "grad_norm": 2.90625, "learning_rate": 9.401745594843204e-06, "loss": 1.2039, "step": 23840 }, { "epoch": 0.5442471817808406, "grad_norm": 3.03125, "learning_rate": 9.397040487449127e-06, "loss": 1.1313, "step": 23850 }, { "epoch": 0.544475377664187, "grad_norm": 3.140625, "learning_rate": 9.392335380055051e-06, "loss": 1.1886, "step": 23860 }, { "epoch": 0.5447035735475332, "grad_norm": 3.171875, "learning_rate": 9.387630272660975e-06, "loss": 1.1862, "step": 23870 }, { "epoch": 0.5449317694308795, "grad_norm": 3.046875, "learning_rate": 9.382925165266898e-06, "loss": 1.1237, "step": 23880 }, { "epoch": 0.5451599653142257, "grad_norm": 3.15625, "learning_rate": 9.378220057872822e-06, "loss": 1.1727, "step": 23890 }, { "epoch": 0.5453881611975719, "grad_norm": 3.15625, "learning_rate": 9.373514950478746e-06, "loss": 1.141, "step": 23900 }, { "epoch": 0.5456163570809183, "grad_norm": 3.21875, "learning_rate": 9.36880984308467e-06, "loss": 1.2254, "step": 23910 }, { "epoch": 0.5458445529642645, "grad_norm": 3.3125, "learning_rate": 9.364104735690593e-06, "loss": 1.1653, "step": 23920 }, { "epoch": 0.5460727488476108, "grad_norm": 3.53125, "learning_rate": 9.359399628296517e-06, "loss": 1.1048, "step": 23930 }, { "epoch": 0.546300944730957, "grad_norm": 3.234375, "learning_rate": 9.35469452090244e-06, "loss": 1.2593, "step": 23940 }, { "epoch": 0.5465291406143034, "grad_norm": 3.609375, "learning_rate": 9.349989413508364e-06, "loss": 1.1977, "step": 23950 }, { "epoch": 0.5467573364976496, "grad_norm": 3.03125, "learning_rate": 9.345284306114287e-06, "loss": 1.1122, "step": 23960 }, { "epoch": 0.5469855323809959, "grad_norm": 3.03125, "learning_rate": 9.34057919872021e-06, "loss": 1.172, "step": 23970 }, { "epoch": 0.5472137282643421, "grad_norm": 3.296875, "learning_rate": 9.335874091326134e-06, "loss": 1.2086, "step": 23980 }, { "epoch": 0.5474419241476883, "grad_norm": 2.984375, "learning_rate": 9.33116898393206e-06, "loss": 1.1904, "step": 23990 }, { "epoch": 0.5476701200310347, "grad_norm": 3.375, "learning_rate": 9.326463876537983e-06, "loss": 1.1954, "step": 24000 } ], "logging_steps": 10, "max_steps": 43822, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.209087320939823e+18, "train_batch_size": 10, "trial_name": null, "trial_params": null }